Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/AArch64/AArch64.h | 11
-rw-r--r-- lib/Target/AArch64/AArch64A53Fix835769.cpp | 240
-rw-r--r-- lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 706
-rw-r--r-- lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 6
-rw-r--r-- lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 12
-rw-r--r-- lib/Target/AArch64/AArch64AsmPrinter.cpp | 33
-rw-r--r-- lib/Target/AArch64/AArch64BranchRelaxation.cpp | 11
-rw-r--r-- lib/Target/AArch64/AArch64CallingConvention.h | 141
-rw-r--r-- lib/Target/AArch64/AArch64CallingConvention.td | 35
-rw-r--r-- lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 4
-rw-r--r-- lib/Target/AArch64/AArch64CollectLOH.cpp | 52
-rw-r--r-- lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 422
-rw-r--r-- lib/Target/AArch64/AArch64ConditionalCompares.cpp | 12
-rw-r--r-- lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 9
-rw-r--r-- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 3
-rw-r--r-- lib/Target/AArch64/AArch64FastISel.cpp | 4510
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.cpp | 57
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.h | 4
-rw-r--r-- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 272
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp | 1319
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.h | 42
-rw-r--r-- lib/Target/AArch64/AArch64InstrAtomics.td | 9
-rw-r--r-- lib/Target/AArch64/AArch64InstrFormats.td | 44
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp | 997
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.h | 42
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.td | 416
-rw-r--r-- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 16
-rw-r--r-- lib/Target/AArch64/AArch64MCInstLower.cpp | 3
-rw-r--r-- lib/Target/AArch64/AArch64MCInstLower.h | 6
-rw-r--r-- lib/Target/AArch64/AArch64MachineCombinerPattern.h | 42
-rw-r--r-- lib/Target/AArch64/AArch64MachineFunctionInfo.h | 6
-rw-r--r-- lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 383
-rw-r--r-- lib/Target/AArch64/AArch64PBQPRegAlloc.h | 38
-rw-r--r-- lib/Target/AArch64/AArch64PerfectShuffle.h | 5
-rw-r--r-- lib/Target/AArch64/AArch64PromoteConstant.cpp | 8
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.cpp | 12
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.h | 6
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.td | 5
-rw-r--r-- lib/Target/AArch64/AArch64SchedA57.td | 371
-rw-r--r-- lib/Target/AArch64/AArch64SchedA57WriteRes.td | 52
-rw-r--r-- lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 3
-rw-r--r-- lib/Target/AArch64/AArch64SelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/AArch64/AArch64StorePairSuppress.cpp | 14
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.cpp | 34
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.h | 40
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.cpp | 133
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.h | 34
-rw-r--r-- lib/Target/AArch64/AArch64TargetObjectFile.h | 4
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 78
-rw-r--r-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 149
-rw-r--r-- lib/Target/AArch64/CMakeLists.txt | 6
-rw-r--r-- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 67
-rw-r--r-- lib/Target/AArch64/Disassembler/AArch64Disassembler.h | 11
-rw-r--r-- lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h | 4
-rw-r--r-- lib/Target/AArch64/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 18
-rw-r--r-- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 9
-rw-r--r-- lib/Target/AArch64/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h | 104
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 53
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 2
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 74
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 8
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 7
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 5
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 5
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 7
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 47
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 15
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 85
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 2
-rw-r--r-- lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 13
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 36
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 13
-rw-r--r-- lib/Target/ARM/A15SDOptimizer.cpp | 6
-rw-r--r-- lib/Target/ARM/ARM.h | 9
-rw-r--r-- lib/Target/ARM/ARM.td | 34
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp | 161
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.h | 10
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 264
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.h | 76
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 59
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.h | 6
-rw-r--r-- lib/Target/ARM/ARMCallingConv.h | 29
-rw-r--r-- lib/Target/ARM/ARMCodeEmitter.cpp | 1909
-rw-r--r-- lib/Target/ARM/ARMConstantIslandPass.cpp | 47
-rw-r--r-- lib/Target/ARM/ARMConstantPoolValue.h | 4
-rw-r--r-- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 14
-rw-r--r-- lib/Target/ARM/ARMFPUName.def | 1
-rw-r--r-- lib/Target/ARM/ARMFPUName.h | 6
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp | 185
-rw-r--r-- lib/Target/ARM/ARMFeatures.h | 4
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp | 415
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.h | 4
-rw-r--r-- lib/Target/ARM/ARMHazardRecognizer.cpp | 4
-rw-r--r-- lib/Target/ARM/ARMHazardRecognizer.h | 6
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 88
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 1087
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 36
-rw-r--r-- lib/Target/ARM/ARMInstrFormats.td | 10
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.cpp | 50
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.h | 8
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 362
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td | 48
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td | 28
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 179
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td | 80
-rw-r--r-- lib/Target/ARM/ARMJITInfo.cpp | 344
-rw-r--r-- lib/Target/ARM/ARMJITInfo.h | 177
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 273
-rw-r--r-- lib/Target/ARM/ARMMCInstLower.cpp | 36
-rw-r--r-- lib/Target/ARM/ARMMachineFunctionInfo.h | 22
-rw-r--r-- lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMPerfectShuffle.h | 5
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.h | 4
-rw-r--r-- lib/Target/ARM/ARMRelocations.h | 62
-rw-r--r-- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp | 157
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 79
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 137
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.h | 43
-rw-r--r-- lib/Target/ARM/ARMTargetObjectFile.h | 4
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp | 31
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 888
-rw-r--r-- lib/Target/ARM/CMakeLists.txt | 5
-rw-r--r-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 442
-rw-r--r-- lib/Target/ARM/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 221
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 7
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 51
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMArchName.h | 6
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 432
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 69
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 33
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h | 27
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h | 26
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 23
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 39
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 6
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 7
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 22
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 6
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 11
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 106
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 39
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 6
-rw-r--r-- lib/Target/ARM/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/ARM/MLxExpansionPass.cpp | 4
-rw-r--r-- lib/Target/ARM/Makefile | 2
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.cpp | 131
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.h | 4
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp | 34
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.h | 9
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.cpp | 390
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.h | 6
-rw-r--r-- lib/Target/ARM/Thumb2ITBlockPass.cpp | 7
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp | 9
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.h | 10
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.h | 6
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp | 6
-rw-r--r-- lib/Target/CMakeLists.txt | 1
-rw-r--r-- lib/Target/CppBackend/CPPTargetMachine.h | 16
-rw-r--r-- lib/Target/Hexagon/CMakeLists.txt | 25
-rw-r--r-- lib/Target/Hexagon/Disassembler/CMakeLists.txt | 3
-rw-r--r-- lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 181
-rw-r--r-- lib/Target/Hexagon/Disassembler/LLVMBuild.txt (renamed from lib/Target/Hexagon/InstPrinter/LLVMBuild.txt) | 6
-rw-r--r-- lib/Target/Hexagon/Disassembler/Makefile (renamed from lib/Target/Hexagon/InstPrinter/Makefile) | 9
-rw-r--r-- lib/Target/Hexagon/Hexagon.h | 4
-rw-r--r-- lib/Target/Hexagon/Hexagon.td | 14
-rw-r--r-- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 26
-rwxr-xr-x lib/Target/Hexagon/HexagonAsmPrinter.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 28
-rw-r--r-- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 6
-rw-r--r-- lib/Target/Hexagon/HexagonCallingConvLower.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 40
-rw-r--r-- lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 46
-rw-r--r-- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 14
-rw-r--r-- lib/Target/Hexagon/HexagonFrameLowering.cpp | 31
-rw-r--r-- lib/Target/Hexagon/HexagonFrameLowering.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 53
-rw-r--r-- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 152
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.cpp | 61
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.h | 15
-rw-r--r-- lib/Target/Hexagon/HexagonInstrFormats.td | 22
-rw-r--r-- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 5
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 672
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.h | 9
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.td | 5302
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfoV3.td | 141
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfoV4.td | 3628
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfoV5.td | 495
-rw-r--r-- lib/Target/Hexagon/HexagonIntrinsics.td | 9
-rw-r--r-- lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 7
-rw-r--r-- lib/Target/Hexagon/HexagonIntrinsicsV4.td | 9
-rw-r--r-- lib/Target/Hexagon/HexagonMCInstLower.cpp | 1
-rw-r--r-- lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 10
-rw-r--r-- lib/Target/Hexagon/HexagonMachineScheduler.h | 20
-rw-r--r-- lib/Target/Hexagon/HexagonNewValueJump.cpp | 51
-rw-r--r-- lib/Target/Hexagon/HexagonOperands.td | 23
-rw-r--r-- lib/Target/Hexagon/HexagonPeephole.cpp | 27
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 37
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.td | 164
-rw-r--r-- lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 15
-rw-r--r-- lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 26
-rw-r--r-- lib/Target/Hexagon/HexagonSubtarget.h | 28
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.cpp | 43
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.h | 31
-rw-r--r-- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 5
-rw-r--r-- lib/Target/Hexagon/HexagonTargetObjectFile.h | 4
-rw-r--r-- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 142
-rw-r--r-- lib/Target/Hexagon/HexagonVarargsCallingConvention.h | 20
-rw-r--r-- lib/Target/Hexagon/InstPrinter/CMakeLists.txt | 3
-rw-r--r-- lib/Target/Hexagon/LLVMBuild.txt | 4
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 6
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 74
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 14
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 62
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp (renamed from lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp) | 62
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h (renamed from lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h) | 4
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 4
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 88
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 60
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 128
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 124
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 63
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 26
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/Hexagon/Makefile | 16
-rw-r--r-- lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 4
-rw-r--r-- lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 4
-rw-r--r-- lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 2
-rw-r--r-- lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430BranchSelector.cpp | 3
-rw-r--r-- lib/Target/MSP430/MSP430CallingConv.td | 2
-rw-r--r-- lib/Target/MSP430/MSP430FrameLowering.cpp | 69
-rw-r--r-- lib/Target/MSP430/MSP430FrameLowering.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 6
-rw-r--r-- lib/Target/MSP430/MSP430ISelLowering.cpp | 58
-rw-r--r-- lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.cpp | 2
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.td | 236
-rw-r--r-- lib/Target/MSP430/MSP430MCInstLower.cpp | 5
-rw-r--r-- lib/Target/MSP430/MSP430MCInstLower.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430MachineFunctionInfo.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430RegisterInfo.cpp | 48
-rw-r--r-- lib/Target/MSP430/MSP430RegisterInfo.h | 6
-rw-r--r-- lib/Target/MSP430/MSP430RegisterInfo.td | 38
-rw-r--r-- lib/Target/MSP430/MSP430SelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/MSP430/MSP430Subtarget.cpp | 2
-rw-r--r-- lib/Target/MSP430/MSP430Subtarget.h | 24
-rw-r--r-- lib/Target/MSP430/MSP430TargetMachine.cpp | 11
-rw-r--r-- lib/Target/MSP430/MSP430TargetMachine.h | 30
-rw-r--r-- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1436
-rw-r--r-- lib/Target/Mips/CMakeLists.txt | 6
-rw-r--r-- lib/Target/Mips/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 537
-rw-r--r-- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 33
-rw-r--r-- lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 6
-rw-r--r-- lib/Target/Mips/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 4
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp (renamed from lib/Target/Mips/MipsABIInfo.cpp) | 0
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsABIInfo.h (renamed from lib/Target/Mips/MipsABIInfo.h) | 2
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 17
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 6
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 4
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 11
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 40
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 24
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 9
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 2
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 4
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 250
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 66
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 5
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 7
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 7
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 9
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 4
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 4
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 253
-rw-r--r-- lib/Target/Mips/Makefile | 2
-rw-r--r-- lib/Target/Mips/MicroMipsInstrFPU.td | 8
-rw-r--r-- lib/Target/Mips/MicroMipsInstrFormats.td | 277
-rw-r--r-- lib/Target/Mips/MicroMipsInstrInfo.td | 520
-rw-r--r-- lib/Target/Mips/Mips.h | 6
-rw-r--r-- lib/Target/Mips/Mips16FrameLowering.cpp | 8
-rw-r--r-- lib/Target/Mips/Mips16FrameLowering.h | 4
-rw-r--r-- lib/Target/Mips/Mips16HardFloat.cpp | 28
-rw-r--r-- lib/Target/Mips/Mips16HardFloat.h | 23
-rw-r--r-- lib/Target/Mips/Mips16HardFloatInfo.h | 4
-rw-r--r-- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 17
-rw-r--r-- lib/Target/Mips/Mips16ISelDAGToDAG.h | 4
-rw-r--r-- lib/Target/Mips/Mips16ISelLowering.cpp | 53
-rw-r--r-- lib/Target/Mips/Mips16ISelLowering.h | 15
-rw-r--r-- lib/Target/Mips/Mips16InstrFormats.td | 2
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.cpp | 6
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.h | 4
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.td | 20
-rw-r--r-- lib/Target/Mips/Mips16RegisterInfo.cpp | 8
-rw-r--r-- lib/Target/Mips/Mips16RegisterInfo.h | 4
-rw-r--r-- lib/Target/Mips/Mips32r6InstrFormats.td | 2
-rw-r--r-- lib/Target/Mips/Mips64InstrInfo.td | 26
-rw-r--r-- lib/Target/Mips/MipsAnalyzeImmediate.cpp | 3
-rw-r--r-- lib/Target/Mips/MipsAnalyzeImmediate.h | 4
-rw-r--r-- lib/Target/Mips/MipsAsmPrinter.cpp | 59
-rw-r--r-- lib/Target/Mips/MipsAsmPrinter.h | 12
-rw-r--r-- lib/Target/Mips/MipsCCState.h | 7
-rw-r--r-- lib/Target/Mips/MipsCallingConv.td | 58
-rw-r--r-- lib/Target/Mips/MipsCodeEmitter.cpp | 481
-rw-r--r-- lib/Target/Mips/MipsCondMov.td | 37
-rw-r--r-- lib/Target/Mips/MipsConstantIslandPass.cpp | 11
-rw-r--r-- lib/Target/Mips/MipsDelaySlotFiller.cpp | 139
-rw-r--r-- lib/Target/Mips/MipsFastISel.cpp | 1131
-rw-r--r-- lib/Target/Mips/MipsFrameLowering.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsFrameLowering.h | 4
-rw-r--r-- lib/Target/Mips/MipsISelDAGToDAG.h | 4
-rw-r--r-- lib/Target/Mips/MipsISelLowering.cpp | 286
-rw-r--r-- lib/Target/Mips/MipsISelLowering.h | 41
-rw-r--r-- lib/Target/Mips/MipsInstrFPU.td | 68
-rw-r--r-- lib/Target/Mips/MipsInstrFormats.td | 16
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.h | 4
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.td | 232
-rw-r--r-- lib/Target/Mips/MipsJITInfo.cpp | 286
-rw-r--r-- lib/Target/Mips/MipsJITInfo.h | 71
-rw-r--r-- lib/Target/Mips/MipsLongBranch.cpp | 28
-rw-r--r-- lib/Target/Mips/MipsMCInstLower.h | 4
-rw-r--r-- lib/Target/Mips/MipsMSAInstrInfo.td | 2
-rw-r--r-- lib/Target/Mips/MipsMachineFunction.cpp | 18
-rw-r--r-- lib/Target/Mips/MipsMachineFunction.h | 10
-rw-r--r-- lib/Target/Mips/MipsModuleISelDAGToDAG.h | 4
-rw-r--r-- lib/Target/Mips/MipsOptimizePICCall.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsOptionRecord.h | 12
-rw-r--r-- lib/Target/Mips/MipsOs16.h | 8
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.h | 4
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.td | 44
-rw-r--r-- lib/Target/Mips/MipsRelocations.h | 41
-rw-r--r-- lib/Target/Mips/MipsSEFrameLowering.cpp | 62
-rw-r--r-- lib/Target/Mips/MipsSEFrameLowering.h | 4
-rw-r--r-- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 7
-rw-r--r-- lib/Target/Mips/MipsSEISelDAGToDAG.h | 4
-rw-r--r-- lib/Target/Mips/MipsSEISelLowering.cpp | 91
-rw-r--r-- lib/Target/Mips/MipsSEISelLowering.h | 15
-rw-r--r-- lib/Target/Mips/MipsSEInstrInfo.cpp | 24
-rw-r--r-- lib/Target/Mips/MipsSEInstrInfo.h | 5
-rw-r--r-- lib/Target/Mips/MipsSERegisterInfo.cpp | 4
-rw-r--r-- lib/Target/Mips/MipsSERegisterInfo.h | 4
-rw-r--r-- lib/Target/Mips/MipsSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/Mips/MipsSubtarget.cpp | 58
-rw-r--r-- lib/Target/Mips/MipsSubtarget.h | 38
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.cpp | 100
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.h | 47
-rw-r--r-- lib/Target/Mips/MipsTargetObjectFile.cpp | 73
-rw-r--r-- lib/Target/Mips/MipsTargetObjectFile.h | 23
-rw-r--r-- lib/Target/Mips/MipsTargetStreamer.h | 73
-rw-r--r-- lib/Target/NVPTX/CMakeLists.txt | 24
-rw-r--r-- lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 4
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 4
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 5
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 8
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 4
-rw-r--r-- lib/Target/NVPTX/ManagedStringPool.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTX.h | 10
-rw-r--r-- lib/Target/NVPTX/NVPTXAllocaHoisting.h | 6
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 229
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.h | 30
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 10
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 74
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 9
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 5
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.cpp | 105
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.h | 16
-rw-r--r-- lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 2
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.td | 10
-rw-r--r-- lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 134
-rw-r--r-- lib/Target/NVPTX/NVPTXMCExpr.h | 7
-rw-r--r-- lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 5
-rw-r--r-- lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 9
-rw-r--r-- lib/Target/NVPTX/NVPTXRegisterInfo.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 6
-rw-r--r-- lib/Target/NVPTX/NVPTXSection.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.cpp | 3
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.h | 24
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 23
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.h | 41
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 7
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 115
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.cpp | 19
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.h | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXVector.td | 4
-rw-r--r-- lib/Target/NVPTX/NVPTXutil.h | 4
-rw-r--r-- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 224
-rw-r--r-- lib/Target/PowerPC/CMakeLists.txt | 5
-rw-r--r-- lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 28
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 7
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 5
-rw-r--r-- lib/Target/PowerPC/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 23
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 4
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 11
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 11
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 57
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 73
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 11
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 26
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 20
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 4
-rw-r--r-- lib/Target/PowerPC/Makefile | 2
-rw-r--r-- lib/Target/PowerPC/PPC.h | 7
-rw-r--r-- lib/Target/PowerPC/PPC.td | 52
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 232
-rw-r--r-- lib/Target/PowerPC/PPCBranchSelector.cpp | 28
-rw-r--r-- lib/Target/PowerPC/PPCCTRLoops.cpp | 25
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.h | 35
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.td | 48
-rw-r--r-- lib/Target/PowerPC/PPCCodeEmitter.cpp | 293
-rw-r--r-- lib/Target/PowerPC/PPCFastISel.cpp | 246
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp | 89
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCHazardRecognizers.h | 12
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2293
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 1622
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h | 126
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td | 73
-rw-r--r-- lib/Target/PowerPC/PPCInstrAltivec.td | 114
-rw-r--r-- lib/Target/PowerPC/PPCInstrBuilder.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCInstrFormats.td | 115
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 71
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 15
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td | 458
-rw-r--r-- lib/Target/PowerPC/PPCInstrSPE.td | 447
-rw-r--r-- lib/Target/PowerPC/PPCInstrVSX.td | 156
-rw-r--r-- lib/Target/PowerPC/PPCJITInfo.cpp | 482
-rw-r--r-- lib/Target/PowerPC/PPCJITInfo.h | 46
-rw-r--r-- lib/Target/PowerPC/PPCMCInstLower.cpp | 6
-rw-r--r-- lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 3
-rw-r--r-- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 11
-rw-r--r-- lib/Target/PowerPC/PPCPerfectShuffle.h | 5
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 82
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.h | 6
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.td | 9
-rw-r--r-- lib/Target/PowerPC/PPCRelocations.h | 56
-rw-r--r-- lib/Target/PowerPC/PPCSchedule.td | 2
-rw-r--r-- lib/Target/PowerPC/PPCScheduleP7.td | 3
-rw-r--r-- lib/Target/PowerPC/PPCScheduleP8.td | 397
-rw-r--r-- lib/Target/PowerPC/PPCSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.cpp | 119
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.h | 86
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp | 136
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h | 42
-rw-r--r-- lib/Target/PowerPC/PPCTargetObjectFile.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCTargetStreamer.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 80
-rw-r--r-- lib/Target/PowerPC/README.txt | 275
-rw-r--r-- lib/Target/R600/AMDGPU.h | 17
-rw-r--r-- lib/Target/R600/AMDGPU.td | 25
-rw-r--r-- lib/Target/R600/AMDGPUAlwaysInlinePass.cpp | 66
-rw-r--r-- lib/Target/R600/AMDGPUAsmPrinter.cpp | 270
-rw-r--r-- lib/Target/R600/AMDGPUAsmPrinter.h | 38
-rw-r--r-- lib/Target/R600/AMDGPUCallingConv.td | 32
-rw-r--r-- lib/Target/R600/AMDGPUFrameLowering.h | 6
-rw-r--r-- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 471
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.cpp | 1038
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.h | 91
-rw-r--r-- lib/Target/R600/AMDGPUInstrInfo.cpp | 58
-rw-r--r-- lib/Target/R600/AMDGPUInstrInfo.h | 19
-rw-r--r-- lib/Target/R600/AMDGPUInstrInfo.td | 63
-rw-r--r-- lib/Target/R600/AMDGPUInstructions.td | 115
-rw-r--r-- lib/Target/R600/AMDGPUIntrinsicInfo.cpp | 2
-rw-r--r-- lib/Target/R600/AMDGPUIntrinsicInfo.h | 8
-rw-r--r-- lib/Target/R600/AMDGPUMCInstLower.cpp | 23
-rw-r--r-- lib/Target/R600/AMDGPUMCInstLower.h | 9
-rw-r--r-- lib/Target/R600/AMDGPUMachineFunction.cpp | 4
-rw-r--r-- lib/Target/R600/AMDGPUMachineFunction.h | 12
-rw-r--r-- lib/Target/R600/AMDGPUPromoteAlloca.cpp | 48
-rw-r--r-- lib/Target/R600/AMDGPURegisterInfo.cpp | 3
-rw-r--r-- lib/Target/R600/AMDGPURegisterInfo.h | 8
-rw-r--r-- lib/Target/R600/AMDGPUSubtarget.cpp | 80
-rw-r--r-- lib/Target/R600/AMDGPUSubtarget.h | 58
-rw-r--r-- lib/Target/R600/AMDGPUTargetMachine.cpp | 145
-rw-r--r-- lib/Target/R600/AMDGPUTargetMachine.h | 52
-rw-r--r-- lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 20
-rw-r--r-- lib/Target/R600/AMDILCFGStructurizer.cpp | 5
-rw-r--r-- lib/Target/R600/AMDKernelCodeT.h | 704
-rw-r--r-- lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 320
-rw-r--r-- lib/Target/R600/AsmParser/CMakeLists.txt | 3
-rw-r--r-- lib/Target/R600/AsmParser/LLVMBuild.txt | 23
-rw-r--r-- lib/Target/R600/AsmParser/Makefile | 15
-rw-r--r-- lib/Target/R600/CIInstructions.td | 42
-rw-r--r-- lib/Target/R600/CMakeLists.txt | 8
-rw-r--r-- lib/Target/R600/CaymanInstructions.td | 2
-rw-r--r-- lib/Target/R600/EvergreenInstructions.td | 73
-rw-r--r-- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 275
-rw-r--r-- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 23
-rw-r--r-- lib/Target/R600/LLVMBuild.txt | 5
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 13
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h | 6
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 23
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 18
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 6
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 19
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 7
-rw-r--r-- lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 13
-rw-r--r-- lib/Target/R600/Makefile | 4
-rw-r--r-- lib/Target/R600/Processors.td | 32
-rw-r--r-- lib/Target/R600/R600ClauseMergePass.cpp | 3
-rw-r--r-- lib/Target/R600/R600ControlFlowFinalizer.cpp | 21
-rw-r--r-- lib/Target/R600/R600Defines.h | 6
-rw-r--r-- lib/Target/R600/R600EmitClauseMarkers.cpp | 3
-rw-r--r-- lib/Target/R600/R600ExpandSpecialInstrs.cpp | 3
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp | 176
-rw-r--r-- lib/Target/R600/R600ISelLowering.h | 6
-rw-r--r-- lib/Target/R600/R600InstrFormats.td | 3
-rw-r--r-- lib/Target/R600/R600InstrInfo.cpp | 15
-rw-r--r-- lib/Target/R600/R600InstrInfo.h | 12
-rw-r--r-- lib/Target/R600/R600Instructions.td | 44
-rw-r--r-- lib/Target/R600/R600MachineFunctionInfo.h | 6
-rw-r--r-- lib/Target/R600/R600MachineScheduler.cpp | 33
-rw-r--r-- lib/Target/R600/R600MachineScheduler.h | 4
-rw-r--r-- lib/Target/R600/R600OptimizeVectorRegisters.cpp | 8
-rw-r--r-- lib/Target/R600/R600Packetizer.cpp | 15
-rw-r--r-- lib/Target/R600/R600RegisterInfo.h | 6
-rw-r--r-- lib/Target/R600/SIDefines.h | 96
-rw-r--r-- lib/Target/R600/SIFixSGPRCopies.cpp | 106
-rw-r--r-- lib/Target/R600/SIFixSGPRLiveRanges.cpp | 147
-rw-r--r-- lib/Target/R600/SIFoldOperands.cpp | 275
-rw-r--r-- lib/Target/R600/SIISelLowering.cpp | 1643
-rw-r--r-- lib/Target/R600/SIISelLowering.h | 57
-rw-r--r-- lib/Target/R600/SIInsertWaits.cpp | 90
-rw-r--r-- lib/Target/R600/SIInstrFormats.td | 256
-rw-r--r-- lib/Target/R600/SIInstrInfo.cpp | 1702
-rw-r--r-- lib/Target/R600/SIInstrInfo.h | 195
-rw-r--r-- lib/Target/R600/SIInstrInfo.td | 1817
-rw-r--r-- lib/Target/R600/SIInstructions.td | 3016
-rw-r--r-- lib/Target/R600/SILoadStoreOptimizer.cpp | 434
-rw-r--r-- lib/Target/R600/SILowerControlFlow.cpp | 86
-rw-r--r-- lib/Target/R600/SILowerI1Copies.cpp | 105
-rw-r--r-- lib/Target/R600/SIMachineFunctionInfo.cpp | 96
-rw-r--r-- lib/Target/R600/SIMachineFunctionInfo.h | 41
-rw-r--r-- lib/Target/R600/SIPrepareScratchRegs.cpp | 196
-rw-r--r-- lib/Target/R600/SIRegisterInfo.cpp | 355
-rw-r--r-- lib/Target/R600/SIRegisterInfo.h | 50
-rw-r--r-- lib/Target/R600/SIRegisterInfo.td | 78
-rw-r--r-- lib/Target/R600/SISchedule.td | 80
-rw-r--r-- lib/Target/R600/SIShrinkInstructions.cpp | 136
-rw-r--r-- lib/Target/R600/SITypeRewriter.cpp | 2
-rw-r--r-- lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp | 6
-rw-r--r-- lib/Target/R600/VIInstrFormats.td | 145
-rw-r--r-- lib/Target/R600/VIInstructions.td | 89
-rw-r--r-- lib/Target/README.txt | 84
-rw-r--r-- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 13
-rw-r--r-- lib/Target/Sparc/CMakeLists.txt | 5
-rw-r--r-- lib/Target/Sparc/DelaySlotFiller.cpp | 9
-rw-r--r-- lib/Target/Sparc/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/Sparc/Disassembler/SparcDisassembler.cpp | 61
-rw-r--r-- lib/Target/Sparc/InstPrinter/SparcInstPrinter.h | 4
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h | 4
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 4
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 5
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 7
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 6
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 4
-rw-r--r-- lib/Target/Sparc/Makefile | 2
-rw-r--r-- lib/Target/Sparc/README.txt | 2
-rw-r--r-- lib/Target/Sparc/Sparc.h | 6
-rw-r--r-- lib/Target/Sparc/SparcAsmPrinter.cpp | 5
-rw-r--r-- lib/Target/Sparc/SparcCodeEmitter.cpp | 280
-rw-r--r-- lib/Target/Sparc/SparcFrameLowering.cpp | 6
-rw-r--r-- lib/Target/Sparc/SparcFrameLowering.h | 4
-rw-r--r-- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 15
-rw-r--r-- lib/Target/Sparc/SparcISelLowering.cpp | 72
-rw-r--r-- lib/Target/Sparc/SparcISelLowering.h | 4
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.h | 4
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.td | 2
-rw-r--r-- lib/Target/Sparc/SparcInstrVIS.td | 34
-rw-r--r-- lib/Target/Sparc/SparcJITInfo.cpp | 326
-rw-r--r-- lib/Target/Sparc/SparcJITInfo.h | 67
-rw-r--r-- lib/Target/Sparc/SparcMachineFunctionInfo.h | 4
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.cpp | 6
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.h | 4
-rw-r--r-- lib/Target/Sparc/SparcRelocations.h | 56
-rw-r--r-- lib/Target/Sparc/SparcSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/Sparc/SparcSubtarget.h | 25
-rw-r--r-- lib/Target/Sparc/SparcTargetMachine.cpp | 26
-rw-r--r-- lib/Target/Sparc/SparcTargetMachine.h | 29
-rw-r--r-- lib/Target/Sparc/SparcTargetObjectFile.h | 4
-rw-r--r-- lib/Target/Sparc/SparcTargetStreamer.h | 4
-rw-r--r-- lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 9
-rw-r--r-- lib/Target/SystemZ/CMakeLists.txt | 2
-rw-r--r-- lib/Target/SystemZ/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 21
-rw-r--r-- lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h | 4
-rw-r--r-- lib/Target/SystemZ/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 7
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h | 7
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h | 4
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 15
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 4
-rw-r--r-- lib/Target/SystemZ/Makefile | 1
-rw-r--r-- lib/Target/SystemZ/SystemZ.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 5
-rw-r--r-- lib/Target/SystemZ/SystemZAsmPrinter.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZCallingConv.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZConstantPoolValue.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZElimCompare.cpp | 4
-rw-r--r-- lib/Target/SystemZ/SystemZFrameLowering.cpp | 16
-rw-r--r-- lib/Target/SystemZ/SystemZFrameLowering.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 12
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.cpp | 65
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.h | 11
-rw-r--r-- lib/Target/SystemZ/SystemZInstrBuilder.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZInstrFP.td | 4
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.cpp | 2
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.td | 26
-rw-r--r-- lib/Target/SystemZ/SystemZLongBranch.cpp | 2
-rw-r--r-- lib/Target/SystemZ/SystemZMCInstLower.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8
-rw-r--r-- lib/Target/SystemZ/SystemZRegisterInfo.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/SystemZ/SystemZShortenInst.cpp | 2
-rw-r--r-- lib/Target/SystemZ/SystemZSubtarget.h | 22
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.cpp | 19
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.h | 28
-rw-r--r-- lib/Target/Target.cpp | 2
-rw-r--r-- lib/Target/TargetJITInfo.cpp | 14
-rw-r--r-- lib/Target/TargetLibraryInfo.cpp | 19
-rw-r--r-- lib/Target/TargetLoweringObjectFile.cpp | 31
-rw-r--r-- lib/Target/TargetMachine.cpp | 41
-rw-r--r-- lib/Target/TargetMachineC.cpp | 15
-rw-r--r-- lib/Target/TargetSubtargetInfo.cpp | 4
-rw-r--r-- lib/Target/X86/AsmParser/CMakeLists.txt | 3
-rw-r--r-- lib/Target/X86/AsmParser/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 1033
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 28
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 559
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParserCommon.h | 10
-rw-r--r-- lib/Target/X86/AsmParser/X86Operand.h | 95
-rw-r--r-- lib/Target/X86/CMakeLists.txt | 3
-rw-r--r-- lib/Target/X86/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/X86/Disassembler/X86Disassembler.cpp | 211
-rw-r--r-- lib/Target/X86/Disassembler/X86Disassembler.h | 16
-rw-r--r-- lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 171
-rw-r--r-- lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 23
-rw-r--r-- lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 35
-rw-r--r-- lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 75
-rw-r--r-- lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 23
-rw-r--r-- lib/Target/X86/InstPrinter/X86InstComments.cpp | 340
-rw-r--r-- lib/Target/X86/InstPrinter/X86InstComments.h | 6
-rw-r--r-- lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 52
-rw-r--r-- lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 12
-rw-r--r-- lib/Target/X86/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 65
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 168
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 2
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86FixupKinds.h | 4
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 25
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 9
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 116
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 11
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 8
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 131
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 11
-rw-r--r-- lib/Target/X86/README.txt | 177
-rw-r--r-- lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2
-rw-r--r-- lib/Target/X86/Utils/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 170
-rw-r--r-- lib/Target/X86/Utils/X86ShuffleDecode.h | 31
-rw-r--r-- lib/Target/X86/X86.h | 14
-rw-r--r-- lib/Target/X86/X86.td | 262
-rw-r--r-- lib/Target/X86/X86AsmPrinter.cpp | 28
-rw-r--r-- lib/Target/X86/X86AsmPrinter.h | 75
-rw-r--r-- lib/Target/X86/X86AtomicExpandPass.cpp | 283
-rw-r--r-- lib/Target/X86/X86CallingConv.h | 17
-rw-r--r-- lib/Target/X86/X86CallingConv.td | 77
-rw-r--r-- lib/Target/X86/X86CodeEmitter.cpp | 1498
-rw-r--r-- lib/Target/X86/X86FastISel.cpp | 611
-rw-r--r-- lib/Target/X86/X86FixupLEAs.cpp | 9
-rw-r--r-- lib/Target/X86/X86FloatingPoint.cpp | 392
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp | 507
-rw-r--r-- lib/Target/X86/X86FrameLowering.h | 23
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 222
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 8127
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 266
-rw-r--r-- lib/Target/X86/X86InstrAVX512.td | 3774
-rw-r--r-- lib/Target/X86/X86InstrArithmetic.td | 359
-rw-r--r-- lib/Target/X86/X86InstrBuilder.h | 4
-rw-r--r-- lib/Target/X86/X86InstrCompiler.td | 186
-rw-r--r-- lib/Target/X86/X86InstrControl.td | 92
-rw-r--r-- lib/Target/X86/X86InstrExtension.td | 26
-rw-r--r-- lib/Target/X86/X86InstrFMA.td | 6
-rw-r--r-- lib/Target/X86/X86InstrFPStack.td | 85
-rw-r--r-- lib/Target/X86/X86InstrFormats.td | 168
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td | 61
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 1082
-rw-r--r-- lib/Target/X86/X86InstrInfo.h | 10
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 775
-rw-r--r-- lib/Target/X86/X86InstrMMX.td | 45
-rw-r--r-- lib/Target/X86/X86InstrSGX.td | 24
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 1760
-rw-r--r-- lib/Target/X86/X86InstrShiftRotate.td | 58
-rw-r--r-- lib/Target/X86/X86InstrSystem.td | 40
-rw-r--r-- lib/Target/X86/X86InstrTSX.td | 9
-rw-r--r-- lib/Target/X86/X86IntrinsicsInfo.h | 547
-rw-r--r-- lib/Target/X86/X86JITInfo.cpp | 588
-rw-r--r-- lib/Target/X86/X86JITInfo.h | 79
-rw-r--r-- lib/Target/X86/X86MCInstLower.cpp | 527
-rw-r--r-- lib/Target/X86/X86MachineFunctionInfo.cpp | 19
-rw-r--r-- lib/Target/X86/X86MachineFunctionInfo.h | 27
-rw-r--r-- lib/Target/X86/X86PadShortFunction.cpp | 5
-rw-r--r-- lib/Target/X86/X86RegisterInfo.cpp | 43
-rw-r--r-- lib/Target/X86/X86RegisterInfo.h | 5
-rw-r--r-- lib/Target/X86/X86RegisterInfo.td | 45
-rw-r--r-- lib/Target/X86/X86Relocations.h | 52
-rw-r--r-- lib/Target/X86/X86SchedHaswell.td | 1885
-rw-r--r-- lib/Target/X86/X86SchedSandyBridge.td | 1
-rw-r--r-- lib/Target/X86/X86Schedule.td | 20
-rw-r--r-- lib/Target/X86/X86ScheduleAtom.td | 5
-rw-r--r-- lib/Target/X86/X86ScheduleBtVer2.td | 341
-rw-r--r-- lib/Target/X86/X86ScheduleSLM.td | 1
-rw-r--r-- lib/Target/X86/X86SelectionDAGInfo.cpp | 37
-rw-r--r-- lib/Target/X86/X86SelectionDAGInfo.h | 9
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 85
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 98
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 117
-rw-r--r-- lib/Target/X86/X86TargetMachine.h | 37
-rw-r--r-- lib/Target/X86/X86TargetObjectFile.h | 4
-rw-r--r-- lib/Target/X86/X86TargetTransformInfo.cpp | 162
-rw-r--r-- lib/Target/X86/X86VZeroUpper.cpp | 22
-rw-r--r-- lib/Target/XCore/Disassembler/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 57
-rw-r--r-- lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 4
-rw-r--r-- lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 1
-rw-r--r-- lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 4
-rw-r--r-- lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 8
-rw-r--r-- lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 4
-rw-r--r-- lib/Target/XCore/XCore.h | 4
-rw-r--r-- lib/Target/XCore/XCoreAsmPrinter.cpp | 4
-rw-r--r-- lib/Target/XCore/XCoreFrameLowering.cpp | 26
-rw-r--r-- lib/Target/XCore/XCoreFrameLowering.h | 6
-rw-r--r-- lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp | 3
-rw-r--r-- lib/Target/XCore/XCoreISelLowering.cpp | 72
-rw-r--r-- lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.cpp | 9
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.h | 4
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.td | 38
-rw-r--r-- lib/Target/XCore/XCoreMCInstLower.h | 4
-rw-r--r-- lib/Target/XCore/XCoreMachineFunctionInfo.h | 6
-rw-r--r-- lib/Target/XCore/XCoreRegisterInfo.cpp | 17
-rw-r--r-- lib/Target/XCore/XCoreRegisterInfo.h | 4
-rw-r--r-- lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 2
-rw-r--r-- lib/Target/XCore/XCoreSelectionDAGInfo.h | 4
-rw-r--r-- lib/Target/XCore/XCoreSubtarget.h | 22
-rw-r--r-- lib/Target/XCore/XCoreTargetMachine.cpp | 18
-rw-r--r-- lib/Target/XCore/XCoreTargetMachine.h | 27
-rw-r--r-- lib/Target/XCore/XCoreTargetObjectFile.cpp | 6
-rw-r--r-- lib/Target/XCore/XCoreTargetObjectFile.h | 4
-rw-r--r-- lib/Target/XCore/XCoreTargetStreamer.h | 4
-rw-r--r-- lib/Target/XCore/XCoreTargetTransformInfo.cpp | 6
783 files changed, 66547 insertions, 32661 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 1c022aaf86bd..e96d18bdef06 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_AArch64_H
-#define TARGET_AArch64_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64_H
-#include "Utils/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -36,7 +36,10 @@ FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
ModulePass *createAArch64PromoteConstantPass();
+FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
+FunctionPass *createAArch64A57FPLoadBalancing();
+FunctionPass *createAArch64A53Fix835769();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *
createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
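
The hunk above only declares new pass-factory entry points (createAArch64ConditionOptimizerPass, createAArch64A57FPLoadBalancing, createAArch64A53Fix835769); the wiring into the pass pipeline lives in AArch64TargetMachine.cpp, outside this excerpt. For orientation, here is a minimal standalone C++ sketch of the factory pattern these declarations follow. All types below are simplified stand-ins invented for illustration, not the real LLVM classes:

// Toy model of the create* factory pattern: callers receive an opaque
// FunctionPass pointer and never see the concrete pass class, which stays
// hidden in its own .cpp file (as AArch64A53Fix835769 does below).
#include <iostream>
#include <memory>
#include <vector>

struct FunctionPass {
  virtual ~FunctionPass() = default;
  virtual const char *getPassName() const = 0;
};

struct A53Fix835769Pass : FunctionPass {
  const char *getPassName() const override {
    return "Workaround A53 erratum 835769 pass";
  }
};

// Factory in the style of the declarations added in the hunk above.
FunctionPass *createAArch64A53Fix835769() { return new A53Fix835769Pass(); }

int main() {
  // A pass config would append factories like this into its pipeline.
  std::vector<std::unique_ptr<FunctionPass>> Pipeline;
  Pipeline.emplace_back(createAArch64A53Fix835769());
  for (const auto &P : Pipeline)
    std::cout << P->getPassName() << '\n';
}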
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
new file mode 100644
index 000000000000..852a6352af0e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -0,0 +1,240 @@
+//===-- AArch64A53Fix835769.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass works around Cortex-A53 erratum 835769 by inserting a nop
+// instruction in code sequences that, in some circumstances, may trigger
+// the erratum.
+// It inserts the nop between consecutive instructions drawn from the
+// following two classes:
+// instr 1: a mem-instr (including loads, stores and prefetches).
+// instr 2: a non-SIMD integer multiply-accumulate writing 64-bit X registers.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-fix-cortex-a53-835769"
+
+STATISTIC(NumNopsAdded, "Number of Nops added to work around erratum 835769");
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+
+// Is the instruction a match for the instruction that comes first in the
+// sequence of instructions that can trigger the erratum?
+static bool isFirstInstructionInSequence(MachineInstr *MI) {
+ // Must return true if this instruction is a load, a store or a prefetch.
+ switch (MI->getOpcode()) {
+ case AArch64::PRFMl:
+ case AArch64::PRFMroW:
+ case AArch64::PRFMroX:
+ case AArch64::PRFMui:
+ case AArch64::PRFUMi:
+ return true;
+ default:
+ return (MI->mayLoad() || MI->mayStore());
+ }
+}
+
+// Is the instruction a match for the instruction that comes second in the
+// sequence that can trigger the erratum?
+static bool isSecondInstructionInSequence(MachineInstr *MI) {
+ // Must return true for non-SIMD integer multiply-accumulates, writing
+ // to a 64-bit register.
+ switch (MI->getOpcode()) {
+  // The erratum cannot be triggered when the destination register is 32 bits,
+  // so only the 64-bit forms are included here.
+ case AArch64::MSUBXrrr:
+ case AArch64::MADDXrrr:
+ case AArch64::SMADDLrrr:
+ case AArch64::SMSUBLrrr:
+ case AArch64::UMADDLrrr:
+ case AArch64::UMSUBLrrr:
+    // The erratum can only be triggered by multiply-adds, not by regular
+    // non-accumulating multiplies, which are encoded with Ra = XZR = '11111'.
+ return MI->getOperand(3).getReg() != AArch64::XZR;
+ default:
+ return false;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AArch64A53Fix835769 : public MachineFunctionPass {
+ const AArch64InstrInfo *TII;
+
+public:
+ static char ID;
+ explicit AArch64A53Fix835769() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ const char *getPassName() const override {
+ return "Workaround A53 erratum 835769 pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool runOnBasicBlock(MachineBasicBlock &MBB);
+};
+char AArch64A53Fix835769::ID = 0;
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+bool
+AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
+ const TargetMachine &TM = F.getTarget();
+
+ bool Changed = false;
+ DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
+
+ TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+
+ for (auto &MBB : F) {
+ Changed |= runOnBasicBlock(MBB);
+ }
+
+ return Changed;
+}
+
+// Return the block that was fallen through to get to MBB, if any,
+// otherwise nullptr.
+static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII) {
+ // Get the previous machine basic block in the function.
+ MachineFunction::iterator MBBI = *MBB;
+
+ // Can't go off top of function.
+ if (MBBI == MBB->getParent()->begin())
+ return nullptr;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MachineBasicBlock *PrevBB = std::prev(MBBI);
+ for (MachineBasicBlock *S : MBB->predecessors())
+ if (S == PrevBB && !TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond) &&
+ !TBB && !FBB)
+ return S;
+
+ return nullptr;
+}
+
+// Iterate back through fallen-through blocks to find the last non-pseudo
+// instruction, if there is one; otherwise return nullptr. Only instructions
+// in previous blocks are examined, never the current block, since callers
+// only use this to inspect what precedes MBB.
+static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
+ const TargetInstrInfo *TII) {
+ MachineBasicBlock *FMBB = &MBB;
+
+ // If there is no non-pseudo in the current block, loop back around and try
+ // the previous block (if there is one).
+ while ((FMBB = getBBFallenThrough(FMBB, TII))) {
+ for (auto I = FMBB->rbegin(), E = FMBB->rend(); I != E; ++I) {
+ if (!I->isPseudo())
+ return &*I;
+ }
+ }
+
+ // There was no previous non-pseudo in the fallen through blocks
+ return nullptr;
+}
+
+static void insertNopBeforeInstruction(MachineBasicBlock &MBB, MachineInstr* MI,
+ const TargetInstrInfo *TII) {
+  // If MI is the first instruction of the block, put the NOP at the end of
+  // the previous fallthrough block.
+ if (MI == &MBB.front()) {
+ MachineInstr *I = getLastNonPseudo(MBB, TII);
+ assert(I && "Expected instruction");
+ DebugLoc DL = I->getDebugLoc();
+ BuildMI(I->getParent(), DL, TII->get(AArch64::HINT)).addImm(0);
+ }
+ else {
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, TII->get(AArch64::HINT)).addImm(0);
+ }
+
+ ++NumNopsAdded;
+}
+
+bool
+AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Running on MBB: " << MBB << " - scanning instructions...\n");
+
+ // First, scan the basic block, looking for a sequence of 2 instructions
+ // that match the conditions under which the erratum may trigger.
+
+ // List of terminating instructions in matching sequences
+ std::vector<MachineInstr*> Sequences;
+ unsigned Idx = 0;
+ MachineInstr *PrevInstr = nullptr;
+
+  // Try to find the last non-pseudo instruction in any fallen-through blocks;
+  // if there isn't one, PrevInstr remains nullptr.
+ PrevInstr = getLastNonPseudo(MBB, TII);
+
+ for (auto &MI : MBB) {
+ MachineInstr *CurrInstr = &MI;
+ DEBUG(dbgs() << " Examining: " << MI);
+ if (PrevInstr) {
+ DEBUG(dbgs() << " PrevInstr: " << *PrevInstr
+ << " CurrInstr: " << *CurrInstr
+ << " isFirstInstructionInSequence(PrevInstr): "
+ << isFirstInstructionInSequence(PrevInstr) << "\n"
+ << " isSecondInstructionInSequence(CurrInstr): "
+ << isSecondInstructionInSequence(CurrInstr) << "\n");
+ if (isFirstInstructionInSequence(PrevInstr) &&
+ isSecondInstructionInSequence(CurrInstr)) {
+ DEBUG(dbgs() << " ** pattern found at Idx " << Idx << "!\n");
+ Sequences.push_back(CurrInstr);
+ }
+ }
+ if (!CurrInstr->isPseudo())
+ PrevInstr = CurrInstr;
+ ++Idx;
+ }
+
+  DEBUG(dbgs() << "Scan complete, " << Sequences.size()
+               << " occurrences of pattern found.\n");
+
+ // Then update the basic block, inserting nops between the detected sequences.
+ for (auto &MI : Sequences) {
+ Changed = true;
+ insertNopBeforeInstruction(MBB, MI, TII);
+ }
+
+ return Changed;
+}
+
+// Factory function used by AArch64TargetMachine to add the pass to
+// the pass manager.
+FunctionPass *llvm::createAArch64A53Fix835769() {
+ return new AArch64A53Fix835769();
+}
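
To make the scan loop of runOnBasicBlock above concrete, here is a minimal standalone C++ sketch of the same sliding-window detection: walk the block, remember the last non-pseudo instruction, and record a match whenever a memory op is immediately followed by an accumulating 64-bit multiply. Instruction classes are collapsed into an enum (MemOp standing for the loads/stores/prefetches that isFirstInstructionInSequence accepts, MulAcc64 for the multiply-accumulates that isSecondInstructionInSequence accepts, Ra != XZR already folded in), so this is an illustration of the pass's scanning behaviour, not LLVM code:

#include <cstdio>
#include <vector>

// Reduced instruction model for illustration only.
enum class Kind { MemOp, MulAcc64, Pseudo, Other };

// Mirrors the pass: Prev tracks the last non-pseudo instruction seen, and a
// (MemOp, MulAcc64) pair marks where a nop must be inserted before the
// second instruction of the pair. Pseudos do not update Prev, because they
// emit no machine code and the pattern can still span them.
static std::vector<size_t> findNopInsertionPoints(const std::vector<Kind> &BB) {
  std::vector<size_t> Points;
  const Kind *Prev = nullptr;
  for (size_t Idx = 0; Idx != BB.size(); ++Idx) {
    if (Prev && *Prev == Kind::MemOp && BB[Idx] == Kind::MulAcc64)
      Points.push_back(Idx); // nop goes immediately before BB[Idx]
    if (BB[Idx] != Kind::Pseudo)
      Prev = &BB[Idx];
  }
  return Points;
}

int main() {
  // ldr; (pseudo); madd -> the erratum pattern, even across the pseudo.
  std::vector<Kind> BB = {Kind::MemOp, Kind::Pseudo, Kind::MulAcc64,
                          Kind::Other, Kind::MulAcc64};
  for (size_t P : findNopInsertionPoints(BB))
    std::printf("insert nop before instruction %zu\n", P);
  return 0;
}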
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
new file mode 100644
index 000000000000..dd1a1ea3159d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -0,0 +1,706 @@
+//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// For best-case performance on Cortex-A57, we should try to use a balanced
+// mix of odd and even D-registers when performing a critical sequence of
+// independent, non-quadword FP/ASIMD floating-point multiply or
+// multiply-accumulate operations.
+//
+// This pass attempts to detect situations where the register allocation may
+// adversely affect this load balancing and to change the registers used so as
+// to better utilize the CPU.
+//
+// Ideally we'd just take each multiply or multiply-accumulate in turn and
+// allocate it alternating even or odd registers. However, multiply-accumulates
+// are most efficiently performed in the same functional unit as their
+// accumulation operand. Therefore this pass tries to find maximal sequences
+// ("Chains") of multiply-accumulates linked via their accumulation operand,
+// and assign them all the same "color" (oddness/evenness).
+//
+// This optimization affects S-register and D-register floating point
+// multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
+// FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
+// not affected.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <list>
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
+
+// Force the algorithm to use the scavenged register even when the original
+// destination register is the correct color. Used for testing.
+static cl::opt<bool>
+TransformAll("aarch64-a57-fp-load-balancing-force-all",
+ cl::desc("Always modify dest registers regardless of color"),
+ cl::init(false), cl::Hidden);
+
+// Never use the balance information obtained from chains; always return a
+// specific color. Used for testing.
+static cl::opt<unsigned>
+OverrideBalance("aarch64-a57-fp-load-balancing-override",
+ cl::desc("Ignore balance information, always return "
+ "(1: Even, 2: Odd)."),
+ cl::init(0), cl::Hidden);
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+
+// Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
+static bool isMul(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AArch64::FMULSrr:
+ case AArch64::FNMULSrr:
+ case AArch64::FMULDrr:
+ case AArch64::FNMULDrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
+static bool isMla(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AArch64::FMSUBSrrr:
+ case AArch64::FMADDSrrr:
+ case AArch64::FNMSUBSrrr:
+ case AArch64::FNMADDSrrr:
+ case AArch64::FMSUBDrrr:
+ case AArch64::FMADDDrrr:
+ case AArch64::FNMSUBDrrr:
+ case AArch64::FNMADDDrrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A "color", which is either even or odd. Yes, these aren't really colors
+/// but the algorithm is conceptually doing two-color graph coloring.
+enum class Color { Even, Odd };
+#ifndef NDEBUG
+static const char *ColorNames[2] = { "Even", "Odd" };
+#endif
+
+class Chain;
+
+class AArch64A57FPLoadBalancing : public MachineFunctionPass {
+ const AArch64InstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ RegisterClassInfo RCI;
+
+public:
+ static char ID;
+ explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ const char *getPassName() const override {
+ return "A57 FP Anti-dependency breaker";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool runOnBasicBlock(MachineBasicBlock &MBB);
+ bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
+ int &Balance);
+ bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
+ int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
+ void scanInstruction(MachineInstr *MI, unsigned Idx,
+ std::map<unsigned, Chain*> &Active,
+ std::set<std::unique_ptr<Chain>> &AllChains);
+ void maybeKillChain(MachineOperand &MO, unsigned Idx,
+ std::map<unsigned, Chain*> &RegChains);
+ Color getColor(unsigned Register);
+ Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
+};
+char AArch64A57FPLoadBalancing::ID = 0;
+
+/// A Chain is a sequence of instructions that are linked together by
+/// an accumulation operand. For example:
+///
+/// fmul d0<def>, ?
+/// fmla d1<def>, ?, ?, d0<kill>
+/// fmla d2<def>, ?, ?, d1<kill>
+///
+/// There may be other instructions interleaved in the sequence that
+/// do not belong to the chain. These other instructions must not use
+/// the "chain" register at any point.
+///
+/// We currently only support chains where the "chain" operand is killed
+/// at each link in the chain for simplicity.
+/// A chain has three important instructions: Start, Last and Kill.
+/// * Start is the first instruction in the chain.
+/// * Last is the final instruction in the chain.
+/// * Kill may or may not be defined. If defined, Kill is the instruction
+/// where the outgoing value of the Last instruction is killed.
+/// This information is important as if we know the outgoing value is
+/// killed with no intervening uses, we can safely change its register.
+///
+/// Without a kill instruction, we must assume the outgoing value escapes
+/// beyond our model and either must not change its register or must
+/// create a fixup FMOV to keep the old register value consistent.
+///
+class Chain {
+public:
+ /// The important (marker) instructions.
+ MachineInstr *StartInst, *LastInst, *KillInst;
+  /// The index, counted from the start of the basic block, at which each
+  /// marker appears. These are stored so we can do quick interval tests.
+ unsigned StartInstIdx, LastInstIdx, KillInstIdx;
+ /// All instructions in the chain.
+ std::set<MachineInstr*> Insts;
+ /// True if KillInst cannot be modified. If this is true,
+ /// we cannot change LastInst's outgoing register.
+ /// This will be true for tied values and regmasks.
+ bool KillIsImmutable;
+ /// The "color" of LastInst. This will be the preferred chain color,
+ /// as changing intermediate nodes is easy but changing the last
+ /// instruction can be more tricky.
+ Color LastColor;
+
+ Chain(MachineInstr *MI, unsigned Idx, Color C)
+ : StartInst(MI), LastInst(MI), KillInst(nullptr),
+ StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
+ LastColor(C) {
+ Insts.insert(MI);
+ }
+
+ /// Add a new instruction into the chain. The instruction's dest operand
+ /// has the given color.
+ void add(MachineInstr *MI, unsigned Idx, Color C) {
+ LastInst = MI;
+ LastInstIdx = Idx;
+ LastColor = C;
+ assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
+ "Chain: broken invariant. A Chain can only be killed after its last "
+ "def");
+
+ Insts.insert(MI);
+ }
+
+ /// Return true if MI is a member of the chain.
+ bool contains(MachineInstr *MI) { return Insts.count(MI) > 0; }
+
+ /// Return the number of instructions in the chain.
+ unsigned size() const {
+ return Insts.size();
+ }
+
+ /// Inform the chain that its last active register (the dest register of
+ /// LastInst) is killed by MI with no intervening uses or defs.
+ void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
+ KillInst = MI;
+ KillInstIdx = Idx;
+ KillIsImmutable = Immutable;
+ assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
+ "Chain: broken invariant. A Chain can only be killed after its last "
+ "def");
+ }
+
+ /// Return the first instruction in the chain.
+ MachineInstr *getStart() const { return StartInst; }
+ /// Return the last instruction in the chain.
+ MachineInstr *getLast() const { return LastInst; }
+ /// Return the "kill" instruction (as set with setKill()) or NULL.
+ MachineInstr *getKill() const { return KillInst; }
+ /// Return an instruction that can be used as an iterator for the end
+ /// of the chain. This is the maximum of KillInst (if set) and LastInst.
+ MachineBasicBlock::iterator getEnd() const {
+ return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst);
+ }
+
+ /// Can the Kill instruction (assuming one exists) be modified?
+ bool isKillImmutable() const { return KillIsImmutable; }
+
+ /// Return the preferred color of this chain.
+ Color getPreferredColor() {
+ if (OverrideBalance != 0)
+ return OverrideBalance == 1 ? Color::Even : Color::Odd;
+ return LastColor;
+ }
+
+ /// Return true if this chain (StartInst..KillInst) overlaps with Other.
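+  /// For example, a chain spanning indices [2,10] overlaps one spanning
+  /// [8,12], but not one spanning [11,14].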
+ bool rangeOverlapsWith(const Chain &Other) const {
+ unsigned End = KillInst ? KillInstIdx : LastInstIdx;
+ unsigned OtherEnd = Other.KillInst ?
+ Other.KillInstIdx : Other.LastInstIdx;
+
+ return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
+ }
+
+ /// Return true if this chain starts before Other.
+ bool startsBefore(Chain *Other) {
+ return StartInstIdx < Other->StartInstIdx;
+ }
+
+  /// Return true if the chain will require a fixup MOV at the end.
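+  /// This is equivalent to !getKill() || isKillImmutable().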
+ bool requiresFixup() const {
+ return (getKill() && isKillImmutable()) || !getKill();
+ }
+
+ /// Return a simple string representation of the chain.
+ std::string str() const {
+ std::string S;
+ raw_string_ostream OS(S);
+
+ OS << "{";
+    StartInst->print(OS, nullptr, true);
+    OS << " -> ";
+    LastInst->print(OS, nullptr, true);
+    if (KillInst) {
+      OS << " (kill @ ";
+      KillInst->print(OS, nullptr, true);
+ OS << ")";
+ }
+ OS << "}";
+
+ return OS.str();
+ }
+
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
+
+ const TargetMachine &TM = F.getTarget();
+ MRI = &F.getRegInfo();
+ TRI = F.getRegInfo().getTargetRegisterInfo();
+ TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ RCI.runOnMachineFunction(F);
+
+ for (auto &MBB : F) {
+ Changed |= runOnBasicBlock(MBB);
+ }
+
+ return Changed;
+}
+
+bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Running on MBB: " << MBB << " - scanning instructions...\n");
+
+ // First, scan the basic block producing a set of chains.
+
+ // The currently "active" chains - chains that can be added to and haven't
+  // been killed yet. This is keyed by register - each chain has exactly one
+  // "link" register between consecutive insts in the chain.
+ std::map<unsigned, Chain*> ActiveChains;
+ std::set<std::unique_ptr<Chain>> AllChains;
+ unsigned Idx = 0;
+ for (auto &MI : MBB)
+ scanInstruction(&MI, Idx++, ActiveChains, AllChains);
+
+ DEBUG(dbgs() << "Scan complete, "<< AllChains.size() << " chains created.\n");
+
+ // Group the chains into disjoint sets based on their liveness range. This is
+ // a poor-man's version of graph coloring. Ideally we'd create an interference
+  // graph and perform full-on graph coloring on that, but:
+ // (a) That's rather heavyweight for only two colors.
+ // (b) We expect multiple disjoint interference regions - in practice the live
+ // range of chains is quite small and they are clustered between loads
+ // and stores.
+ EquivalenceClasses<Chain*> EC;
+ for (auto &I : AllChains)
+ EC.insert(I.get());
+
+ for (auto &I : AllChains)
+ for (auto &J : AllChains)
+ if (I != J && I->rangeOverlapsWith(*J))
+ EC.unionSets(I.get(), J.get());
+ DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
+
+ // Now we assume that every member of an equivalence class interferes
+ // with every other member of that class, and with no members of other classes.
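+  // For example, chains spanning indices [0,5], [3,8] and [10,12] form two
+  // classes: {[0,5], [3,8]} and {[10,12]}.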
+
+ // Convert the EquivalenceClasses to a simpler set of sets.
+ std::vector<std::vector<Chain*> > V;
+ for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
+ std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
+ if (Cs.empty()) continue;
+ V.push_back(std::move(Cs));
+ }
+
+  // Now we have a set of sets; order them by start index so we can iterate
+  // over them sequentially.
+ std::sort(V.begin(), V.end(),
+ [](const std::vector<Chain*> &A,
+ const std::vector<Chain*> &B) {
+ return A.front()->startsBefore(B.front());
+ });
+
+ // As we only have two colors, we can track the global (BB-level) balance of
+ // odds versus evens. We aim to keep this near zero to keep both execution
+ // units fed.
+ // Positive means we're even-heavy, negative we're odd-heavy.
+ //
+ // FIXME: If chains have interdependencies, for example:
+ // mul r0, r1, r2
+ // mul r3, r0, r1
+ // We do not model this and may color each one differently, assuming we'll
+ // get ILP when we obviously can't. This hasn't been seen to be a problem
+ // in practice so far, so we simplify the algorithm by ignoring it.
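+  // For example, coloring a three-instruction chain Even moves Parity to +3,
+  // so the next chain set will be steered towards Odd.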
+ int Parity = 0;
+
+ for (auto &I : V)
+ Changed |= colorChainSet(std::move(I), MBB, Parity);
+
+ return Changed;
+}
+
+Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
+ std::vector<Chain*> &L) {
+ if (L.empty())
+ return nullptr;
+
+  // We try to get the best candidate from L to color next, given that our
+ // preferred color is "PreferredColor". L is ordered from larger to smaller
+ // chains. It is beneficial to color the large chains before the small chains,
+ // but if we can't find a chain of the maximum length with the preferred color,
+ // we fuzz the size and look for slightly smaller chains before giving up and
+ // returning a chain that must be recolored.
+
+ // FIXME: Does this need to be configurable?
+ const unsigned SizeFuzz = 1;
+ unsigned MinSize = L.front()->size() - SizeFuzz;
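+  // For example, with chain sizes {5, 5, 4, 3}, MinSize is 4: the two size-5
+  // chains are checked for the preferred color; if neither matches, reaching
+  // the size-4 chain returns the second size-5 chain instead.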
+ for (auto I = L.begin(), E = L.end(); I != E; ++I) {
+ if ((*I)->size() <= MinSize) {
+ // We've gone past the size limit. Return the previous item.
+ Chain *Ch = *--I;
+ L.erase(I);
+ return Ch;
+ }
+
+ if ((*I)->getPreferredColor() == PreferredColor) {
+ Chain *Ch = *I;
+ L.erase(I);
+ return Ch;
+ }
+ }
+
+ // Bailout case - just return the first item.
+ Chain *Ch = L.front();
+ L.erase(L.begin());
+ return Ch;
+}
+
+bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
+ MachineBasicBlock &MBB,
+ int &Parity) {
+ bool Changed = false;
+ DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
+
+ // Sort by descending size order so that we allocate the most important
+ // sets first.
+ // Tie-break equivalent sizes by sorting chains requiring fixups before
+ // those without fixups. The logic here is that we should look at the
+ // chains that we cannot change before we look at those we can,
+ // so the parity counter is updated and we know what color we should
+ // change them to!
+ std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
+ if (G1->size() != G2->size())
+ return G1->size() > G2->size();
+ return G1->requiresFixup() > G2->requiresFixup();
+ });
+
+ Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
+ while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
+ // Start off by assuming we'll color to our own preferred color.
+ Color C = PreferredColor;
+ if (Parity == 0)
+ // But if we really don't care, use the chain's preferred color.
+ C = G->getPreferredColor();
+
+ DEBUG(dbgs() << " - Parity=" << Parity << ", Color="
+ << ColorNames[(int)C] << "\n");
+
+ // If we'll need a fixup FMOV, don't bother. Testing has shown that this
+ // happens infrequently and when it does it has at least a 50% chance of
+ // slowing code down instead of speeding it up.
+ if (G->requiresFixup() && C != G->getPreferredColor()) {
+ C = G->getPreferredColor();
+ DEBUG(dbgs() << " - " << G->str() << " - not worthwhile changing; "
+ "color remains " << ColorNames[(int)C] << "\n");
+ }
+
+ Changed |= colorChain(G, C, MBB);
+
+ Parity += (C == Color::Even) ? G->size() : -G->size();
+ PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
+ }
+
+ return Changed;
+}
+
+int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
+ MachineBasicBlock &MBB) {
+ RegScavenger RS;
+ RS.enterBasicBlock(&MBB);
+ RS.forward(MachineBasicBlock::iterator(G->getStart()));
+
+ // Can we find an appropriate register that is available throughout the life
+ // of the chain?
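+  // We intersect the set of available registers at every instruction in
+  // [getStart(), getEnd()), so the chosen register is free across the whole
+  // chain.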
+ unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
+ BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
+ for (MachineBasicBlock::iterator I = G->getStart(), E = G->getEnd();
+ I != E; ++I) {
+ RS.forward(I);
+ AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
+
+ // Remove any registers clobbered by a regmask or any def register that is
+ // immediately dead.
+ for (auto J : I->operands()) {
+ if (J.isRegMask())
+ AvailableRegs.clearBitsNotInMask(J.getRegMask());
+
+ if (J.isReg() && J.isDef() && AvailableRegs[J.getReg()]) {
+ assert(J.isDead() && "Non-dead def should have been removed by now!");
+ AvailableRegs.reset(J.getReg());
+ }
+ }
+ }
+
+ // Make sure we allocate in-order, to get the cheapest registers first.
+ auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
+ for (auto Reg : Ord) {
+ if (!AvailableRegs[Reg])
+ continue;
+ if ((C == Color::Even && (Reg % 2) == 0) ||
+ (C == Color::Odd && (Reg % 2) == 1))
+ return Reg;
+ }
+
+ return -1;
+}
+
+bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
+ MachineBasicBlock &MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
+ << ColorNames[(int)C] << ")\n");
+
+  // Try to obtain a free register of the right class. Without a register
+ // to play with we cannot continue.
+ int Reg = scavengeRegister(G, C, MBB);
+ if (Reg == -1) {
+ DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
+ return false;
+ }
+ DEBUG(dbgs() << " - Scavenged register: " << TRI->getName(Reg) << "\n");
+
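+  // Substs maps each original register to the scavenged register currently
+  // standing in for it within the chain.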
+ std::map<unsigned, unsigned> Substs;
+ for (MachineBasicBlock::iterator I = G->getStart(), E = G->getEnd();
+ I != E; ++I) {
+ if (!G->contains(I) &&
+ (&*I != G->getKill() || G->isKillImmutable()))
+ continue;
+
+ // I is a member of G, or I is a mutable instruction that kills G.
+
+ std::vector<unsigned> ToErase;
+ for (auto &U : I->operands()) {
+ if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
+ unsigned OrigReg = U.getReg();
+ U.setReg(Substs[OrigReg]);
+ if (U.isKill())
+ // Don't erase straight away, because there may be other operands
+ // that also reference this substitution!
+ ToErase.push_back(OrigReg);
+ } else if (U.isRegMask()) {
+ for (auto J : Substs) {
+ if (U.clobbersPhysReg(J.first))
+ ToErase.push_back(J.first);
+ }
+ }
+ }
+ // Now it's safe to remove the substs identified earlier.
+ for (auto J : ToErase)
+ Substs.erase(J);
+
+    // Only change the def if this isn't the kill instruction.
+ if (&*I != G->getKill()) {
+ MachineOperand &MO = I->getOperand(0);
+
+ bool Change = TransformAll || getColor(MO.getReg()) != C;
+ if (G->requiresFixup() && &*I == G->getLast())
+ Change = false;
+
+ if (Change) {
+ Substs[MO.getReg()] = Reg;
+ MO.setReg(Reg);
+ MRI->setPhysRegUsed(Reg);
+
+ Changed = true;
+ }
+ }
+ }
+  assert(Substs.empty() && "No substitutions should be left active!");
+
+ if (G->getKill()) {
+ DEBUG(dbgs() << " - Kill instruction seen.\n");
+ } else {
+ // We didn't have a kill instruction, but we didn't seem to need to change
+ // the destination register anyway.
+ DEBUG(dbgs() << " - Destination register not changed.\n");
+ }
+ return Changed;
+}
+
+void AArch64A57FPLoadBalancing::
+scanInstruction(MachineInstr *MI, unsigned Idx,
+ std::map<unsigned, Chain*> &ActiveChains,
+ std::set<std::unique_ptr<Chain>> &AllChains) {
+ // Inspect "MI", updating ActiveChains and AllChains.
+
+ if (isMul(MI)) {
+
+ for (auto &I : MI->uses())
+ maybeKillChain(I, Idx, ActiveChains);
+ for (auto &I : MI->defs())
+ maybeKillChain(I, Idx, ActiveChains);
+
+ // Create a new chain. Multiplies don't require forwarding so can go on any
+ // unit.
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ DEBUG(dbgs() << "New chain started for register "
+ << TRI->getName(DestReg) << " at " << *MI);
+
+ auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
+ ActiveChains[DestReg] = G.get();
+ AllChains.insert(std::move(G));
+
+ } else if (isMla(MI)) {
+
+ // It is beneficial to keep MLAs on the same functional unit as their
+ // accumulator operand.
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned AccumReg = MI->getOperand(3).getReg();
+
+ maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
+ maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
+ if (DestReg != AccumReg)
+ maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
+
+ if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
+ DEBUG(dbgs() << "Chain found for accumulator register "
+ << TRI->getName(AccumReg) << " in MI " << *MI);
+
+ // For simplicity we only chain together sequences of MULs/MLAs where the
+ // accumulator register is killed on each instruction. This means we don't
+ // need to track other uses of the registers we want to rewrite.
+ //
+ // FIXME: We could extend to handle the non-kill cases for more coverage.
+ if (MI->getOperand(3).isKill()) {
+ // Add to chain.
+ DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
+ ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
+ // Handle cases where the destination is not the same as the accumulator.
+ if (DestReg != AccumReg) {
+ ActiveChains[DestReg] = ActiveChains[AccumReg];
+ ActiveChains.erase(AccumReg);
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Cannot add to chain because accumulator operand wasn't "
+ << "marked <kill>!\n");
+ maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
+ }
+
+ DEBUG(dbgs() << "Creating new chain for dest register "
+ << TRI->getName(DestReg) << "\n");
+ auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
+ ActiveChains[DestReg] = G.get();
+ AllChains.insert(std::move(G));
+
+ } else {
+
+ // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
+ // lists.
+ for (auto &I : MI->uses())
+ maybeKillChain(I, Idx, ActiveChains);
+ for (auto &I : MI->defs())
+ maybeKillChain(I, Idx, ActiveChains);
+
+ }
+}
+
+void AArch64A57FPLoadBalancing::
+maybeKillChain(MachineOperand &MO, unsigned Idx,
+ std::map<unsigned, Chain*> &ActiveChains) {
+ // Given an operand and the set of active chains (keyed by register),
+  // determine if a chain should be ended and removed from ActiveChains.
+ MachineInstr *MI = MO.getParent();
+
+ if (MO.isReg()) {
+
+ // If this is a KILL of a current chain, record it.
+ if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
+ DEBUG(dbgs() << "Kill seen for chain " << TRI->getName(MO.getReg())
+ << "\n");
+ ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
+ }
+ ActiveChains.erase(MO.getReg());
+
+ } else if (MO.isRegMask()) {
+
+ for (auto I = ActiveChains.begin(), E = ActiveChains.end();
+ I != E;) {
+ if (MO.clobbersPhysReg(I->first)) {
+ DEBUG(dbgs() << "Kill (regmask) seen for chain "
+ << TRI->getName(I->first) << "\n");
+ I->second->setKill(MI, Idx, /*Immutable=*/true);
+ ActiveChains.erase(I++);
+ } else
+ ++I;
+ }
+
+ }
+}
+
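+// Registers with even encodings (e.g. D0, D2) are Even; odd encodings
+// (e.g. D1, D3) are Odd.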
+Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
+ if ((TRI->getEncodingValue(Reg) % 2) == 0)
+ return Color::Even;
+ else
+ return Color::Odd;
+}
+
+// Factory function used by AArch64TargetMachine to add the pass to the pass
+// manager.
+FunctionPass *llvm::createAArch64A57FPLoadBalancing() {
+ return new AArch64A57FPLoadBalancing();
+}
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index ab2c4b73e67c..287989ffaf09 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -19,7 +19,7 @@
// a = add nsw i64 f, 3
// e = getelementptr ..., i64 a
//
-// This is legal to do so if the computations are markers with either nsw or nuw
+// This is legal to do if the computations are marked with either nsw or nuw
// markers.
// Moreover, the current heuristic is simple: it does not create new sext
// operations, i.e., it gives up when a sext would have forked (e.g., if
@@ -223,7 +223,7 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
}
// Input:
-// - SExtInsts contains all the sext instructions that are use direclty in
+// - SExtInsts contains all the sext instructions that are used directly in
// GetElementPtrInst, i.e., access to memory.
// Algorithm:
// - For each sext operation in SExtInsts:
@@ -353,7 +353,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
// If the use is already of the right type, connect its uses to its argument
// and delete it.
- // This can happen for an Instruction which all uses are sign extended.
+ // This can happen for an Instruction all uses of which are sign extended.
if (!ToRemove.count(SExt) &&
SExt->getType() == SExt->getOperand(0)->getType()) {
DEBUG(dbgs() << "Sign extension is useless, attach its use to "
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 734fb215e6ee..5afe0f4439e7 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -36,9 +36,10 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -166,6 +167,12 @@ static int getTransformOpcode(unsigned Opc) {
return AArch64::ADDv1i64;
case AArch64::SUBXrr:
return AArch64::SUBv1i64;
+ case AArch64::ANDXrr:
+ return AArch64::ANDv8i8;
+ case AArch64::EORXrr:
+ return AArch64::EORv8i8;
+ case AArch64::ORRXrr:
+ return AArch64::ORRv8i8;
}
// No AdvSIMD equivalent, so just return the original opcode.
return Opc;
@@ -371,7 +378,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index cd94e244dc38..08ee687d84a2 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64MachineFunctionInfo.h"
#include "AArch64MCInstLower.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
@@ -23,8 +23,8 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -54,7 +54,7 @@ public:
AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
- MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr),
+ MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
LOHLabelCounter(0) {}
const char *getPassName() const override {
@@ -145,7 +145,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
@@ -252,8 +252,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI =
- static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
+ const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -381,8 +381,23 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
unsigned NumNOPBytes = MI.getOperand(1).getImm();
SM.recordStackMap(MI);
- // Emit padding.
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
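+  // Every AArch64 instruction is 4 bytes, so each instruction already covering
+  // the shadow lets us drop one NOP of padding.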
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == AArch64::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
for (unsigned i = 0; i < NumNOPBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
}
@@ -518,7 +533,5 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
extern "C" void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
-
- RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64leTarget);
- RegisterAsmPrinter<AArch64AsmPrinter> W(TheARM64beTarget);
+ RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64Target);
}
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 484e7e8ffce4..e2b636773b0a 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -12,15 +12,16 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-branch-relax"
@@ -136,7 +137,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
if (NextBB == MBB->getParent()->end())
return false;
- for (MachineBasicBlock *S : MBB->successors())
+ for (MachineBasicBlock *S : MBB->successors())
if (S == NextBB)
return true;
@@ -475,7 +476,9 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
new file mode 100644
index 000000000000..baf80bc5483d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -0,0 +1,141 @@
+//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the AArch64 Calling Convention
+// that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace {
+using namespace llvm;
+
+static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+ AArch64::H3, AArch64::H4, AArch64::H5,
+ AArch64::H6, AArch64::H7};
+static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+ MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+ CCState &State, unsigned SlotAlign) {
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned StackAlign = State.getMachineFunction()
+ .getSubtarget()
+ .getDataLayout()
+ ->getStackAlignment();
+ unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ State.addLoc(It);
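+    // Only the first member receives the block's alignment; subsequent members
+    // are packed contiguously behind it.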
+ SlotAlign = 1;
+ }
+
+  // All pending members have now been allocated.
+ PendingMembers.clear();
+ return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
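+/// For example (illustrative), a [3 x double] block requests three consecutive
+/// D registers; if no such run is free, the remaining D registers are marked
+/// used and the whole block goes on the stack.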
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ ArrayRef<uint16_t> RegList;
+ if (LocVT.SimpleTy == MVT::i64)
+ RegList = XRegList;
+ else if (LocVT.SimpleTy == MVT::f16)
+ RegList = HRegList;
+ else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
+ RegList = SRegList;
+ else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
+ RegList = DRegList;
+ else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
+ RegList = QRegList;
+ else {
+ // Not an array we want to split up after all.
+ return false;
+ }
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (auto &It : PendingMembers) {
+ It.convertToReg(RegResult);
+ State.addLoc(It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+} // end anonymous namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 1fe5138b529d..1a8040275ca8 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -40,6 +40,8 @@ def CC_AArch64_AAPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -60,18 +62,18 @@ def CC_AArch64_AAPCS : CallingConv<[
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToStack<16, 16>>
]>;
@@ -96,10 +98,10 @@ def RetCC_AArch64_AAPCS : CallingConv<[
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
@@ -119,6 +121,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -139,25 +143,28 @@ def CC_AArch64_DarwinPCS : CallingConv<[
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
]>;
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCIfType<[f16, f32], CCPromoteToType<f64>>,
@@ -165,8 +172,10 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// Everything is on the stack.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
- CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
]>;
// The WebKit_JS calling convention only passes the first argument (the callee)
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 4d23dc59d7ac..aab8e384b8d0 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -94,7 +94,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunction *MF = I->getParent()->getParent();
const AArch64TargetMachine *TM =
static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
@@ -114,7 +114,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunction *MF = I->getParent()->getParent();
const AArch64TargetMachine *TM =
static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 6b1f09678e9a..87b545b186b9 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -101,25 +101,26 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
@@ -194,12 +195,14 @@ typedef SetVector<const MachineInstr *> SetOfMachineInstr;
/// Map a basic block to a set of instructions per register.
/// This is used to represent the exposed uses of a basic block
/// per register.
-typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
+typedef MapVector<const MachineBasicBlock *,
+ std::unique_ptr<SetOfMachineInstr[]>>
BlockToSetOfInstrsPerColor;
/// Map a basic block to an instruction per register.
/// This is used to represent the live-out definitions of a basic block
/// per register.
-typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
+typedef MapVector<const MachineBasicBlock *,
+ std::unique_ptr<const MachineInstr *[]>>
BlockToInstrPerColor;
/// Map an instruction to a set of instructions. Used to represent the
/// mapping def to reachable uses or use to definitions.
@@ -236,9 +239,9 @@ static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
SetOfMachineInstr *result;
BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
if (it != sets.end())
- result = it->second;
+ result = it->second.get();
else
- result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
+ result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
return result[reg];
}
@@ -283,14 +286,14 @@ static void initReachingDef(MachineFunction &MF,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
unsigned NbReg = RegToId.size();
for (MachineBasicBlock &MBB : MF) {
- const MachineInstr **&BBGen = Gen[&MBB];
- BBGen = new const MachineInstr *[NbReg];
- memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
+ auto &BBGen = Gen[&MBB];
+ BBGen = make_unique<const MachineInstr *[]>(NbReg);
+ std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
BitVector &BBKillSet = Kill[&MBB];
BBKillSet.resize(NbReg);
@@ -421,22 +424,6 @@ static void reachingDefAlgorithm(MachineFunction &MF,
} while (HasChanged);
}
-/// Release all memory dynamically allocated during the reaching
-/// definition algorithm.
-static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
- BlockToSetOfInstrsPerColor &Out,
- BlockToInstrPerColor &Gen,
- BlockToSetOfInstrsPerColor &ReachableUses) {
- for (auto &IT : Out)
- delete[] IT.second;
- for (auto &IT : In)
- delete[] IT.second;
- for (auto &IT : ReachableUses)
- delete[] IT.second;
- for (auto &IT : Gen)
- delete[] IT.second;
-}
-
/// Reaching definition algorithm.
/// \param MF function on which the algorithm will operate.
/// \param[out] ColorOpToReachedUses will contain the result of the reaching
@@ -473,9 +460,6 @@ static void reachingDef(MachineFunction &MF,
if (!DummyOp)
reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
ReachableUses, RegToId.size());
-
- // finit.
- finitReachingDef(In, Out, Gen, ReachableUses);
}
#ifndef NDEBUG
@@ -1043,7 +1027,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
@@ -1059,8 +1043,8 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+ const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// For local analysis, create a dummy operation to record uses that are not
// local.
DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
new file mode 100644
index 000000000000..0fbd3c685179
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -0,0 +1,422 @@
+//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to make consecutive compares of values use the same operands
+// so that the CSE pass can remove duplicated instructions. For this it analyzes
+// branches and adjusts comparisons with immediate values by converting:
+// * GE -> GT
+// * GT -> GE
+// * LT -> LE
+// * LE -> LT
+// and adjusting immediate values appropriately. In essence it corrects the two
+// immediate values towards each other until they are equal.
+//
+// Consider the following example in C:
+//
+// if ((a < 5 && ...) || (a > 5 && ...)) {
+// ~~~~~ ~~~~~
+// ^ ^
+// x y
+//
+// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
+// to "false", "y" can just check flags set by the first comparison. As a
+// result of the canonicalization employed by
+// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
+// code, assembly ends up in the form that is not CSE friendly:
+//
+// ...
+// cmp w8, #4
+// b.gt .LBB0_3
+// ...
+// .LBB0_3:
+// cmp w8, #6
+// b.lt .LBB0_6
+// ...
+//
+// Same assembly after the pass:
+//
+// ...
+// cmp w8, #5
+// b.ge .LBB0_3
+// ...
+// .LBB0_3:
+// cmp w8, #5 // <-- CSE pass removes this instruction
+// b.le .LBB0_6
+// ...
+//
+// Currently only SUBS and ADDS followed by b.?? are supported.
+//
+// TODO: maybe handle TBNZ/TBZ when they are used in place of CMP for "a < 0"
+// TODO: handle other conditional instructions (e.g. CSET)
+// TODO: allow the second branch to be anything if it doesn't require adjusting
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdlib>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-condopt"
+
+STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
+
+namespace {
+class AArch64ConditionOptimizer : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DomTree;
+
+public:
+  // Stores the immediate, the compare instruction opcode and the branch
+  // condition (in this order) of the adjusted comparison.
+ typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+
+ static char ID;
+ AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
+ CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
+ void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
+ bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
+ int ToImm);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override {
+ return "AArch64 Condition Optimizer";
+ }
+};
+} // end anonymous namespace
+
+char AArch64ConditionOptimizer::ID = 0;
+
+namespace llvm {
+void initializeAArch64ConditionOptimizerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
+ "AArch64 CondOpt Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
+ "AArch64 CondOpt Pass", false, false)
+
+FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
+ return new AArch64ConditionOptimizer();
+}
+
+void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// Finds the compare instruction that controls the supported kinds of branching.
+// Returns the instruction, or nullptr on failure or when an unsupported
+// instruction is encountered.
+MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator I = MBB->getFirstTerminator();
+ if (I == MBB->end())
+ return nullptr;
+
+ if (I->getOpcode() != AArch64::Bcc)
+ return nullptr;
+
+ // Now find the instruction controlling the terminator.
+ for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
+ --I;
+ assert(!I->isTerminator() && "Spurious terminator");
+ switch (I->getOpcode()) {
+ // cmp is an alias for subs with a dead destination register.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ // cmn is an alias for adds with a dead destination register.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ if (I->getOperand(0).isDead())
+ return I;
+
+ DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
+ return nullptr;
+
+    // Prevent false positives in cases like:
+ // cmp w19, #0
+ // cinc w0, w19, gt
+ // ...
+ // fcmp d8, #0.0
+ // b.gt .LBB0_5
+ case AArch64::FCMPDri:
+ case AArch64::FCMPSri:
+ case AArch64::FCMPESri:
+ case AArch64::FCMPEDri:
+
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::FCMPSrr:
+ case AArch64::FCMPDrr:
+ case AArch64::FCMPESrr:
+ case AArch64::FCMPEDrr:
+ // Skip comparison instructions without immediate operands.
+ return nullptr;
+ }
+ }
+ DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+ return nullptr;
+}
+
+// Returns the complementary opcode (ADDS <-> SUBS), preserving the register
+// operand width.
+static int getComplementOpc(int Opc) {
+ switch (Opc) {
+ case AArch64::ADDSWri: return AArch64::SUBSWri;
+ case AArch64::ADDSXri: return AArch64::SUBSXri;
+ case AArch64::SUBSWri: return AArch64::ADDSWri;
+ case AArch64::SUBSXri: return AArch64::ADDSXri;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+// Switches a comparison between its inclusive and exclusive forms.
+static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
+ switch (Cmp) {
+ case AArch64CC::GT: return AArch64CC::GE;
+ case AArch64CC::GE: return AArch64CC::GT;
+ case AArch64CC::LT: return AArch64CC::LE;
+ case AArch64CC::LE: return AArch64CC::LT;
+ default:
+ llvm_unreachable("Unexpected condition code");
+ }
+}
+
+// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
+// operator and condition code.
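+// For example (cf. the file header), "cmp w8, #4" branching with b.gt becomes
+// "cmp w8, #5" branching with b.ge: Correction is +1 for GT, so NewImm is
+// 4 + 1 = 5 and the condition turns into its inclusive form.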
+AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
+ MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
+ int Opc = CmpMI->getOpcode();
+
+  // CMN (compare with negative immediate) is an alias for ADDS (as
+  // "operand - negative" == "operand + positive").
+ bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
+
+ int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
+ // Negate Correction value for comparison with negative immediate (CMN).
+ if (Negative) {
+ Correction = -Correction;
+ }
+
+ const int OldImm = (int)CmpMI->getOperand(2).getImm();
+ const int NewImm = std::abs(OldImm + Correction);
+
+ // Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
+ // adjusting compare instruction opcode.
+ if (OldImm == 0 && ((Negative && Correction == 1) ||
+ (!Negative && Correction == -1))) {
+ Opc = getComplementOpc(Opc);
+ }
+
+ return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
+}
+
+// Applies changes to comparison instruction suggested by adjustCmp().
+void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
+ const CmpInfo &Info) {
+ int Imm;
+ int Opc;
+ AArch64CC::CondCode Cmp;
+ std::tie(Imm, Opc, Cmp) = Info;
+
+ MachineBasicBlock *const MBB = CmpMI->getParent();
+
+ // Change immediate in comparison instruction (ADDS or SUBS).
+ BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
+ .addOperand(CmpMI->getOperand(0))
+ .addOperand(CmpMI->getOperand(1))
+ .addImm(Imm)
+ .addOperand(CmpMI->getOperand(3));
+ CmpMI->eraseFromParent();
+
+ // The fact that this comparison was picked ensures that it's related to the
+ // first terminator instruction.
+ MachineInstr *BrMI = MBB->getFirstTerminator();
+
+ // Change condition in branch instruction.
+ BuildMI(*MBB, BrMI, BrMI->getDebugLoc(), TII->get(AArch64::Bcc))
+ .addImm(Cmp)
+ .addOperand(BrMI->getOperand(1));
+ BrMI->eraseFromParent();
+
+ MBB->updateTerminator();
+
+ ++NumConditionsAdjusted;
+}
+
+// Parses a condition code returned by AnalyzeBranch and computes the CondCode
+// corresponding to TBB.
+// Returns true if parsing was successful, false otherwise.
+static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
+ // A normal br.cond simply has the condition code.
+ if (Cond[0].getImm() != -1) {
+ assert(Cond.size() == 1 && "Unknown Cond array format");
+ CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+ return true;
+ }
+ return false;
+}
+
+// Adjusts one cmp instruction towards another if the result of the adjustment
+// will allow CSE. Returns true if the compare instruction was changed, false
+// otherwise.
+bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
+    AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm) {
+ CmpInfo Info = adjustCmp(CmpMI, Cmp);
+ if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {
+ modifyCmp(CmpMI, Info);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** AArch64 Condition Optimizer **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
+ bool Changed = false;
+
+ // Visit blocks in dominator tree pre-order. The pre-order enables multiple
+ // cmp-conversions from the same head block.
+ // Note that updateDomTree() modifies the children of the DomTree node
+ // currently being visited. The df_iterator supports that; it doesn't look at
+ // child_begin() / child_end() until after a node has been visited.
+ for (MachineDomTreeNode *I : depth_first(DomTree)) {
+ MachineBasicBlock *HBB = I->getBlock();
+
+ SmallVector<MachineOperand, 4> HeadCond;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
+ continue;
+ }
+
+    // Checking TBB == HBB skips single-block loops.
+ if (!TBB || TBB == HBB) {
+ continue;
+ }
+
+ SmallVector<MachineOperand, 4> TrueCond;
+ MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
+ if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
+ continue;
+ }
+
+ MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
+ if (!HeadCmpMI) {
+ continue;
+ }
+
+ MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
+ if (!TrueCmpMI) {
+ continue;
+ }
+
+ AArch64CC::CondCode HeadCmp;
+ if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
+ continue;
+ }
+
+ AArch64CC::CondCode TrueCmp;
+ if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
+ continue;
+ }
+
+ const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
+ const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
+
+ DEBUG(dbgs() << "Head branch:\n");
+ DEBUG(dbgs() << "\tcondition: "
+ << AArch64CC::getCondCodeName(HeadCmp) << '\n');
+ DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
+
+ DEBUG(dbgs() << "True branch:\n");
+ DEBUG(dbgs() << "\tcondition: "
+ << AArch64CC::getCondCodeName(TrueCmp) << '\n');
+ DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
+
+ if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
+ (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
+ std::abs(TrueImm - HeadImm) == 2) {
+ // This branch transforms machine instructions that correspond to
+ //
+ // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
+ // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
+ //
+ // into
+ //
+ // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
+ // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
+
+ CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
+ CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
+ if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
+ std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
+ modifyCmp(HeadCmpMI, HeadCmpInfo);
+ modifyCmp(TrueCmpMI, TrueCmpInfo);
+ Changed = true;
+ }
+ } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
+ (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
+ std::abs(TrueImm - HeadImm) == 1) {
+ // This branch transforms machine instructions that correspond to
+ //
+ // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
+ // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
+ //
+ // into
+ //
+ // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
+ // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
+
+      // The GT -> GE transformation increases the immediate value, so pick the
+      // smaller one; LT -> LE decreases the immediate value, so the choice is
+      // inverted.
+ bool adjustHeadCond = (HeadImm < TrueImm);
+ if (HeadCmp == AArch64CC::LT) {
+ adjustHeadCond = !adjustHeadCond;
+ }
+
+ if (adjustHeadCond) {
+ Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
+ } else {
+ Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
+ }
+ }
+ // Other transformation cases almost never occur, because codegen emits
+ // strict < and > comparisons rather than <= and >=.
+ }
+
+ return Changed;
+}
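To make the first transformation concrete, here is a hedged source-level sketch (the immediates are invented for illustration; TrueImm = 3, HeadImm = 5, so |TrueImm - HeadImm| == 2):

    // Before: the two compares use different immediates (3 and 5).
    if ((a > 3 && f()) || (a < 5 && g()))
      h();
    // After adjustCmp(): GT 3 becomes GE 4 and LT 5 becomes LE 4. Both
    // compares now use the same immediate, so later CSE can drop one SUBS.
    if ((a >= 4 && f()) || (a <= 4 && g()))
      h();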
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 452cdecf8a0c..54f53dc14b25 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -191,8 +191,8 @@ public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
}
@@ -723,7 +723,7 @@ namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
// Does the function being processed have the Oz attribute?
bool MinSize;
MachineRegisterInfo *MRI;
@@ -845,7 +845,7 @@ bool AArch64ConditionalCompares::shouldConvert() {
// the cost of a misprediction.
//
// Set a limit on the delay we will accept.
- unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4;
+ unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
// Instruction depths can be computed for all trace instructions above CmpBB.
unsigned HeadDepth =
@@ -891,8 +891,8 @@ bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
SchedModel =
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
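The MCSchedModel hunks above follow an API change: getSchedModel() now returns the model by value, so the pass stores a copy and uses member access rather than a pointer dereference. A minimal sketch of the resulting pattern, using the same names as the hunks:

    MCSchedModel SchedModel =
        MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
    // Accept a delay of up to 3/4 of a branch misprediction penalty.
    unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;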
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index a2d853c85fef..74fc167433f6 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -14,11 +14,12 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
@@ -36,11 +37,11 @@ public:
static char ID; // Pass identification, replacement for typeid.
explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &F) override;
+ bool runOnMachineFunction(MachineFunction &F) override;
const char *getPassName() const override { return "Dead register definitions"; }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -119,7 +120,7 @@ bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
bool Changed = false;
DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
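For context, this hunk only changes where TRI comes from; the pass itself rewrites dead integer definitions to target the zero register. A hedged MIR-level illustration (registers invented):

    //   %w8 = SUBSWrr %w0, %w1    ; %w8 is dead, only NZCV is consumed
    // becomes
    //   %wzr = SUBSWrr %w0, %w1   ; printed as 'cmp w0, w1'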
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 8839085c4a80..c850680d4c6d 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
@@ -722,7 +723,7 @@ bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
}
bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
bool Modified = false;
for (auto &MBB : MF)
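As a reminder of what this pass does, AArch64ExpandPseudo rewrites pseudo-instructions such as MOVi64imm into real instruction sequences after register allocation; a hedged example with an arbitrary constant:

    //   %x0 = MOVi64imm 0x12345678abcd
    // expands into
    //   movz x0, #0xabcd
    //   movk x0, #0x5678, lsl #16
    //   movk x0, #0x1234, lsl #32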
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 2164d77b7900..419fbc8be6c9 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,9 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -39,8 +41,7 @@ using namespace llvm;
namespace {
-class AArch64FastISel : public FastISel {
-
+class AArch64FastISel final : public FastISel {
class Address {
public:
typedef enum {
@@ -50,16 +51,23 @@ class AArch64FastISel : public FastISel {
private:
BaseKind Kind;
+ AArch64_AM::ShiftExtendType ExtType;
union {
unsigned Reg;
int FI;
} Base;
+ unsigned OffsetReg;
+ unsigned Shift;
int64_t Offset;
+ const GlobalValue *GV;
public:
- Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
+ Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
+ OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
+ void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
+ AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
@@ -70,6 +78,12 @@ class AArch64FastISel : public FastISel {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+ void setOffsetReg(unsigned Reg) {
+ OffsetReg = Reg;
+ }
+ unsigned getOffsetReg() const {
+ return OffsetReg;
+ }
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
@@ -80,8 +94,11 @@ class AArch64FastISel : public FastISel {
}
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
+ void setShift(unsigned S) { Shift = S; }
+ unsigned getShift() { return Shift; }
- bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
};
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
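With the new OffsetReg, Shift, and ExtType fields, the Address class can describe each of the load/store addressing forms FastISel may now emit; roughly (a sketch with invented registers):

    //   ldr w2, [x0, #16]           base + scaled unsigned 12-bit immediate
    //   ldr w2, [x0, x1, lsl #2]    base + shifted 64-bit offset register
    //   ldr w2, [x0, w1, uxtw #2]   base + extended 32-bit offset register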
@@ -89,74 +106,152 @@ class AArch64FastISel : public FastISel {
const AArch64Subtarget *Subtarget;
LLVMContext *Context;
+ bool fastLowerArguments() override;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
+
private:
// Selection routines.
- bool SelectLoad(const Instruction *I);
- bool SelectStore(const Instruction *I);
- bool SelectBranch(const Instruction *I);
- bool SelectIndirectBr(const Instruction *I);
- bool SelectCmp(const Instruction *I);
- bool SelectSelect(const Instruction *I);
- bool SelectFPExt(const Instruction *I);
- bool SelectFPTrunc(const Instruction *I);
- bool SelectFPToInt(const Instruction *I, bool Signed);
- bool SelectIntToFP(const Instruction *I, bool Signed);
- bool SelectRem(const Instruction *I, unsigned ISDOpcode);
- bool SelectCall(const Instruction *I, const char *IntrMemName);
- bool SelectIntrinsicCall(const IntrinsicInst &I);
- bool SelectRet(const Instruction *I);
- bool SelectTrunc(const Instruction *I);
- bool SelectIntExt(const Instruction *I);
- bool SelectMul(const Instruction *I);
+ bool selectAddSub(const Instruction *I);
+ bool selectLogicalOp(const Instruction *I);
+ bool selectLoad(const Instruction *I);
+ bool selectStore(const Instruction *I);
+ bool selectBranch(const Instruction *I);
+ bool selectIndirectBr(const Instruction *I);
+ bool selectCmp(const Instruction *I);
+ bool selectSelect(const Instruction *I);
+ bool selectFPExt(const Instruction *I);
+ bool selectFPTrunc(const Instruction *I);
+ bool selectFPToInt(const Instruction *I, bool Signed);
+ bool selectIntToFP(const Instruction *I, bool Signed);
+ bool selectRem(const Instruction *I, unsigned ISDOpcode);
+ bool selectRet(const Instruction *I);
+ bool selectTrunc(const Instruction *I);
+ bool selectIntExt(const Instruction *I);
+ bool selectMul(const Instruction *I);
+ bool selectShift(const Instruction *I);
+ bool selectBitCast(const Instruction *I);
+ bool selectFRem(const Instruction *I);
+ bool selectSDiv(const Instruction *I);
+ bool selectGetElementPtr(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
- bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool ComputeAddress(const Value *Obj, Address &Addr);
- bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
- bool UseUnscaled);
- void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled);
- bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
- bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
+ bool isValueAvailable(const Value *V) const;
+ bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool computeCallAddress(const Value *V, Address &Addr);
+ bool simplifyAddress(Address &Addr, MVT VT);
+ void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
+ unsigned Flags, unsigned ScaleFactor,
+ MachineMemOperand *MMO);
+ bool isMemCpySmall(uint64_t Len, unsigned Alignment);
+ bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
- // Emit functions.
- bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
- bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled = false);
- bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled = false);
- unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
- unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
+ bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
+ const Value *Cond);
+ bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
+ bool optimizeSelect(const SelectInst *SI);
+ std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
+
+ // Emit helper routines.
+ unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
+ const Value *RHS, bool SetFlags = false,
+ bool WantResult = true, bool IsZExt = false);
+ unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ bool SetFlags = false, bool WantResult = true);
+ unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
+ bool WantResult = true);
+ unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool SetFlags = false,
+ bool WantResult = true);
+ unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool SetFlags = false,
+ bool WantResult = true);
- unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned AArch64MaterializeGV(const GlobalValue *GV);
+ // Emit functions.
+ bool emitCompareAndBranch(const BranchInst *BI);
+ bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
+ bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
+ bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+ bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
+ unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
+ MachineMemOperand *MMO = nullptr);
+ bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO = nullptr);
+ unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags = false, bool WantResult = true,
+ bool IsZExt = false);
+ unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
+ unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags = false, bool WantResult = true,
+ bool IsZExt = false);
+ unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
+ unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
+ bool WantResult = true);
+ unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+ const Value *RHS);
+ unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm);
+ unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ uint64_t ShiftImm);
+ unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+ unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = false);
+
+ unsigned materializeInt(const ConstantInt *CI, MVT VT);
+ unsigned materializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned materializeGV(const GlobalValue *GV);
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
- bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+ bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
- bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
public:
// Backend specific FastISel code.
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
- explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo) {
+ explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
- Context = &funcInfo.Fn->getContext();
+ Context = &FuncInfo.Fn->getContext();
}
- bool TargetSelectInstruction(const Instruction *I) override;
+ bool fastSelectInstruction(const Instruction *I) override;
#include "AArch64GenFastISel.inc"
};
@@ -165,13 +260,52 @@ public:
#include "AArch64GenCallingConv.inc"
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
+ "Unexpected value type.");
+ bool IsZExt = isa<ZExtInst>(I);
+
+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (LI->hasOneUse())
+ return true;
+
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+ return true;
+
+ return false;
+}
+
+/// \brief Determine the implicit scale factor that is applied by a memory
+/// operation for a given value type.
+static unsigned getImplicitScaleFactor(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return 0; // invalid
+ case MVT::i1: // fall-through
+ case MVT::i8:
+ return 1;
+ case MVT::i16:
+ return 2;
+ case MVT::i32: // fall-through
+ case MVT::f32:
+ return 4;
+ case MVT::i64: // fall-through
+ case MVT::f64:
+ return 8;
+ }
+}
+
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_AArch64_WebKit_JS;
return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
-unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
@@ -183,7 +317,7 @@ unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
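The register-class change above is the point of this hunk: ADDXri is defined over GPR64sp because it can address SP, so the virtual register holding its result must come from that class as well; a plain GPR64 result can leave the operand constraint unsatisfiable. Distilled:

    // ADDXri may read or write SP, so allocate from GPR64sp, not GPR64:
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);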
@@ -195,28 +329,59 @@ unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
return 0;
}
-unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
+ return 0;
+
+ if (!CI->isZero())
+ return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ // Create a copy from the zero register to materialize a "0" value.
+ const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
+ : &AArch64::GPR32RegClass;
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(ZeroReg, getKillRegState(true));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
+ // Positive zero (+0.0) has to be materialized with a fmov from the zero
+ // register, because the immediate version of fmov cannot encode zero.
+ if (CFP->isNullValue())
+ return fastMaterializeFloatZero(CFP);
+
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
const APFloat Val = CFP->getValueAPF();
- bool is64bit = (VT == MVT::f64);
-
+ bool Is64Bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- int Imm;
- unsigned Opc;
- if (is64bit) {
- Imm = AArch64_AM::getFP64Imm(Val);
- Opc = AArch64::FMOVDi;
- } else {
- Imm = AArch64_AM::getFP32Imm(Val);
- Opc = AArch64::FMOVSi;
- }
+ int Imm =
+ Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+ assert((Imm != -1) && "Cannot encode floating-point constant.");
+ unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
+ return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
+ }
+
+ // For the MachO large code model materialize the FP constant in code.
+ if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+ const TargetRegisterClass *RC = Is64Bit ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
+ .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(TmpReg, getKillRegState(true));
+
return ResultReg;
}
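The MachO large-code-model path above first materializes the raw bit pattern in a GPR and then copies it into an FP register; a hedged sketch of the emitted sequence for the double constant 1.0:

    //   mov  x8, #0x3ff0000000000000   ; MOVi64imm of the bit pattern
    //   fmov d0, x8                    ; the GPR64 -> FPR64 COPY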
@@ -226,20 +391,20 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
if (Align == 0)
Align = DL.getTypeAllocSize(CFP->getType());
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
+ ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
- unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
+ unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(ADRPReg)
- .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
-unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
+unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet.
if (GV->isThreadLocal())
return 0;
@@ -262,30 +427,34 @@ unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
+ // We can't handle addresses loaded from a constant pool quickly yet.
+ return 0;
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
- .addImm(0);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+ .addImm(0);
}
return ResultReg;
}
-unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
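For reference, the two branches of materializeGV above correspond to the usual small-code-model sequences; sketched as assembly with an invented symbol:

    // Through the GOT (MO_GOT):
    //   adrp x8, :got:sym
    //   ldr  x8, [x8, :got_lo12:sym]
    // Direct ADRP + ADD:
    //   adrp x8, sym
    //   add  x8, x8, :lo12:sym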
@@ -293,17 +462,48 @@ unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
MVT VT = CEVT.getSimpleVT();
- // FIXME: Handle ConstantInt.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return AArch64MaterializeFP(CFP, VT);
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return materializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return materializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return AArch64MaterializeGV(GV);
+ return materializeGV(GV);
return 0;
}
+unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
+ assert(CFP->isNullValue() &&
+ "Floating-point constant is not a positive zero.");
+ MVT VT;
+ if (!isTypeLegal(CFP->getType(), VT))
+ return 0;
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ bool Is64Bit = (VT == MVT::f64);
+ unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
+ return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
+}
+
+/// \brief Check if the multiply is by a power-of-2 constant.
+static bool isMulPowOf2(const Value *I) {
+ if (const auto *MI = dyn_cast<MulOperator>(I)) {
+ if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
+ if (C->getValue().isPowerOf2())
+ return true;
+ if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
+ if (C->getValue().isPowerOf2())
+ return true;
+ }
+ return false;
+}
+
// Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
+ Type *Ty) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -330,18 +530,18 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -383,7 +583,7 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (ComputeAddress(U->getOperand(0), Addr))
+ if (computeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
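The GEP handling above accumulates constant indices into the address's immediate offset; a hedged end-to-end example (IR and registers invented):

    //   %p = getelementptr i32, i32* %base, i64 4   ; 4 elements * 4 bytes
    //   %v = load i32, i32* %p
    // computeAddress() records Offset = 16, so the load selects directly to
    //   ldr w0, [x0, #16]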
@@ -403,14 +603,301 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
}
break;
}
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (isa<ConstantInt>(LHS))
+ std::swap(LHS, RHS);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
+ return computeAddress(LHS, Addr, Ty);
+ }
+
+ Address Backup = Addr;
+ if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
+ return true;
+ Addr = Backup;
+
+ break;
+ }
+ case Instruction::Sub: {
+ // Subs of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
+ return computeAddress(LHS, Addr, Ty);
+ }
+ break;
+ }
+ case Instruction::Shl: {
+ if (Addr.getOffsetReg())
+ break;
+
+ const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!CI)
+ break;
+
+ unsigned Val = CI->getZExtValue();
+ if (Val < 1 || Val > 3)
+ break;
+
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
+
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
+
+ const Value *Src = U->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
+
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+ if (AI->getOpcode() == Instruction::And) {
+ const Value *LHS = AI->getOperand(0);
+ const Value *RHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ case Instruction::Mul: {
+ if (Addr.getOffsetReg())
+ break;
+
+ if (!isMulPowOf2(U))
+ break;
+
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ // Canonicalize power-of-2 value to the RHS.
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(LHS, RHS);
+
+ assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
+ const auto *C = cast<ConstantInt>(RHS);
+ unsigned Val = C->getValue().logBase2();
+ if (Val < 1 || Val > 3)
+ break;
+
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
+
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
+
+ const Value *Src = LHS;
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
+
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ case Instruction::And: {
+ if (Addr.getOffsetReg())
+ break;
+
+ if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
+ break;
+
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setShift(0);
+ Addr.setExtendType(AArch64_AM::UXTW);
+
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ if (!Addr.getReg() || Addr.getOffsetReg())
+ break;
+
+ const Value *Src = nullptr;
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ if (!Src)
+ break;
+
+ Addr.setShift(0);
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ } // end switch
+
+ if (Addr.isRegBase() && !Addr.getReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setReg(Reg);
+ return true;
+ }
+
+ if (!Addr.getOffsetReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ bool InMBB = true;
+
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
+ } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past a bitcast if its operand is in the same BB.
+ if (InMBB)
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::IntToPtr:
+ // Look past a no-op inttoptr if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past a no-op ptrtoint if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Addr.setGlobalValue(GV);
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!Addr.getGlobalValue()) {
+ Addr.setReg(getRegForValue(V));
+ return Addr.getReg() != 0;
}
- // Try to get this in a register if nothing else has worked.
- if (!Addr.isValid())
- Addr.setReg(getRegForValue(Obj));
- return Addr.isValid();
+ return false;
}
+
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
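Taken together, the Shl and ZExt/SExt cases in computeAddress() above allow an index computation to fold into a single register-offset access; a hedged example:

    //   %idx64 = zext i32 %idx to i64
    //   %off   = shl i64 %idx64, 2     ; element size 4 -> shift of 2
    //   ; ... added to the base pointer and loaded from
    //   ldr w0, [x0, w1, uxtw #2]      ; one extending, shifting load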
@@ -428,62 +915,122 @@ bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
-bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
+/// \brief Determine if the value type is supported by FastISel.
+///
+/// FastISel for AArch64 can handle more value types than are legal. This adds
+/// simple value types such as i1, i8, and i16.
+bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
+ if (Ty->isVectorTy() && !IsVectorAllowed)
+ return false;
+
if (isTypeLegal(Ty, VT))
return true;
// If this is a type that can be sign- or zero-extended to a basic operation,
- // go ahead and accept it now. For stores, this reflects truncation.
+ // go ahead and accept it now.
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
}
-bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
- int64_t ScaleFactor, bool UseUnscaled) {
- bool needsLowering = false;
- int64_t Offset = Addr.getOffset();
- switch (VT.SimpleTy) {
- default:
+bool AArch64FastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ return true;
+
+ return false;
+}
+
+bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- if (!UseUnscaled)
- // Using scaled, 12-bit, unsigned immediate offsets.
- needsLowering = ((Offset & 0xfff) != Offset);
- else
- // Using unscaled, 9-bit, signed immediate offsets.
- needsLowering = (Offset > 256 || Offset < -256);
- break;
- }
- //If this is a stack pointer and the offset needs to be simplified then put
+ bool ImmediateOffsetNeedsLowering = false;
+ bool RegisterOffsetNeedsLowering = false;
+ int64_t Offset = Addr.getOffset();
+ if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
+ ImmediateOffsetNeedsLowering = true;
+ else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
+ !isUInt<12>(Offset / ScaleFactor))
+ ImmediateOffsetNeedsLowering = true;
+
+ // Cannot encode an offset register and an immediate offset in the same
+ // instruction. Fold the immediate offset into the load/store instruction and
+ // emit an additional add to take care of the offset register.
+ if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
+ RegisterOffsetNeedsLowering = true;
+
+ // Cannot encode zero register as base.
+ if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
+ RegisterOffsetNeedsLowering = true;
+
+ // If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
- if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addFrameIndex(Addr.getFI())
- .addImm(0)
- .addImm(0);
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
Addr.setKind(Address::RegBase);
Addr.setReg(ResultReg);
}
+ if (RegisterOffsetNeedsLowering) {
+ unsigned ResultReg = 0;
+ if (Addr.getReg()) {
+ if (Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::UXTW)
+ ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, Addr.getExtendType(),
+ Addr.getShift());
+ else
+ ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, AArch64_AM::LSL,
+ Addr.getShift());
+ } else {
+ if (Addr.getExtendType() == AArch64_AM::UXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/true);
+ else if (Addr.getExtendType() == AArch64_AM::SXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/false);
+ else
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift());
+ }
+ if (!ResultReg)
+ return false;
+
+ Addr.setReg(ResultReg);
+ Addr.setOffsetReg(0);
+ Addr.setShift(0);
+ Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
+ }
+
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
- if (needsLowering) {
- uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
- unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
- UnscaledOffset, MVT::i64);
- if (ResultReg == 0)
+ if (ImmediateOffsetNeedsLowering) {
+ unsigned ResultReg;
+ if (Addr.getReg())
+ // Try to fold the immediate into the add instruction.
+ ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
+ else
+ ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
+
+ if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
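simplifyAddress() above applies the standard AArch64 immediate-offset encoding rule; restated as a standalone predicate (the helper name is invented for illustration):

    // Sketch, assuming ScaleFactor is the access size in bytes.
    static bool isDirectlyEncodable(int64_t Offset, unsigned ScaleFactor) {
      if (Offset < 0 || (Offset & (ScaleFactor - 1)))
        return isInt<9>(Offset);               // unscaled LDUR/STUR form
      return isUInt<12>(Offset / ScaleFactor); // scaled LDR/STR form
    }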
@@ -491,222 +1038,1021 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
return true;
}
-void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
+void AArch64FastISel::addLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled) {
- int64_t Offset = Addr.getOffset();
+ unsigned Flags,
+ unsigned ScaleFactor,
+ MachineMemOperand *MMO) {
+ int64_t Offset = Addr.getOffset() / ScaleFactor;
// Frame base works a bit differently. Handle it separately.
- if (Addr.getKind() == Address::FrameIndexBase) {
+ if (Addr.isFIBase()) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
- MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
- MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+ MIB.addFrameIndex(FI).addImm(Offset);
} else {
- // Now add the rest of the operands.
- MIB.addReg(Addr.getReg());
- MIB.addImm(Offset);
+ assert(Addr.isRegBase() && "Unexpected address kind.");
+ const MCInstrDesc &II = MIB->getDesc();
+ unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
+ Addr.setReg(
+ constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
+ Addr.setOffsetReg(
+ constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
+ if (Addr.getOffsetReg()) {
+ assert(Addr.getOffset() == 0 && "Unexpected offset");
+ bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTX;
+ MIB.addReg(Addr.getReg());
+ MIB.addReg(Addr.getOffsetReg());
+ MIB.addImm(IsSigned);
+ MIB.addImm(Addr.getShift() != 0);
+ } else
+ MIB.addReg(Addr.getReg()).addImm(Offset);
}
+
+ if (MMO)
+ MIB.addMemOperand(MMO);
}
-bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled) {
- // Negative offsets require unscaled, 9-bit, signed immediate offsets.
- // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
+unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
+ const Value *RHS, bool SetFlags,
+ bool WantResult, bool IsZExt) {
+ AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
+ bool NeedExtend = false;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ NeedExtend = true;
+ break;
+ case MVT::i8:
+ NeedExtend = true;
+ ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
+ break;
+ case MVT::i16:
+ NeedExtend = true;
+ ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
+ break;
+ case MVT::i32: // fall-through
+ case MVT::i64:
+ break;
+ }
+ MVT SrcVT = RetVT;
+ RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
+
+ // Canonicalize immediates to the RHS first.
+ if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize mul by power of 2 to the RHS.
+ if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
+ if (isMulPowOf2(LHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize shift immediate to the RHS.
+ if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
+ if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
+ if (isa<ConstantInt>(SI->getOperand(1)))
+ if (SI->getOpcode() == Instruction::Shl ||
+ SI->getOpcode() == Instruction::LShr ||
+ SI->getOpcode() == Instruction::AShr)
+ std::swap(LHS, RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return 0;
+ bool LHSIsKill = hasTrivialKill(LHS);
- unsigned Opc;
+ if (NeedExtend)
+ LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
+
+ unsigned ResultReg = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
+ if (C->isNegative())
+ ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
+ SetFlags, WantResult);
+ else
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
+ WantResult);
+ } else if (const auto *C = dyn_cast<Constant>(RHS))
+ if (C->isNullValue())
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
+ WantResult);
+
+ if (ResultReg)
+ return ResultReg;
+
+ // Only extend the RHS within the instruction if there is a valid extend type.
+ if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
+ isValueAvailable(RHS)) {
+ if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ExtendType, C->getZExtValue(),
+ SetFlags, WantResult);
+ }
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ ExtendType, 0, SetFlags, WantResult);
+ }
+
+ // Check if the mul can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (isMulPowOf2(RHS)) {
+ const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
+ const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(MulLHS, MulRHS);
+
+ assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
+ uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
+ unsigned RHSReg = getRegForValue(MulLHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(MulLHS);
+ return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
+ }
+
+ // Check if the shift can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
+ switch (SI->getOpcode()) {
+ default: break;
+ case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
+ case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
+ case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
+ }
+ uint64_t ShiftVal = C->getZExtValue();
+ if (ShiftType != AArch64_AM::InvalidShiftExtend) {
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftVal, SetFlags,
+ WantResult);
+ }
+ }
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (NeedExtend)
+ RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
+
+ return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ SetFlags, WantResult);
+}
+
+unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrr, AArch64::SUBXrr },
+ { AArch64::ADDWrr, AArch64::ADDXrr } },
+ { { AArch64::SUBSWrr, AArch64::SUBSXrr },
+ { AArch64::ADDSWrr, AArch64::ADDSXrr } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm,
+ bool SetFlags, bool WantResult) {
+ assert(LHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ unsigned ShiftImm;
+ if (isUInt<12>(Imm))
+ ShiftImm = 0;
+ else if ((Imm & 0xfff000) == Imm) {
+ ShiftImm = 12;
+ Imm >>= 12;
+ } else
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWri, AArch64::SUBXri },
+ { AArch64::ADDWri, AArch64::ADDXri } },
+ { { AArch64::SUBSWri, AArch64::SUBSXri },
+ { AArch64::ADDSWri, AArch64::ADDSXri } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
const TargetRegisterClass *RC;
- bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
+ if (SetFlags)
+ RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ else
+ RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addImm(Imm)
+ .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrs, AArch64::SUBXrs },
+ { AArch64::ADDWrs, AArch64::ADDXrs } },
+ { { AArch64::SUBSWrs, AArch64::SUBSXrs },
+ { AArch64::ADDSWrs, AArch64::ADDSXrs } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getShifterImm(ShiftType, ShiftImm));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrx, AArch64::SUBXrx },
+ { AArch64::ADDWrx, AArch64::ADDXrx } },
+ { { AArch64::SUBSWrx, AArch64::SUBSXrx },
+ { AArch64::ADDSWrx, AArch64::ADDSXrx } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC = nullptr;
+ if (SetFlags)
+ RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ else
+ RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getArithExtendImm(ExtType, ShiftImm));
+ return ResultReg;
+}
+
+bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
+ Type *Ty = LHS->getType();
+ EVT EVT = TLI.getValueType(Ty, true);
+ if (!EVT.isSimple())
+ return false;
+ MVT VT = EVT.getSimpleVT();
+
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
- VTIsi1 = true;
- // Intentional fall-through.
case MVT::i8:
- Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ return emitICmp(VT, LHS, RHS, IsZExt);
+ case MVT::f32:
+ case MVT::f64:
+ return emitFCmp(VT, LHS, RHS);
+ }
+}
+
+bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool IsZExt) {
+ return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
+ IsZExt) != 0;
+}
+
+bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
+ /*SetFlags=*/true, /*WantResult=*/false) != 0;
+}
+
+bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
+ if (RetVT != MVT::f32 && RetVT != MVT::f64)
+ return false;
+
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ bool UseImm = false;
+ if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->isZero() && !CFP->isNegative())
+ UseImm = true;
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (UseImm) {
+ unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ return true;
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill));
+ return true;
+}
+
+unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags, bool WantResult, bool IsZExt) {
+ return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
+ IsZExt);
+}
+
+/// \brief This method is a wrapper to simplify add emission.
+///
+/// First try to emit an add with an immediate operand using emitAddSub_ri. If
+/// that fails, then try to materialize the immediate into a register and use
+/// emitAddSub_rr instead.
+unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
+ int64_t Imm) {
+ unsigned ResultReg;
+ if (Imm < 0)
+ ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
+ else
+ ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
+
+ if (ResultReg)
+ return ResultReg;
+
+ unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
+ if (!CReg)
+ return 0;
+
+ ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags, bool WantResult, bool IsZExt) {
+ return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
+ IsZExt);
+}
+
+unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill, bool WantResult) {
+ return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, /*SetFlags=*/true, WantResult);
+}
+
+unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool WantResult) {
+ return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
+ WantResult);
+}
+
+unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+ const Value *LHS, const Value *RHS) {
+ // Canonicalize immediates to the RHS first.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize mul by power-of-2 to the RHS.
+ if (LHS->hasOneUse() && isValueAvailable(LHS))
+ if (isMulPowOf2(LHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize shift immediate to the RHS.
+ if (LHS->hasOneUse() && isValueAvailable(LHS))
+ if (const auto *SI = dyn_cast<ShlOperator>(LHS))
+ if (isa<ConstantInt>(SI->getOperand(1)))
+ std::swap(LHS, RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return 0;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned ResultReg = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ uint64_t Imm = C->getZExtValue();
+ ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
+ }
+ if (ResultReg)
+ return ResultReg;
+
+ // Check if the mul can be folded into the instruction.
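+ // For example, 'and %a, (mul %b, 4)' becomes 'and Wd, Wa, Wb, lsl #2'
+ // (illustrative registers) via emitLogicalOp_rs below.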
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (isMulPowOf2(RHS)) {
+ const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
+ const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(MulLHS, MulRHS);
+
+ assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
+ uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
+
+ unsigned RHSReg = getRegForValue(MulLHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(MulLHS);
+ return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ }
+
+ // Check if the shift can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (const auto *SI = dyn_cast<ShlOperator>(RHS))
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ uint64_t ShiftVal = C->getZExtValue();
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
+ ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+ "ISD nodes are not consecutive!");
+ static const unsigned OpcTable[3][2] = {
+ { AArch64::ANDWri, AArch64::ANDXri },
+ { AArch64::ORRWri, AArch64::ORRXri },
+ { AArch64::EORWri, AArch64::EORXri }
+ };
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ unsigned RegSize;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ unsigned Idx = ISDOpc - ISD::AND;
+ Opc = OpcTable[Idx][0];
+ RC = &AArch64::GPR32spRegClass;
+ RegSize = 32;
+ break;
+ }
+ case MVT::i64:
+ Opc = OpcTable[ISDOpc - ISD::AND][1];
+ RC = &AArch64::GPR64spRegClass;
+ RegSize = 64;
+ break;
+ }
+
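+ // Logical immediates on AArch64 are rotated, replicated bit patterns; e.g.,
+ // 0x00ff00ff is encodable as a 32-bit logical immediate, but 0x1234 is not.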
+ if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
+ return 0;
+
+ unsigned ResultReg =
+ fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
+ AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ uint64_t ShiftImm) {
+ assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+ "ISD nodes are not consecutive!");
+ static const unsigned OpcTable[3][2] = {
+ { AArch64::ANDWrs, AArch64::ANDXrs },
+ { AArch64::ORRWrs, AArch64::ORRXrs },
+ { AArch64::EORWrs, AArch64::EORXrs }
+ };
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ Opc = OpcTable[ISDOpc - ISD::AND][0];
RC = &AArch64::GPR32RegClass;
+ break;
+ case MVT::i64:
+ Opc = OpcTable[ISDOpc - ISD::AND][1];
+ RC = &AArch64::GPR64RegClass;
+ break;
+ }
+ unsigned ResultReg =
+ fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
+}
+
+unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
+ bool WantZExt, MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!simplifyAddress(Addr, VT))
+ return 0;
+
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
+
+ // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+ // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
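+ // For a 4-byte access, for example, the scaled form encodes offsets in
+ // [0, 16380] that are multiples of 4 (LDR Wt, [Xn, #imm]), while the
+ // unscaled form encodes any offset in [-256, 255] (LDUR Wt, [Xn, #simm9]).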
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
ScaleFactor = 1;
+ }
+
+ static const unsigned GPOpcTable[2][8][4] = {
+ // Sign-extend.
+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
+ AArch64::LDURXi },
+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
+ AArch64::LDRXroW }
+ },
+ // Zero-extend.
+ { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW }
+ }
+ };
+
+ static const unsigned FPOpcTable[4][2] = {
+ { AArch64::LDURSi, AArch64::LDURDi },
+ { AArch64::LDRSui, AArch64::LDRDui },
+ { AArch64::LDRSroX, AArch64::LDRDroX },
+ { AArch64::LDRSroW, AArch64::LDRDroW }
+ };
+
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
+
+ bool IsRet64Bit = RetVT == MVT::i64;
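+ // GPOpcTable rows come in pairs indexed by 2 * Idx + IsRet64Bit: Idx picks
+ // the addressing form (0 = unscaled, 1 = scaled, 2 = X register offset,
+ // 3 = extended W register offset) and IsRet64Bit picks the variant that
+ // produces a 64-bit result.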
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // Intentional fall-through.
+ case MVT::i8:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
break;
case MVT::i16:
- Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 2;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
break;
case MVT::i32:
- Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 4;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
break;
case MVT::i64:
- Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
RC = &AArch64::GPR64RegClass;
- ScaleFactor = 8;
break;
case MVT::f32:
- Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 4;
+ Opc = FPOpcTable[Idx][0];
+ RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
- Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 8;
+ Opc = FPOpcTable[Idx][1];
+ RC = &AArch64::FPR64RegClass;
break;
}
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
-
- Addr.setOffset(Offset / ScaleFactor);
- }
-
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
+ addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
- if (VTIsi1) {
- MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(ResultReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (VT == MVT::i1) {
+ unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
+
+ // For zero-extending loads to a 64-bit result we emit a 32-bit load and then
+ // convert the 32-bit register into a 64-bit register. This is safe, because a
+ // 32-bit load on AArch64 already zeroes the upper 32 bits of the destination
+ // register; the SUBREG_TO_REG merely records that fact.
+ if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(ResultReg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ ResultReg = Reg64;
+ }
+ return ResultReg;
+}
+
+bool AArch64FastISel::selectAddSub(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (VT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::Add:
+ ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Sub:
+ ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (VT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::And:
+ ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Or:
+ ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Xor:
+ ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectLoad(const Instruction *I) {
+bool AArch64FastISel::selectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
+ cast<LoadInst>(I)->isAtomic())
return false;
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr))
+ if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
- unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr))
+ // Fold the following sign-/zero-extend into the load instruction.
+ bool WantZExt = true;
+ MVT RetVT = VT;
+ const Value *IntExtVal = nullptr;
+ if (I->hasOneUse()) {
+ if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(ZE->getType(), RetVT))
+ IntExtVal = ZE;
+ else
+ RetVT = VT;
+ } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(SE->getType(), RetVT))
+ IntExtVal = SE;
+ else
+ RetVT = VT;
+ WantZExt = false;
+ }
+ }
+
+ unsigned ResultReg =
+ emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ // There are a few different cases we have to handle, because the load or the
+ // sign-/zero-extend might not be selected by FastISel if we fall back to
+ // SelectionDAG. There is also an ordering issue when both instructions are in
+ // different basic blocks.
+ // 1.) The load instruction is selected by FastISel, but the integer extend
+ // is not. This usually happens when the integer extend is in a different
+ // basic block and SelectionDAG took over for that basic block.
+ // 2.) The load instruction is selected before the integer extend. This only
+ // happens when the integer extend is in a different basic block.
+ // 3.) The load instruction is selected by SelectionDAG and the integer extend
+ // by FastISel. This happens if there are instructions between the load
+ // and the integer extend that couldn't be selected by FastISel.
+ if (IntExtVal) {
+ // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
+ // could select it. Emit a copy to subreg if necessary. FastISel will remove
+ // it when it selects the integer extend.
+ unsigned Reg = lookUpRegForValue(IntExtVal);
+ if (!Reg) {
+ if (RetVT == MVT::i64 && VT <= MVT::i32) {
+ if (WantZExt) {
+ // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
+ std::prev(FuncInfo.InsertPt)->eraseFromParent();
+ ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ } else
+ ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
+ /*IsKill=*/true,
+ AArch64::sub_32);
+ }
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // The integer extend has already been emitted - delete all the instructions
+ // that have been emitted by the integer extend lowering code and use the
+ // result from the load instruction directly.
+ while (Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
+ break;
+ Reg = 0;
+ for (auto &Opnd : MI->uses()) {
+ if (Opnd.isReg()) {
+ Reg = Opnd.getReg();
+ break;
+ }
+ }
+ MI->eraseFromParent();
+ }
+ updateValueMap(IntExtVal, ResultReg);
+ return true;
+ }
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled) {
+bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!simplifyAddress(Addr, VT))
+ return false;
+
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
+
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
-
- unsigned StrOpc;
- bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
- // Using scaled, 12-bit, unsigned immediate offsets.
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- VTIsi1 = true;
- case MVT::i8:
- StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
ScaleFactor = 1;
- break;
- case MVT::i16:
- StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
- ScaleFactor = 2;
- break;
- case MVT::i32:
- StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
- ScaleFactor = 4;
- break;
- case MVT::i64:
- StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
- ScaleFactor = 8;
- break;
- case MVT::f32:
- StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
- ScaleFactor = 4;
- break;
- case MVT::f64:
- StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
- ScaleFactor = 8;
- break;
}
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
- Addr.setOffset(Offset / ScaleFactor);
- }
+ static const unsigned OpcTable[4][6] = {
+ { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
+ AArch64::STURSi, AArch64::STURDi },
+ { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
+ AArch64::STRSui, AArch64::STRDui },
+ { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
+ AArch64::STRSroX, AArch64::STRDroX },
+ { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
+ AArch64::STRSroW, AArch64::STRDroW }
+ };
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
+ unsigned Opc;
+ bool VTIsi1 = false;
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1: VTIsi1 = true; // Intentional fall-through.
+ case MVT::i8: Opc = OpcTable[Idx][0]; break;
+ case MVT::i16: Opc = OpcTable[Idx][1]; break;
+ case MVT::i32: Opc = OpcTable[Idx][2]; break;
+ case MVT::i64: Opc = OpcTable[Idx][3]; break;
+ case MVT::f32: Opc = OpcTable[Idx][4]; break;
+ case MVT::f64: Opc = OpcTable[Idx][5]; break;
+ }
// Storing an i1 requires special handling.
- if (VTIsi1) {
- MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(SrcReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (VTIsi1 && SrcReg != AArch64::WZR) {
+ unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(StrOpc)).addReg(SrcReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
+ const MCInstrDesc &II = TII.get(Opc);
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
+ addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
+
return true;
}
-bool AArch64FastISel::SelectStore(const Instruction *I) {
+bool AArch64FastISel::selectStore(const Instruction *I) {
MVT VT;
- Value *Op0 = I->getOperand(0);
+ const Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
+ if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
cast<StoreInst>(I)->isAtomic())
return false;
- // Get the value to be stored into a register.
- unsigned SrcReg = getRegForValue(Op0);
- if (SrcReg == 0)
+ // Get the value to be stored into a register. Use the zero register directly
+ // when possible to avoid an unnecessary copy and a wasted register.
+ unsigned SrcReg = 0;
+ if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
+ if (CI->isZero())
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
+ if (CF->isZero() && !CF->isNegative()) {
+ VT = MVT::getIntegerVT(VT.getSizeInBits());
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ }
+ }
+
+ if (!SrcReg)
+ SrcReg = getRegForValue(Op0);
+
+ if (!SrcReg)
return false;
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr))
+ if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
- if (!EmitStore(VT, SrcReg, Addr))
+ if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
return false;
return true;
}
@@ -757,58 +2103,235 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
}
}
-bool AArch64FastISel::SelectBranch(const Instruction *I) {
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+ const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ MVT VT;
+ if (!isTypeSupported(LHS->getType(), VT))
+ return false;
+
+ unsigned BW = VT.getSizeInBits();
+ if (BW > 64)
+ return false;
+
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ int TestBit = -1;
+ bool IsCmpNE;
+ switch (Predicate) {
+ default:
+ return false;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
+ std::swap(LHS, RHS);
+
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
+ return false;
+
+ if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+ if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
+ const Value *AndLHS = AI->getOperand(0);
+ const Value *AndRHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(AndLHS, AndRHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+ if (C->getValue().isPowerOf2()) {
+ TestBit = C->getValue().logBase2();
+ LHS = AndLHS;
+ }
+ }
+
+ if (VT == MVT::i1)
+ TestBit = 0;
+
+ IsCmpNE = Predicate == CmpInst::ICMP_NE;
+ break;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
+ return false;
+
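+ // A signed compare against zero reduces to a sign-bit test; e.g.,
+ // 'icmp slt i32 %x, 0' becomes 'tbnz w0, #31' (illustrative register).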
+ TestBit = BW - 1;
+ IsCmpNE = Predicate == CmpInst::ICMP_SLT;
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SLE:
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
+ return false;
+
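+ // Comparing against -1 also reduces to a sign-bit test; e.g.,
+ // 'icmp sgt i32 %x, -1' becomes 'tbz w0, #31' (illustrative register).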
+ TestBit = BW - 1;
+ IsCmpNE = Predicate == CmpInst::ICMP_SLE;
+ break;
+ } // end switch
+
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::CBZW, AArch64::CBZX },
+ {AArch64::CBNZW, AArch64::CBNZX} },
+ { {AArch64::TBZW, AArch64::TBZX },
+ {AArch64::TBNZW, AArch64::TBNZX} }
+ };
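+ // For example, 'icmp ne i64 %x, 0' followed by a branch selects CBNZX, while
+ // '%a = and i32 %x, 8; icmp ne i32 %a, 0' selects TBNZW with TestBit = 3.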
+
+ bool IsBitTest = TestBit != -1;
+ bool Is64Bit = BW == 64;
+ if (TestBit < 32 && TestBit >= 0)
+ Is64Bit = false;
+
+ unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+ const MCInstrDesc &II = TII.get(Opc);
+
+ unsigned SrcReg = getRegForValue(LHS);
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(LHS);
+
+ if (BW == 64 && !Is64Bit)
+ SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+ AArch64::sub_32);
+
+ if ((BW < 32) && !IsBitTest)
+ SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+
+ // Emit the combined compare and branch instruction.
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ if (IsBitTest)
+ MIB.addImm(TestBit);
+ MIB.addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ fastEmitBranch(FBB, DbgLoc);
+
+ return true;
+}
+
+bool AArch64FastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
+ if (BI->isUnconditional()) {
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
- // We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == AArch64CC::AL)
- return false;
+ if (CI->hasOneUse() && isValueAvailable(CI)) {
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_FALSE:
+ fastEmitBranch(FBB, DbgLoc);
+ return true;
+ case CmpInst::FCMP_TRUE:
+ fastEmitBranch(TBB, DbgLoc);
+ return true;
+ }
+
+ // Try to emit a combined compare-and-branch first.
+ if (emitCompareAndBranch(BI))
+ return true;
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
// Emit the cmp.
- if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
+ // instruction.
+ CC = getCompareCC(Predicate);
+ AArch64CC::CondCode ExtraCC = AArch64CC::AL;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ ExtraCC = AArch64CC::EQ;
+ CC = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_ONE:
+ ExtraCC = AArch64CC::MI;
+ CC = AArch64CC::GT;
+ break;
+ }
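+ // For FCMP_UEQ this emits 'b.eq' plus 'b.vs' (equal, or unordered); for
+ // FCMP_ONE it emits 'b.mi' plus 'b.gt' (less than, or greater than).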
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+
+ // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
+ if (ExtraCC != AArch64CC::AL) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(ExtraCC)
+ .addMBB(TBB);
+ }
+
// Emit the branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
MVT SrcVT;
- if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
- (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
+ if (TI->hasOneUse() && isValueAvailable(TI) &&
+ isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
unsigned CondReg = getRegForValue(TI->getOperand(0));
- if (CondReg == 0)
+ if (!CondReg)
return false;
+ bool CondIsKill = hasTrivialKill(TI->getOperand(0));
// Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64)
- CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
+ if (SrcVT == MVT::i64) {
+ CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
AArch64::sub_32);
+ CondIsKill = true;
+ }
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::ANDWri), ANDReg)
- .addReg(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBSWri))
- .addReg(ANDReg)
- .addReg(ANDReg)
- .addImm(0)
- .addImm(0);
+ unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
+ emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -816,23 +2339,57 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
- } else if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(BI->getCondition())) {
+ } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- FuncInfo.MBB->addSuccessor(Target);
+
+ // Obtain the branch weight and add the target to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+ return true;
+ } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Artificially request the condition; otherwise the intrinsic might be
+ // optimized away completely.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
+
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
unsigned CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
+ bool CondRegIsKill = hasTrivialKill(BI->getCondition());
// We've been divorced from our compare! Our block was split, and
// now our compare lives in a predecessor block. We mustn't
@@ -841,13 +2398,8 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
- AArch64::WZR)
- .addReg(CondReg)
- .addImm(0)
- .addImm(0);
+ emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -856,20 +2408,28 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
-bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
+bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
unsigned AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
// Emit the indirect branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
- .addReg(AddrReg);
+ const MCInstrDesc &II = TII.get(AArch64::BR);
+ AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
// Make sure the CFG is up-to-date.
for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
@@ -878,211 +2438,271 @@ bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
return true;
}
-bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
- Type *Ty = Src1Value->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
- if (!SrcEVT.isSimple())
- return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
-
- // Check to see if the 2nd operand is a constant that we can encode directly
- // in the compare.
- uint64_t Imm;
- bool UseImm = false;
- bool isNegativeImm = false;
- if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
- if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
- SrcVT == MVT::i8 || SrcVT == MVT::i1) {
- const APInt &CIVal = ConstInt->getValue();
-
- Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
- if (CIVal.isNegative()) {
- isNegativeImm = true;
- Imm = -Imm;
- }
- // FIXME: We can handle more immediates using shifts.
- UseImm = ((Imm & 0xfff) == Imm);
- }
- } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
- if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
- if (ConstFP->isZero() && !ConstFP->isNegative())
- UseImm = true;
- }
+bool AArch64FastISel::selectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
- unsigned ZReg;
- unsigned CmpOpc;
- bool isICmp = true;
- bool needsExt = false;
- switch (SrcVT.SimpleTy) {
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ unsigned ResultReg = 0;
+ switch (Predicate) {
default:
- return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- needsExt = true;
- // Intentional fall-through.
- case MVT::i32:
- ZReg = AArch64::WZR;
- if (UseImm)
- CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
- else
- CmpOpc = AArch64::SUBSWrr;
break;
- case MVT::i64:
- ZReg = AArch64::XZR;
- if (UseImm)
- CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
- else
- CmpOpc = AArch64::SUBSXrr;
- break;
- case MVT::f32:
- isICmp = false;
- CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
+ case CmpInst::FCMP_FALSE:
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(AArch64::WZR, getKillRegState(true));
break;
- case MVT::f64:
- isICmp = false;
- CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
+ case CmpInst::FCMP_TRUE:
+ ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
break;
}
- unsigned SrcReg1 = getRegForValue(Src1Value);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2;
- if (!UseImm) {
- SrcReg2 = getRegForValue(Src2Value);
- if (SrcReg2 == 0)
- return false;
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
}
- // We have i1, i8, or i16, we need to either zero extend or sign extend.
- if (needsExt) {
- SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
- if (SrcReg1 == 0)
- return false;
- if (!UseImm) {
- SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
- if (SrcReg2 == 0)
- return false;
- }
- }
+ // Emit the cmp.
+ if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- if (isICmp) {
- if (UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(ZReg)
- .addReg(SrcReg1)
- .addImm(Imm)
- .addImm(0);
- else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(ZReg)
- .addReg(SrcReg1)
- .addReg(SrcReg2);
- } else {
- if (UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(SrcReg1);
- else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(SrcReg1)
- .addReg(SrcReg2);
- }
- return true;
-}
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
-bool AArch64FastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
+ // condition codes are inverted, because they are used by CSINC.
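+ // CSINC Wd, WZR, WZR, cond computes (cond ? 0 : 1), i.e. the inverse of
+ // cond, which is why the inverted condition codes are stored here.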
+ static unsigned CondCodeTable[2][2] = {
+ { AArch64CC::NE, AArch64CC::VC },
+ { AArch64CC::PL, AArch64CC::LE }
+ };
+ unsigned *CondCodes = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ CondCodes = &CondCodeTable[0][0];
+ break;
+ case CmpInst::FCMP_ONE:
+ CondCodes = &CondCodeTable[1][0];
+ break;
+ }
- // We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == AArch64CC::AL)
- return false;
+ if (CondCodes) {
+ unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+ TmpReg1)
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addImm(CondCodes[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+ ResultReg)
+ .addReg(TmpReg1, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addImm(CondCodes[1]);
- // Emit the cmp.
- if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
- return false;
+ updateValueMap(I, ResultReg);
+ return true;
+ }
// Now set a register based on the comparison.
+ AArch64CC::CondCode CC = getCompareCC(Predicate);
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
- unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
ResultReg)
- .addReg(AArch64::WZR)
- .addReg(AArch64::WZR)
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
.addImm(invertedCC);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectSelect(const Instruction *I) {
- const SelectInst *SI = cast<SelectInst>(I);
-
- EVT DestEVT = TLI.getValueType(SI->getType(), true);
- if (!DestEVT.isSimple())
+/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
+/// value.
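+/// For example, 'select i1 %c, i1 true, i1 %f' is equivalent to 'or %c, %f',
+/// and 'select i1 %c, i1 %t, i1 false' is equivalent to 'and %c, %t'.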
+bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
+ if (!SI->getType()->isIntegerTy(1))
return false;
- MVT DestVT = DestEVT.getSimpleVT();
- if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
- DestVT != MVT::f64)
- return false;
+ const Value *Src1Val, *Src2Val;
+ unsigned Opc = 0;
+ bool NeedExtraOp = false;
+ if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getFalseValue();
+ Opc = AArch64::ORRWrr;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getFalseValue();
+ Src2Val = SI->getCondition();
+ Opc = AArch64::BICWrr;
+ }
+ } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ORRWrr;
+ NeedExtraOp = true;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ANDWrr;
+ }
+ }
- unsigned CondReg = getRegForValue(SI->getCondition());
- if (CondReg == 0)
- return false;
- unsigned TrueReg = getRegForValue(SI->getTrueValue());
- if (TrueReg == 0)
+ if (!Opc)
return false;
- unsigned FalseReg = getRegForValue(SI->getFalseValue());
- if (FalseReg == 0)
+
+ unsigned Src1Reg = getRegForValue(Src1Val);
+ if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(Src1Val);
+ unsigned Src2Reg = getRegForValue(Src2Val);
+ if (!Src2Reg)
+ return false;
+ bool Src2IsKill = hasTrivialKill(Src2Val);
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (NeedExtraOp) {
+ Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
+ Src1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+ Src1IsKill, Src2Reg, Src2IsKill);
+ updateValueMap(SI, ResultReg);
+ return true;
+}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
- .addReg(ANDReg)
- .addReg(ANDReg)
- .addImm(0)
- .addImm(0);
+bool AArch64FastISel::selectSelect(const Instruction *I) {
+ assert(isa<SelectInst>(I) && "Expected a select instruction.");
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
- unsigned SelectOpc;
- switch (DestVT.SimpleTy) {
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
default:
return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
case MVT::i32:
- SelectOpc = AArch64::CSELWr;
+ Opc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
break;
case MVT::i64:
- SelectOpc = AArch64::CSELXr;
+ Opc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
break;
case MVT::f32:
- SelectOpc = AArch64::FCSELSrrr;
+ Opc = AArch64::FCSELSrrr;
+ RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
- SelectOpc = AArch64::FCSELDrrr;
+ Opc = AArch64::FCSELDrrr;
+ RC = &AArch64::FPR64RegClass;
break;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
- ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addImm(AArch64CC::NE);
+ const SelectInst *SI = cast<SelectInst>(I);
+ const Value *Cond = SI->getCondition();
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ AArch64CC::CondCode ExtraCC = AArch64CC::AL;
- UpdateValueMap(I, ResultReg);
+ if (optimizeSelect(SI))
+ return true;
+
+ // Try to pick up the flags, so we don't have to emit another compare.
+ if (foldXALUIntrinsic(CC, I, Cond)) {
+ // Artificially request the condition to force emission of the XALU intrinsic.
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
+ isValueAvailable(Cond)) {
+ const auto *Cmp = cast<CmpInst>(Cond);
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
+ const Value *FoldSelect = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_FALSE:
+ FoldSelect = SI->getFalseValue();
+ break;
+ case CmpInst::FCMP_TRUE:
+ FoldSelect = SI->getTrueValue();
+ break;
+ }
+
+ if (FoldSelect) {
+ unsigned SrcReg = getRegForValue(FoldSelect);
+ if (!SrcReg)
+ return false;
+ unsigned UseReg = lookUpRegForValue(SI);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
+ }
+
+ // Emit the cmp.
+ if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
+ return false;
+
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
+ CC = getCompareCC(Predicate);
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ ExtraCC = AArch64CC::EQ;
+ CC = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_ONE:
+ ExtraCC = AArch64CC::MI;
+ CC = AArch64CC::GT;
+ break;
+ }
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ // Emit a TST instruction (ANDS wzr, reg, #imm).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+ AArch64::WZR)
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ }
+
+ unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+ bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
+
+ unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+ bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
+
+ if (!Src1Reg || !Src2Reg)
+ return false;
+
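+ // FCMP_UEQ/FCMP_ONE need two conditional selects: the first folds the extra
+ // condition into the false operand; the second selects on the main condition.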
+ if (ExtraCC != AArch64CC::AL) {
+ Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, ExtraCC);
+ Src2IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, CC);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectFPExt(const Instruction *I) {
+bool AArch64FastISel::selectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
@@ -1094,11 +2714,11 @@ bool AArch64FastISel::SelectFPExt(const Instruction *I) {
unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
+bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
@@ -1110,12 +2730,12 @@ bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// FPToUI and FPToSI
-bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
+bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
@@ -1144,11 +2764,11 @@ bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
+bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
@@ -1156,22 +2776,21 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
"Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
- if (SrcReg == 0)
+ if (!SrcReg)
return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
SrcReg =
- EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
- if (SrcReg == 0)
+ emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
+ if (!SrcReg)
return false;
+ SrcIsKill = true;
}
- MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
- : &AArch64::GPR32RegClass);
-
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
@@ -1185,21 +2804,128 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
+ SrcIsKill);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::fastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ // Only handle simple cases of up to 8 GPR and FPR each.
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = Arg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple())
+ return false;
+
+ MVT VT = ArgVT.getSimpleVT().SimpleTy;
+ if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
+ return false;
+
+ if (VT.isVector() &&
+ (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
+ return false;
+
+ if (VT >= MVT::i1 && VT <= MVT::i64)
+ ++GPRCnt;
+ else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
+ VT.is128BitVector())
+ ++FPRCnt;
+ else
+ return false;
+
+ if (GPRCnt > 8 || FPRCnt > 8)
+ return false;
+ }
+
+ static const MCPhysReg Registers[6][8] = {
+ { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+ AArch64::W5, AArch64::W6, AArch64::W7 },
+ { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+ AArch64::X5, AArch64::X6, AArch64::X7 },
+ { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+ AArch64::H5, AArch64::H6, AArch64::H7 },
+ { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+ AArch64::S5, AArch64::S6, AArch64::S7 },
+ { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+ AArch64::D5, AArch64::D6, AArch64::D7 },
+ { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
+ AArch64::Q5, AArch64::Q6, AArch64::Q7 }
+ };
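+ // For example, under the C calling convention 'void f(i32 %a, double %b)'
+ // assigns %a to W0 and %b to D0.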
+
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ unsigned SrcReg;
+ const TargetRegisterClass *RC;
+ if (VT >= MVT::i1 && VT <= MVT::i32) {
+ SrcReg = Registers[0][GPRIdx++];
+ RC = &AArch64::GPR32RegClass;
+ VT = MVT::i32;
+ } else if (VT == MVT::i64) {
+ SrcReg = Registers[1][GPRIdx++];
+ RC = &AArch64::GPR64RegClass;
+ } else if (VT == MVT::f16) {
+ SrcReg = Registers[2][FPRIdx++];
+ RC = &AArch64::FPR16RegClass;
+ } else if (VT == MVT::f32) {
+ SrcReg = Registers[3][FPRIdx++];
+ RC = &AArch64::FPR32RegClass;
+ } else if ((VT == MVT::f64) || VT.is64BitVector()) {
+ SrcReg = Registers[4][FPRIdx++];
+ RC = &AArch64::FPR64RegClass;
+ } else if (VT.is128BitVector()) {
+ SrcReg = Registers[5][FPRIdx++];
+ RC = &AArch64::FPR128RegClass;
+ } else
+ llvm_unreachable("Unexpected value type.");
+
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ updateValueMap(&Arg, ResultReg);
+ }
return true;
}
-bool AArch64FastISel::ProcessCallArgs(
- SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
+ SmallVectorImpl<MVT> &OutVTs,
+ unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1207,13 +2933,17 @@ bool AArch64FastISel::ProcessCallArgs(
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(NumBytes);
+ .addImm(NumBytes);
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- unsigned Arg = ArgRegs[VA.getValNo()];
- MVT ArgVT = ArgVTs[VA.getValNo()];
+ const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
// Handle arg promotion: SExt, ZExt, AExt.
switch (VA.getLocInfo()) {
@@ -1222,8 +2952,8 @@ bool AArch64FastISel::ProcessCallArgs(
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
- if (Arg == 0)
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+ if (!ArgReg)
return false;
break;
}
@@ -1232,8 +2962,8 @@ bool AArch64FastISel::ProcessCallArgs(
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
- if (Arg == 0)
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+ if (!ArgReg)
return false;
break;
}
@@ -1244,14 +2974,18 @@ bool AArch64FastISel::ProcessCallArgs(
// Now copy/store arg to correct locations.
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
- RegArgs.push_back(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ CLI.OutRegs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// FIXME: Handle custom args.
return false;
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
// Need to store on the stack.
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
@@ -1264,26 +2998,31 @@ bool AArch64FastISel::ProcessCallArgs(
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- if (!EmitStore(ArgVT, Arg, Addr))
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+
+ if (!emitStore(ArgVT, ArgReg, Addr, MMO))
return false;
}
}
return true;
}
-bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
+ unsigned NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(NumBytes)
- .addImm(0);
+ .addImm(NumBytes).addImm(0);
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
// Only handle a single return value.
@@ -1294,147 +3033,147 @@ bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
MVT CopyVT = RVLocs[0].getValVT();
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(RVLocs[0].getLocReg());
+ CLI.InRegs.push_back(RVLocs[0].getLocReg());
- // Finally update the result.
- UpdateValueMap(I, ResultReg);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
}
return true;
}
-bool AArch64FastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = nullptr) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
+bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
- // Don't handle inline asm or intrinsics.
- if (isa<InlineAsm>(Callee))
+ if (!Callee && !SymName)
return false;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
return false;
- // Check the calling convention.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
+ CodeModel::Model CM = TM.getCodeModel();
+ // Only support the small and large code model.
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
+ return false;
+
+ // FIXME: Add large code model support for ELF.
+ if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
+ return false;
// Let SDISel handle vararg functions.
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ if (IsVarArg)
return false;
- // Handle *simple* calls for now.
+ // FIXME: Only handle *simple* calls for now.
MVT RetVT;
- Type *RetTy = I->getType();
- if (RetTy->isVoidTy())
+ if (CLI.RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(CLI.RetTy, RetVT))
return false;
- // Set up the argument vectors.
- SmallVector<Value *, 8> Args;
- SmallVector<unsigned, 8> ArgRegs;
- SmallVector<MVT, 8> ArgVTs;
- SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
-
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- // If we're lowering a memory intrinsic instead of a regular call, skip the
- // last two arguments, which shouldn't be passed to the underlying function.
- if (IntrMemName && e - i <= 2)
- break;
-
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
return false;
- ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
- Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
- Flags.setZExt();
-
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
+ // Set up the argument vectors.
+ SmallVector<MVT, 16> OutVTs;
+ OutVTs.reserve(CLI.OutVals.size());
- MVT ArgVT;
- Type *ArgTy = (*i)->getType();
- if (!isTypeLegal(ArgTy, ArgVT) &&
- !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
+ for (auto *Val : CLI.OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT) &&
+ !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
return false;
// We don't handle vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ if (VT.isVector() || VT.getSizeInBits() > 64)
return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
-
- Args.push_back(*i);
- ArgRegs.push_back(Arg);
- ArgVTs.push_back(ArgVT);
- ArgFlags.push_back(Flags);
+ OutVTs.push_back(VT);
}
+ Address Addr;
+ if (Callee && !computeCallAddress(Callee, Addr))
+ return false;
+
// Handle the arguments now that we've gotten them.
- SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
// Issue the call.
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
- if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
+ if (CM == CodeModel::Small) {
+ const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
+ if (SymName)
+ MIB.addExternalSymbol(SymName, 0);
+ else if (Addr.getGlobalValue())
+ MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
+ else if (Addr.getReg()) {
+ unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
+ MIB.addReg(Reg);
+ } else
+ return false;
+ } else {
+ unsigned CallReg = 0;
+ if (SymName) {
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+ CallReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ CallReg)
+ .addReg(ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (Addr.getGlobalValue())
+ CallReg = materializeGV(Addr.getGlobalValue());
+ else if (Addr.getReg())
+ CallReg = Addr.getReg();
+
+ if (!CallReg)
+ return false;
+
+ const MCInstrDesc &II = TII.get(AArch64::BLR);
+ CallReg = constrainOperandRegClass(II, CallReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
+ }
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
-
- // Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
- return false;
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
- // Set all unused physreg defs as dead.
- static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+ CLI.Call = MIB;
- return true;
+ // Finish off the call including any return values.
+ return finishCall(CLI, RetVT, NumBytes);
}
-bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
+bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
if (Alignment)
return Len / Alignment <= 4;
else
return Len < 32;
}
-bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
+bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
- if (!IsMemCpySmall(Len, Alignment))
+ if (!isMemCpySmall(Len, Alignment))
return false;
int64_t UnscaledOffset = 0;
@@ -1464,14 +3203,11 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
}
}
- bool RV;
- unsigned ResultReg;
- RV = EmitLoad(VT, ResultReg, Src);
- if (!RV)
+ unsigned ResultReg = emitLoad(VT, VT, Src);
+ if (!ResultReg)
return false;
- RV = EmitStore(VT, ResultReg, Dest);
- if (!RV)
+ if (!emitStore(VT, ResultReg, Dest))
return false;
int64_t Size = VT.getSizeInBits() / 8;
@@ -1486,73 +3222,430 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
return true;
}
-bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
- // FIXME: Handle more intrinsics.
- switch (I.getIntrinsicID()) {
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
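+/// A minimal sketch of the IR pattern this matches (value names illustrative):
+///   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+///   %obit = extractvalue { i32, i1 } %res, 1
+///   br i1 %obit, label %overflow, label %cont
+/// The ADDS emitted for the intrinsic already sets the flags, so the user can
+/// test AArch64CC::VS directly instead of a materialized overflow bit.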
+bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
+ const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ // Simplify multiplies.
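+ // x * 2 overflows exactly when x + x does, so the cheaper add form yields
+ // the same condition code.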
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::sadd_with_overflow;
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::uadd_with_overflow;
+ break;
+ }
+
+ AArch64CC::CondCode TmpCC;
+ switch (IID) {
default:
return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ TmpCC = AArch64CC::VS;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ TmpCC = AArch64CC::HS;
+ break;
+ case Intrinsic::usub_with_overflow:
+ TmpCC = AArch64CC::LO;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ TmpCC = AArch64CC::NE;
+ break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (!isValueAvailable(II))
+ return false;
+
+ // Make sure nothing is in the way
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
+
+bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
+ // FIXME: Handle more intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
+ // Recursively load frame address
+ // ldr x0, [fp]
+ // ldr x0, [x0]
+ // ldr x0, [x0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
+ SrcReg, /*IsKill=*/true, 0);
+ assert(DestReg && "Unexpected LDR instruction emission failure.");
+ SrcReg = DestReg;
+ }
+
+ updateValueMap(II, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ const auto *MTI = cast<MemTransferInst>(II);
// Don't handle volatile.
- if (MTI.isVolatile())
+ if (MTI->isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
- bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
- if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
- uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
- unsigned Alignment = MTI.getAlignment();
- if (IsMemCpySmall(Len, Alignment)) {
+ uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
+ unsigned Alignment = MTI->getAlignment();
+ if (isMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!ComputeAddress(MTI.getRawDest(), Dest) ||
- !ComputeAddress(MTI.getRawSource(), Src))
+ if (!computeAddress(MTI->getRawDest(), Dest) ||
+ !computeAddress(MTI->getRawSource(), Src))
return false;
- if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
+ if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
- if (!MTI.getLength()->getType()->isIntegerTy(64))
+ if (!MTI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
- return SelectCall(&I, IntrMemName);
+ const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
+ return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
- const MemSetInst &MSI = cast<MemSetInst>(I);
+ const MemSetInst *MSI = cast<MemSetInst>(II);
// Don't handle volatile.
- if (MSI.isVolatile())
+ if (MSI->isVolatile())
return false;
- if (!MSI.getLength()->getType()->isIntegerTy(64))
+ if (!MSI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MSI.getDestAddressSpace() > 255)
+ if (MSI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- return SelectCall(&I, "memset");
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+ }
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow: {
+ MVT RetVT;
+ if (!isTypeLegal(II->getType(), RetVT))
+ return false;
+
+ if (RetVT != MVT::f32 && RetVT != MVT::f64)
+ return false;
+
+ static const RTLIB::Libcall LibCallTable[3][2] = {
+ { RTLIB::SIN_F32, RTLIB::SIN_F64 },
+ { RTLIB::COS_F32, RTLIB::COS_F64 },
+ { RTLIB::POW_F32, RTLIB::POW_F64 }
+ };
+ RTLIB::Libcall LC;
+ bool Is64Bit = RetVT == MVT::f64;
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::sin:
+ LC = LibCallTable[0][Is64Bit];
+ break;
+ case Intrinsic::cos:
+ LC = LibCallTable[1][Is64Bit];
+ break;
+ case Intrinsic::pow:
+ LC = LibCallTable[2][Is64Bit];
+ break;
+ }
+
+ ArgListTy Args;
+ Args.reserve(II->getNumArgOperands());
+
+ // Populate the argument list.
+ for (auto &Arg : II->arg_operands()) {
+ ArgListEntry Entry;
+ Entry.Val = Arg;
+ Entry.Ty = Arg->getType();
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
+ TLI.getLibcallName(LC), std::move(Args));
+ if (!lowerCallTo(CLI))
+ return false;
+ updateValueMap(II, CLI.ResultReg);
+ return true;
+ }
+ case Intrinsic::fabs: {
+ MVT VT;
+ if (!isTypeLegal(II->getType(), VT))
+ return false;
+
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::f32:
+ Opc = AArch64::FABSSr;
+ break;
+ case MVT::f64:
+ Opc = AArch64::FABSDr;
+ break;
+ }
+ unsigned SrcReg = getRegForValue(II->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcRegIsKill));
+ updateValueMap(II, ResultReg);
+ return true;
}
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
}
+ case Intrinsic::sqrt: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(II->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(II->getOperand(0));
+
+ unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics.
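+ // Roughly: emit the flag-setting arithmetic (e.g. ADDS for sadd), then
+ // materialize the overflow bit with a CSINC on the inverted condition.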
+ const Function *Callee = II->getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ // Simplify multiplies.
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::sadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::uadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ }
+
+ unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
+ AArch64CC::CondCode CC = AArch64CC::Invalid;
+ switch (IID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::VS;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::HS;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::VS;
+ break;
+ case Intrinsic::usub_with_overflow:
+ ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::LO;
+ break;
+ case Intrinsic::smul_with_overflow: {
+ CC = AArch64CC::NE;
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
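+ // Widen to 64 bits with SMULL; signed overflow occurred iff the high 32
+ // bits differ from the sign-extension of the low 32 bits.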
+ MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
+ /*IsKill=*/false, 32);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
+ AArch64_AM::ASR, 31, /*WantResult=*/false);
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
+ MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
+ AArch64_AM::ASR, 63, /*WantResult=*/false);
+ }
+ break;
+ }
+ case Intrinsic::umul_with_overflow: {
+ CC = AArch64CC::NE;
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
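+ // Widen to 64 bits with UMULL; any set bit in the high 32 bits of the
+ // result indicates unsigned overflow.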
+ MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
+ /*IsKill=*/false, AArch64_AM::LSR, 32,
+ /*WantResult=*/false);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
+ MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
+ /*IsKill=*/false, /*WantResult=*/false);
+ }
+ break;
+ }
+ }
+
+ if (MulReg) {
+ ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
+ }
+
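+ // CSINC Wd, WZR, WZR, invert(CC) yields 1 exactly when CC holds, which
+ // materializes the overflow bit in the second result register.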
+ ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
+ AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
+ /*IsKill=*/true, getInvertedCondCode(CC));
+ (void)ResultReg2;
+ assert((ResultReg1 + 1) == ResultReg2 &&
+ "Nonconsecutive result registers.");
+ updateValueMap(II, ResultReg1, 2);
+ return true;
+ }
}
return false;
}
-bool AArch64FastISel::SelectRet(const Instruction *I) {
+bool AArch64FastISel::selectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
@@ -1572,8 +3665,7 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
@@ -1586,11 +3678,14 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
const Value *RV = Ret->getOperand(0);
// Don't bother handling odd stuff for now.
- if (VA.getLocInfo() != CCValAssign::Full)
+ if ((VA.getLocInfo() != CCValAssign::Full) &&
+ (VA.getLocInfo() != CCValAssign::BCvt))
return false;
+
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
+
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -1606,12 +3701,14 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
- if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
+ if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
+ !Subtarget->isLittleEndian())
return false;
MVT RVVT = RVEVT.getSimpleVT();
if (RVVT == MVT::f128)
return false;
+
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
@@ -1621,8 +3718,8 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
return false;
- bool isZExt = Outs[0].Flags.isZExt();
- SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
+ bool IsZExt = Outs[0].Flags.isZExt();
+ SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
if (SrcReg == 0)
return false;
}
@@ -1642,7 +3739,7 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
return true;
}
-bool AArch64FastISel::SelectTrunc(const Instruction *I) {
+bool AArch64FastISel::selectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
@@ -1667,10 +3764,14 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
unsigned SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
+ bool SrcIsKill = hasTrivialKill(Op);
// If we're truncating from i64 to a smaller non-legal type then generate an
- // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
- // generate any code.
+ // AND. Otherwise, we know the high bits are undefined and a truncate only
+ // generates a COPY. We cannot mark the source register as the result
+ // register as well, because this can incorrectly transfer the kill flag onto
+ // the source register.
+ unsigned ResultReg;
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
@@ -1688,23 +3789,23 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
break;
}
// Issue an extract_subreg to get the lower 32-bits.
- unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
+ unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
AArch64::sub_32);
- MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
// Create the AND instruction which performs the actual truncation.
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(Reg32)
- .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
- SrcReg = ANDReg;
+ ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+ assert(ResultReg && "Unexpected AND instruction emission failure.");
+ } else {
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, ResultReg);
return true;
}
-unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
+unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
DestVT == MVT::i64) &&
"Unexpected value type.");
@@ -1712,14 +3813,9 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
- if (isZExt) {
- MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
- unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ResultReg)
- .addReg(SrcReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
-
+ if (IsZExt) {
+ unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ assert(ResultReg && "Unexpected AND instruction emission failure.");
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
@@ -1737,18 +3833,389 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
// FIXME: We're SExt i1 to i64.
return 0;
}
- unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
- ResultReg)
- .addReg(SrcReg)
+ return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
+ /*TODO:IsKill=*/false, 0, 0);
+ }
+}
+
+unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned Opc, ZReg;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVT = MVT::i32;
+ Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
+ case MVT::i64:
+ Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
+ ZReg, /*IsKill=*/true);
+}
+
+unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ if (RetVT != MVT::i64)
+ return 0;
+
+ return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
+ Op0, Op0IsKill, Op1, Op1IsKill,
+ AArch64::XZR, /*IsKill=*/true);
+}
+
+unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ if (RetVT != MVT::i64)
+ return 0;
+
+ return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
+ Op0, Op0IsKill, Op1, Op1IsKill,
+ AArch64::XZR, /*IsKill=*/true);
+}
+
+unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSLVWr; break;
+ case MVT::i64: Opc = AArch64::LSLVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
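+ // For i8/i16 the operands live in W registers with undefined upper bits:
+ // clean the variable shift amount with an AND and truncate the shifted
+ // result back to the original width.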
+ if (NeedTrunc) {
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<32+s-r,32-r> = Wn<s:0> when r > s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 4
+ // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
+ // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
+ // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
+ // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 8
+ // Wd<32+7-24,32-24> = Wn<7:0>
+ // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 12
+ // Wd<32+3-20,32-20> = Wn<3:0>
+ // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
+
+ unsigned ImmR = RegSize - Shift;
+ // Limit the bitfield width to the width of the source type.
+ unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
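+ // A 64-bit bitfield move needs a 64-bit source register, so first place the
+ // 32-bit value into the low half of an X register via SUBREG_TO_REG.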
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
- .addImm(0);
- return ResultReg;
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
}
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
-unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
- bool isZExt) {
+unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSRVWr; break;
+ case MVT::i64: Opc = AArch64::LSRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 8
+ // Wd<7-7:0> = Wn<7:7>
+ // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 12
+ // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
+ // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ if (Shift >= SrcBits && IsZExt)
+ return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
+
+ // It is not possible to fold a sign-extend into the LShr instruction, so in
+ // this case emit the sign-extend explicitly first.
+ if (!IsZExt) {
+ Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ if (!Op0)
+ return 0;
+ Op0IsKill = true;
+ SrcVT = RetVT;
+ SrcBits = SrcVT.getSizeInBits();
+ IsZExt = true;
+ }
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
+
+unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::ASRVWr; break;
+ case MVT::i64: Opc = AArch64::ASRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 8
+ // Wd<7-7:0> = Wn<7:7>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 12
+ // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ if (Shift >= SrcBits && IsZExt)
+ return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
+
+unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool IsZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
// FastISel does not have plumbing to deal with extensions where the SrcVT or
@@ -1768,24 +4235,24 @@ unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
default:
return 0;
case MVT::i1:
- return Emiti1Ext(SrcReg, DestVT, isZExt);
+ return emiti1Ext(SrcReg, DestVT, IsZExt);
case MVT::i8:
if (DestVT == MVT::i64)
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
+ Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 7;
break;
case MVT::i16:
if (DestVT == MVT::i64)
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
+ Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 15;
break;
case MVT::i32:
assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
Imm = 31;
break;
}
@@ -1803,45 +4270,167 @@ unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
SrcReg = Src64;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(Imm);
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
+}
- return ResultReg;
+static bool isZExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRWroX:
+ case AArch64::LDRBBroW:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRWroW:
+ return true;
+ }
}
-bool AArch64FastISel::SelectIntExt(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Src = I->getOperand(0);
- Type *SrcTy = Src->getType();
+static bool isSExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSWroW:
+ return true;
+ }
+}
- bool isZExt = isa<ZExtInst>(I);
- unsigned SrcReg = getRegForValue(Src);
- if (!SrcReg)
+bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
+ MVT SrcVT) {
+ const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI || !LI->hasOneUse())
return false;
- EVT SrcEVT = TLI.getValueType(SrcTy, true);
- EVT DestEVT = TLI.getValueType(DestTy, true);
- if (!SrcEVT.isSimple())
+ // Check if the load instruction has already been selected.
+ unsigned Reg = lookUpRegForValue(LI);
+ if (!Reg)
return false;
- if (!DestEVT.isSimple())
+
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
- if (ResultReg == 0)
+ // Check if the correct load instruction has been emitted - SelectionDAG might
+ // have emitted a zero-extending load, but we need a sign-extending load.
+ bool IsZExt = isa<ZExtInst>(I);
+ const auto *LoadMI = MI;
+ if (LoadMI->getOpcode() == TargetOpcode::COPY &&
+ LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
+ unsigned LoadReg = MI->getOperand(1).getReg();
+ LoadMI = MRI.getUniqueVRegDef(LoadReg);
+ assert(LoadMI && "Expected valid instruction");
+ }
+ if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
+ return false;
+
+ // Nothing to be done.
+ if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ if (IsZExt) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(Reg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ Reg = Reg64;
+ } else {
+ assert((MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
+ "Expected copy instruction");
+ Reg = MI->getOperand(1).getReg();
+ MI->eraseFromParent();
+ }
+ updateValueMap(I, Reg);
+ return true;
+}
+
+bool AArch64FastISel::selectIntExt(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ MVT RetVT;
+ MVT SrcVT;
+ if (!isTypeSupported(I->getType(), RetVT))
+ return false;
+
+ if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
+ return false;
+
+ // Try to optimize already sign-/zero-extended values from load instructions.
+ if (optimizeIntExtLoad(I, RetVT, SrcVT))
+ return true;
+
+ unsigned SrcReg = getRegForValue(I->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Try to optimize already sign-/zero-extended values from function arguments.
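+ // E.g. for "define i32 @f(i16 zeroext %a)" the caller already zero-extended
+ // %a, so a zext at the IR level needs no code at all; for a 64-bit result
+ // only a SUBREG_TO_REG is required (signature illustrative).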
+ bool IsZExt = isa<ZExtInst>(I);
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
+ if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(AArch64::sub_32);
+ SrcReg = ResultReg;
+ }
+ // Conservatively clear all kill flags from all uses, because we are
+ // replacing a sign-/zero-extend instruction at IR level with a nop at MI
+ // level. The result of the instruction at IR level might have been
+ // trivially dead, which is no longer true.
+ unsigned UseReg = lookUpRegForValue(I);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
+ }
+ }
+
+ unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
+bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
EVT DestEVT = TLI.getValueType(I->getType(), true);
if (!DestEVT.isSimple())
return false;
@@ -1851,144 +4440,529 @@ bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
return false;
unsigned DivOpc;
- bool is64bit = (DestVT == MVT::i64);
+ bool Is64bit = (DestVT == MVT::i64);
switch (ISDOpcode) {
default:
return false;
case ISD::SREM:
- DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
+ DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
break;
case ISD::UREM:
- DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
+ DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
break;
}
- unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
+ unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
- .addReg(Src0Reg)
- .addReg(Src1Reg);
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
+ Src1Reg, /*IsKill=*/false);
+ assert(QuotReg && "Unexpected DIV instruction emission failure.");
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
- .addReg(QuotReg)
- .addReg(Src1Reg)
- .addReg(Src0Reg);
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
+ Src1Reg, Src1IsKill, Src0Reg,
+ Src0IsKill);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectMul(const Instruction *I) {
- EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
- if (!SrcEVT.isSimple())
+bool AArch64FastISel::selectMul(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- // Must be simple value type. Don't handle vectors.
- if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
- SrcVT != MVT::i8)
+ if (VT.isVector())
+ return selectBinaryOp(I, ISD::MUL);
+
+ const Value *Src0 = I->getOperand(0);
+ const Value *Src1 = I->getOperand(1);
+ if (const auto *C = dyn_cast<ConstantInt>(Src0))
+ if (C->getValue().isPowerOf2())
+ std::swap(Src0, Src1);
+
+ // Try to simplify to a shift instruction.
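+ // E.g. "mul i32 %x, 8" becomes "lsl w0, w0, #3" via emitLSL_ri below
+ // (register names illustrative).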
+ if (const auto *C = dyn_cast<ConstantInt>(Src1))
+ if (C->getValue().isPowerOf2()) {
+ uint64_t ShiftVal = C->getValue().logBase2();
+ MVT SrcVT = VT;
+ bool IsZExt = true;
+ if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
+ if (!isIntExtFree(ZExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Src0 = ZExt->getOperand(0);
+ }
+ }
+ } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
+ if (!isIntExtFree(SExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Src0 = SExt->getOperand(0);
+ }
+ }
+ }
+
+ unsigned Src0Reg = getRegForValue(Src0);
+ if (!Src0Reg)
+ return false;
+ bool Src0IsKill = hasTrivialKill(Src0);
+
+ unsigned ResultReg =
+ emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
+
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ if (!Src0Reg)
+ return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ if (!Src1Reg)
+ return false;
+ bool Src1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectShift(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (RetVT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = 0;
+ uint64_t ShiftVal = C->getZExtValue();
+ MVT SrcVT = RetVT;
+ bool IsZExt = (I->getOpcode() != Instruction::AShr);
+ const Value *Op0 = I->getOperand(0);
+ if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
+ if (!isIntExtFree(ZExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Op0 = ZExt->getOperand(0);
+ }
+ }
+ } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
+ if (!isIntExtFree(SExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Op0 = SExt->getOperand(0);
+ }
+ }
+ }
+
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(Op0);
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (!Op1Reg)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ }
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectBitCast(const Instruction *I) {
+ MVT RetVT, SrcVT;
+
+ if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
+ return false;
+ if (!isTypeLegal(I->getType(), RetVT))
return false;
unsigned Opc;
- unsigned ZReg;
- switch (SrcVT.SimpleTy) {
+ if (RetVT == MVT::f32 && SrcVT == MVT::i32)
+ Opc = AArch64::FMOVWSr;
+ else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
+ Opc = AArch64::FMOVXDr;
+ else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
+ Opc = AArch64::FMOVSWr;
+ else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
+ Opc = AArch64::FMOVDXr;
+ else
+ return false;
+
+ const TargetRegisterClass *RC = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i32: RC = &AArch64::GPR32RegClass; break;
+ case MVT::i64: RC = &AArch64::GPR64RegClass; break;
+ case MVT::f32: RC = &AArch64::FPR32RegClass; break;
+ case MVT::f64: RC = &AArch64::FPR64RegClass; break;
+ }
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+ unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectFRem(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
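+ // AArch64 has no frem instruction, so lower the remainder to the fmodf/fmod
+ // libcall; e.g. "frem float %a, %b" becomes a call to fmodf.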
+ RTLIB::Libcall LC;
+ switch (RetVT.SimpleTy) {
default:
return false;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- ZReg = AArch64::WZR;
- Opc = AArch64::MADDWrrr;
- SrcVT = MVT::i32;
+ case MVT::f32:
+ LC = RTLIB::REM_F32;
break;
- case MVT::i64:
- ZReg = AArch64::XZR;
- Opc = AArch64::MADDXrrr;
+ case MVT::f64:
+ LC = RTLIB::REM_F64;
break;
}
+ ArgListTy Args;
+ Args.reserve(I->getNumOperands());
+
+ // Populate the argument list.
+ for (auto &Arg : I->operands()) {
+ ArgListEntry Entry;
+ Entry.Val = Arg;
+ Entry.Ty = Arg->getType();
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
+ TLI.getLibcallName(LC), std::move(Args));
+ if (!lowerCallTo(CLI))
+ return false;
+ updateValueMap(I, CLI.ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectSDiv(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ if (!isa<ConstantInt>(I->getOperand(1)))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
+ !(C.isPowerOf2() || (-C).isPowerOf2()))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ unsigned Lg2 = C.countTrailingZeros();
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
- unsigned Src1Reg = getRegForValue(I->getOperand(1));
- if (!Src1Reg)
+ if (cast<BinaryOperator>(I)->isExact()) {
+ unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
+ if (!ResultReg)
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
+ unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
+ if (!AddReg)
return false;
- // Create the base instruction, then add the operands.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Src0Reg)
- .addReg(Src1Reg)
- .addReg(ZReg);
- UpdateValueMap(I, ResultReg);
+ // (Src0 < 0) ? Pow2 - 1 : 0;
+ if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+ return false;
+
+ unsigned SelectOpc;
+ const TargetRegisterClass *RC;
+ if (VT == MVT::i64) {
+ SelectOpc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
+ } else {
+ SelectOpc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
+ }
+ unsigned SelectReg =
+ fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
+ Src0IsKill, AArch64CC::LT);
+ if (!SelectReg)
+ return false;
+
+  // Divide by Pow2 --> ashr. If we're dividing by a negative value, we must
+  // also negate the result.
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg;
+ if (C.isNegative())
+ ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
+ SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
+ else
+ ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
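
The ADD/CSEL/ASR sequence above is the standard round-toward-zero expansion of signed division by a power of two: bias negative dividends by 2^Lg2 - 1 before shifting, and negate afterwards for a negative divisor. A self-contained sketch of the arithmetic (assuming arithmetic right shift of signed values, as AArch64 provides):

    #include <cstdint>

    // Signed division by +/- 2^Lg2 without a divide instruction.
    int64_t sdivPow2(int64_t X, unsigned Lg2, bool NegDivisor) {
      int64_t Bias = (int64_t(1) << Lg2) - 1;
      int64_t Adj = X < 0 ? X + Bias : X; // the CSEL on (Src0 < 0)
      int64_t Res = Adj >> Lg2;           // ASR #Lg2
      return NegDivisor ? -Res : Res;     // the SUB from the zero register
    }

For example, sdivPow2(-7, 2, false) yields -1, matching C semantics; a bare ASR would give -2, which is why the bias is needed in the non-exact case.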
+
+/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
+/// have to duplicate it for AArch64, because otherwise we would fail during the
+/// sign-extend emission.
+std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT))
+ llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+/// This is mostly a copy of the existing FastISel GEP code, but we have to
+/// duplicate it for AArch64, because otherwise we would bail out even for
+/// simple cases: the standard fastEmit functions don't cover MUL at all, and
+/// ADD is lowered very inefficiently.
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (!N)
+ return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (auto *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ // N = N + Offset
+ if (Field)
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ TotalOffs +=
+ DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ continue;
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN)
+ return false;
+
+ if (ElementSize != 1) {
+ unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+ if (!C)
+ return false;
+ IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+ if (!IdxN)
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N)
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ }
+ updateValueMap(I, N);
return true;
}
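
The running TotalOffs above reflects how a GEP's byte offset decomposes into constant struct-field offsets plus scaled variable indices; only the variable parts cost a mul and an add. A small sketch of the decomposition for a hypothetical struct (names ours):

    #include <cstddef>
    #include <cstdint>

    struct S { int32_t A; int64_t B[4]; };

    // Offset for a GEP such as: getelementptr %S, %S* %p, i64 I, i32 1, i64 J.
    uint64_t gepByteOffset(uint64_t I, uint64_t J) {
      uint64_t TotalOffs = 0;
      TotalOffs += I * sizeof(S);       // index over the pointee type
      TotalOffs += offsetof(S, B);      // constant field offset, folded
      TotalOffs += J * sizeof(int64_t); // variable index: mul + add
      return TotalOffs;
    }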
-bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
+bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
- case Instruction::Load:
- return SelectLoad(I);
- case Instruction::Store:
- return SelectStore(I);
+ case Instruction::Add:
+ case Instruction::Sub:
+ return selectAddSub(I);
+ case Instruction::Mul:
+ return selectMul(I);
+ case Instruction::SDiv:
+ return selectSDiv(I);
+ case Instruction::SRem:
+ if (!selectBinaryOp(I, ISD::SREM))
+ return selectRem(I, ISD::SREM);
+ return true;
+ case Instruction::URem:
+ if (!selectBinaryOp(I, ISD::UREM))
+ return selectRem(I, ISD::UREM);
+ return true;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return selectShift(I);
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return selectLogicalOp(I);
case Instruction::Br:
- return SelectBranch(I);
+ return selectBranch(I);
case Instruction::IndirectBr:
- return SelectIndirectBr(I);
- case Instruction::FCmp:
- case Instruction::ICmp:
- return SelectCmp(I);
- case Instruction::Select:
- return SelectSelect(I);
- case Instruction::FPExt:
- return SelectFPExt(I);
- case Instruction::FPTrunc:
- return SelectFPTrunc(I);
+ return selectIndirectBr(I);
+ case Instruction::BitCast:
+ if (!FastISel::selectBitCast(I))
+ return selectBitCast(I);
+ return true;
case Instruction::FPToSI:
- return SelectFPToInt(I, /*Signed=*/true);
+ if (!selectCast(I, ISD::FP_TO_SINT))
+ return selectFPToInt(I, /*Signed=*/true);
+ return true;
case Instruction::FPToUI:
- return SelectFPToInt(I, /*Signed=*/false);
+ return selectFPToInt(I, /*Signed=*/false);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return selectIntExt(I);
+ case Instruction::Trunc:
+ if (!selectCast(I, ISD::TRUNCATE))
+ return selectTrunc(I);
+ return true;
+ case Instruction::FPExt:
+ return selectFPExt(I);
+ case Instruction::FPTrunc:
+ return selectFPTrunc(I);
case Instruction::SIToFP:
- return SelectIntToFP(I, /*Signed=*/true);
+ if (!selectCast(I, ISD::SINT_TO_FP))
+ return selectIntToFP(I, /*Signed=*/true);
+ return true;
case Instruction::UIToFP:
- return SelectIntToFP(I, /*Signed=*/false);
- case Instruction::SRem:
- return SelectRem(I, ISD::SREM);
- case Instruction::URem:
- return SelectRem(I, ISD::UREM);
- case Instruction::Call:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return SelectIntrinsicCall(*II);
- return SelectCall(I);
+ return selectIntToFP(I, /*Signed=*/false);
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return selectCmp(I);
+ case Instruction::Select:
+ return selectSelect(I);
case Instruction::Ret:
- return SelectRet(I);
- case Instruction::Trunc:
- return SelectTrunc(I);
- case Instruction::ZExt:
- case Instruction::SExt:
- return SelectIntExt(I);
- case Instruction::Mul:
- // FIXME: This really should be handled by the target-independent selector.
- return SelectMul(I);
+ return selectRet(I);
+ case Instruction::FRem:
+ return selectFRem(I);
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
}
- return false;
+
+  // Fall back to target-independent instruction selection.
+ return selectOperator(I, I->getOpcode());
// Silence warnings.
(void)&CC_AArch64_DarwinPCS_VarArg;
}
namespace llvm {
-llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo) {
- return new AArch64FastISel(funcInfo, libInfo);
+llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ return new AArch64FastISel(FuncInfo, LibInfo);
}
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9c337172dd06..66aa216db2c9 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -17,16 +17,16 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -86,13 +86,14 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
#ifndef NDEBUG
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
assert(!RegInfo->needsStackRealignment(MF) &&
"No stack realignment on AArch64!");
#endif
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
+ MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
+ MFI->hasPatchPoint());
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -109,13 +110,13 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc DL = I->getDebugLoc();
int Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!TFI->hasReservedCallFrame(MF)) {
unsigned Align = getStackAlignment();
@@ -131,7 +132,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
  // FIXME: in-function stack adjustment for calls is limited to 24 bits
// because there's no guaranteed temporary register available.
//
- // ADD/SUB (immediate) has only LSL #0 and LSL #12 avaiable.
+ // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
// 1) For offset <= 12-bit, we use LSL #0
// 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
// LSL #0, and the other uses LSL #12.
@@ -158,7 +159,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
@@ -166,7 +167,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout *TD = MF.getSubtarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
@@ -195,7 +196,8 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, Offset - TotalSkipped));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -205,8 +207,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
@@ -233,7 +235,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else if (NumBytes) {
++NumRedZoneFunctions;
}
@@ -300,7 +303,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout *TD = MF.getSubtarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -376,26 +379,30 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
// Record the location of the stored LR
unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
// Record the location of the stored FP
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
// Now emit the moves for whatever callee saved regs we have.
@@ -435,9 +442,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
DebugLoc DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
@@ -548,7 +555,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool PreferFP) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
int FPOffset = MFI->getObjectOffset(FI) + 16;
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
@@ -617,7 +624,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
@@ -693,7 +700,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
@@ -761,7 +768,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
MachineFunction &MF, RegScavenger *RS) const {
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
SmallVector<unsigned, 4> UnspilledCSGPRs;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 7686e6f18432..df3875f9f264 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64_FRAMELOWERING_H
-#define AArch64_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3f49fabfb58e..bb2e1e2b31b7 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -303,7 +303,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the a value is used at least twice, unless we are optimizing
+ // it hurts if the value is used at least twice, unless we are optimizing
// for code size.
if (ForCodeSize || V.hasOneUse())
return true;
@@ -569,6 +569,27 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
return isWorthFolding(N);
}
+/// If there's a use of this ADDlow that's not itself a load/store then we'll
+/// need to create a real ADD instruction from it anyway and there's no point in
+/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
+/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
+/// leads to duplicated ADRP instructions.
+static bool isWorthFoldingADDlow(SDValue N) {
+ for (auto Use : N->uses()) {
+ if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+ Use->getOpcode() != ISD::ATOMIC_LOAD &&
+ Use->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+
+ // ldar and stlr have much more restrictive addressing modes (just a
+ // register).
+ if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
+ return false;
+ }
+
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
@@ -582,7 +603,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
return true;
}
- if (N.getOpcode() == AArch64ISD::ADDlow) {
+ if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
GlobalAddressSDNode *GAN =
dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
Base = N.getOperand(0);
@@ -594,7 +615,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
unsigned Alignment = GV->getAlignment();
const DataLayout *DL = TLI->getDataLayout();
Type *Ty = GV->getType()->getElementType();
- if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
+ if (Alignment == 0 && Ty->isSized())
Alignment = DL->getABITypeAlignment(Ty);
if (Alignment >= Size)
@@ -777,6 +798,21 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
return false;
}
+// Check if the given immediate is preferred by ADD. Return true if the
+// immediate can be encoded in an ADD, or in an "ADD LSL #12" when it cannot
+// be encoded by a single MOVZ.
+static bool isPreferredADD(int64_t ImmOff) {
+  // Constants in [0x0, 0xfff] can be encoded in ADD.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+ // Check if it can be encoded in an "ADD LSL #12".
+ if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
+    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ return false;
+}
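
A few spot checks of the predicate (a standalone copy, since the original is file-static); the instructive case is a constant that fits the LSL #12 form but is rejected because one MOVZ can materialize it directly:

    #include <cassert>
    #include <cstdint>

    static bool isPreferredADDCopy(int64_t ImmOff) {
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true; // fits the plain 12-bit ADD immediate
      if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) // "ADD LSL #12" candidate
        return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
               (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; // not one MOVZ
      return false;
    }

    int main() {
      assert(isPreferredADDCopy(0xfff));    // plain ADD immediate
      assert(!isPreferredADDCopy(0x1000));  // a single MOVZ is cheaper
      assert(isPreferredADDCopy(0x123000)); // LSL #12 form, no single MOVZ
    }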
+
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
SDValue &Base, SDValue &Offset,
SDValue &SignExtend,
@@ -786,11 +822,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
- // We don't want to match immediate adds here, because they are better lowered
- // to the register-immediate addressing modes.
- if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
- return false;
-
// Check if this particular node is reused in any non-memory related
// operation. If yes, do not try to fold this node into the address
// computation, since the computation will be kept.
@@ -800,6 +831,36 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
return false;
}
+  // Watch out if RHS is a wide immediate: it cannot be selected into the
+  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
+  // either. In that case the [BaseReg + 0] addressing mode is used, generating
+ // instructions like:
+ // MOV X0, WideImmediate
+ // ADD X1, BaseReg, X0
+ // LDR X2, [X1, 0]
+  // In such situations, using the [BaseReg, XReg] addressing mode saves one
+ // ADD/SUB:
+ // MOV X0, WideImmediate
+ // LDR X2, [BaseReg, X0]
+ if (isa<ConstantSDNode>(RHS)) {
+    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+    // Skip immediates that can be selected in the load/store addressing
+    // mode. Also skip immediates that can be encoded by a single ADD (SUB is
+    // also checked by using -ImmOff).
+ if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+ isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return false;
+
+ SDLoc DL(N.getNode());
+ SDValue Ops[] = { RHS };
+ SDNode *MOVI =
+ CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
+ SDValue MOVIV = SDValue(MOVI, 0);
+    // This ADD of two X registers will be selected into the [Reg+Reg] mode.
+ N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
+ }
+
// Remember if it is worth folding N when it produces extended register.
bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
@@ -1381,20 +1442,21 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
return true;
}
-static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB) {
- // We are looking for the following pattern which basically extracts a single
- // bit from the source value and places it in the LSB of the destination
- // value, all other bits of the destination value or set to zero:
+static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
+ SDValue &Opd0, unsigned &LSB,
+ unsigned &MSB) {
+ // We are looking for the following pattern which basically extracts several
+  // contiguous bits from the source value and places them at the LSB of the
+  // destination value; all other bits of the destination value are set to zero:
//
// Value2 = AND Value, MaskImm
// SRL Value2, ShiftImm
//
- // with MaskImm >> ShiftImm == 1.
+  // where MaskImm >> ShiftImm is a contiguous mask that determines the bit width.
//
// This gets selected into a single UBFM:
//
- // UBFM Value, ShiftImm, ShiftImm
+  //   UBFM Value, ShiftImm, BitWide + Srl_imm - 1
//
if (N->getOpcode() != ISD::SRL)
@@ -1410,15 +1472,16 @@ static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
- // Check whether we really have a one bit extract here.
- if (And_mask >> Srl_imm == 0x1) {
+  // Check whether we really have an extract of several bits here.
+ unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm));
+ if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
else
Opc = AArch64::UBFMXri;
- LSB = MSB = Srl_imm;
-
+ LSB = Srl_imm;
+ MSB = BitWide + Srl_imm - 1;
return true;
}
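
The rewrite relies on the identity that (Value & MaskImm) >> ShiftImm is an unsigned bitfield extract whenever MaskImm >> ShiftImm is a contiguous run of ones. A quick model of the UBFM immediates chosen above (for field widths below 64):

    #include <cassert>
    #include <cstdint>

    // UBFM with immr = LSB and imms = MSB extracts bits [MSB:LSB] of X into
    // the low bits of the result (non-wrapping case).
    uint64_t ubfmExtract(uint64_t X, unsigned LSB, unsigned MSB) {
      unsigned Width = MSB - LSB + 1;
      return (X >> LSB) & ((uint64_t(1) << Width) - 1);
    }

    int main() {
      uint64_t X = 0xABCD;
      // (X & 0x0FF0) >> 4: Mask >> Shift == 0xFF, an 8-bit field at bit 4,
      // so LSB = 4 and MSB = 8 + 4 - 1 = 11.
      assert(((X & 0x0FF0) >> 4) == ubfmExtract(X, 4, 11));
    }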
@@ -1439,8 +1502,8 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
- // Check for AND + SRL doing a one bit extract.
- if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+  // Check for AND + SRL doing an extract of several bits.
+ if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
return true;
// we're looking for a shift of a shift
@@ -2116,7 +2179,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case 32:
SubReg = AArch64::ssub;
break;
- case 16: // FALLTHROUGH
+ case 16:
+ SubReg = AArch64::hsub;
+ break;
case 8:
llvm_unreachable("unexpected zext-requiring extract element!");
}
@@ -2204,9 +2269,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
@@ -2222,9 +2287,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
@@ -2240,9 +2305,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
@@ -2258,9 +2323,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
@@ -2276,9 +2341,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
@@ -2294,9 +2359,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
@@ -2312,9 +2377,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
@@ -2330,9 +2395,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
@@ -2348,9 +2413,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
@@ -2364,7 +2429,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld2lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 2, AArch64::LD2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 2, AArch64::LD2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2376,7 +2442,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld3lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 3, AArch64::LD3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 3, AArch64::LD3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2388,7 +2455,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld4lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 4, AArch64::LD4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 4, AArch64::LD4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2448,9 +2516,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 2, AArch64::ST1Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST1Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST1Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST1Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST1Twov2s);
@@ -2467,9 +2535,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 3, AArch64::ST1Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST1Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST1Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST1Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST1Threev2s);
@@ -2486,9 +2554,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 4, AArch64::ST1Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST1Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST1Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST1Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST1Fourv2s);
@@ -2505,9 +2573,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 2, AArch64::ST2Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST2Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST2Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST2Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST2Twov2s);
@@ -2524,9 +2592,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 3, AArch64::ST3Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST3Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST3Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST3Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST3Threev2s);
@@ -2543,9 +2611,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 4, AArch64::ST4Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST4Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST4Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST4Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST4Fourv2s);
@@ -2560,7 +2628,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st2lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 2, AArch64::ST2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 2, AArch64::ST2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2573,7 +2642,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st3lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 3, AArch64::ST3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 3, AArch64::ST3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2586,7 +2656,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st4lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 4, AArch64::ST4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 4, AArch64::ST4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2603,9 +2674,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
@@ -2622,9 +2693,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
@@ -2641,9 +2712,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
@@ -2660,9 +2731,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
@@ -2679,9 +2750,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
@@ -2698,9 +2769,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
@@ -2717,9 +2788,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
@@ -2736,9 +2807,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
@@ -2755,9 +2826,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
@@ -2774,9 +2845,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
@@ -2791,7 +2862,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD1LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2804,7 +2876,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD2LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2817,7 +2890,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD3LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2830,7 +2904,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD4LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2846,9 +2921,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
@@ -2866,9 +2941,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
@@ -2886,9 +2961,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
@@ -2906,9 +2981,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
@@ -2926,9 +3001,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
@@ -2946,9 +3021,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
@@ -2964,7 +3039,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2978,7 +3054,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2992,7 +3069,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index f2004ea1899d..0d44f992a2ab 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
-#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -38,10 +39,12 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
+namespace {
enum AlignMode {
StrictAlign,
NoStrictAlign
};
+}
static cl::opt<AlignMode>
Align(cl::desc("Load/store alignment support"),
@@ -64,18 +67,9 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
cl::desc("Allow AArch64 SLI/SRI formation"),
cl::init(false));
-//===----------------------------------------------------------------------===//
-// AArch64 Lowering public interface.
-//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new AArch64_MachoTargetObjectFile();
-
- return new AArch64_ELFTargetObjectFile();
-}
-AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
@@ -106,6 +100,7 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
+ addDRTypeForNEON(MVT::v4f16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
@@ -113,6 +108,7 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
+ addQRTypeForNEON(MVT::v8f16);
}
// Compute derived properties from the register classes
@@ -278,6 +274,94 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ // f16 is storage-only, so we promote operations to f32 if we know this is
+ // valid, and ignore them otherwise. The operations not mentioned here will
+ // fail to select, but this is not a major problem as no source language
+ // should be emitting native f16 operations yet.
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+
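In source terms the Promote action means every f16 arithmetic operation runs in f32 with a convert on each side. A minimal sketch, assuming a compiler that provides the storage-only __fp16 type (as Clang does for AArch64):

    // FADD on f16 after promotion: FCVT up to f32, 32-bit FADD, FCVT back.
    __fp16 addHalf(__fp16 A, __fp16 B) {
      return static_cast<__fp16>(static_cast<float>(A) + static_cast<float>(B));
    }
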
+ // v4f16 is also a storage-only type, so promote it to v4f32 when that is
+ // known to be safe.
+ setOperationAction(ISD::FADD, MVT::v4f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
+ AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
+
+ // Expand all other v4f16 operations.
+ // FIXME: We could generate better code by promoting some operations to
+  // a pair of v4f32s.
+ setOperationAction(ISD::FABS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+
+ // v8f16 is also a storage-only type, so expand it.
+ setOperationAction(ISD::FABS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+
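A minimal sketch of what the Promote action above means for scalar f16 arithmetic, assuming Clang's storage-only __fp16 extension on AArch64 (the function is illustrative, not part of this change):

  // Compile with clang --target=aarch64; the promoted FADD becomes
  // fcvt (widen) + fadd (single precision) + fcvt (narrow).
  __fp16 fadd_f16(__fp16 a, __fp16 b) {
    float wa = (float)a;   // fcvt s, h -- the "promote" step
    float wb = (float)b;
    float ws = wa + wb;    // fadd in single precision
    return (__fp16)ws;     // fcvt h, s -- back to the storage-only type
  }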
// AArch64 has implementations of a lot of rounding-like FP operations.
static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
@@ -303,13 +387,24 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
+ // Make floating-point constants legal for the large code model, so they don't
+ // become loads from the constant pool.
+ if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ }
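Hedged illustration of the effect (the exact materialization is chosen later during selection): with ConstantFP legal, an FP constant can be built in registers, e.g. via movz/movk into a GPR plus an fmov, instead of becoming a literal-pool load whose address the large code model would have to compute with a long instruction sequence.

  // Hypothetical kernel; under the large code model on MachO the constant
  // below should now be materialized in registers rather than loaded from
  // the constant pool.
  double scale(double x) { return x * 1.1; }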
+
// AArch64 does not have floating-point extending loads, i1 sign-extending
// loads, floating-point truncating stores, or v2i32->v2i16 truncating store.
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+ }
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -439,30 +534,31 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect MULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
- Expand);
-
- setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
-
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
}
// AArch64 has implementations of a lot of rounding-like FP operations.
@@ -477,16 +573,20 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FROUND, Ty, Legal);
}
}
+
+ // Prefer likely predicted branches to selects on out-of-order cores.
+ if (Subtarget->isCortexA57())
+ PredictableSelectIsExpensive = true;
}
void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
- if (VT == MVT::v2f32) {
+ if (VT == MVT::v2f32 || VT == MVT::v4f16) {
setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
- } else if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
@@ -523,7 +623,8 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+ for (MVT InnerVT : MVT::all_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, InnerVT, VT.getSimpleVT(), Expand);
// CNT supports only B element sizes.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
@@ -727,6 +828,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
+ case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
@@ -756,6 +858,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
+ case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
+ case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
}
}
@@ -774,7 +878,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
@@ -1020,6 +1125,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
@@ -1051,9 +1158,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
case ISD::SETLE:
case ISD::SETGT:
- if ((VT == MVT::i32 && C != 0x7fffffff &&
+ if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != 0x7ffffffffffffffULL &&
+ (VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
@@ -1062,9 +1169,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
case ISD::SETULE:
case ISD::SETUGT:
- if ((VT == MVT::i32 && C != 0xffffffff &&
+ if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != 0xfffffffffffffffULL &&
+ (VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
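A worked instance of the C+1 rewrite above (the INT32_MAX/UINT32_MAX-style guards keep the increment from wrapping):

  // 0x1FFF is not a legal 12-bit (optionally shifted) arithmetic immediate,
  // but 0x2000 is, so "x <= 0x1FFF" (SETULE) becomes "x < 0x2000" (SETULT)
  // and no constant needs to be materialized.
  bool ule_example(unsigned x) { return x <= 0x1FFFu; }  // cmp w0, #0x2000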
@@ -1074,9 +1181,45 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
-
- SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign-extending load and cmn to avoid materializing the
+ // -1 constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // becomes
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
+ // both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+ if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
+ isa<LoadSDNode>(LHS)) {
+ if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt,
+ DAG.getConstant(ValueofRHS, RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
+ AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ return Cmp;
+ }
+ }
+ }
+ }
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
return Cmp;
}
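A source-level pattern the new block should catch, as a hedged illustration (the helper is hypothetical):

  // Comparing a zero-extended i16 load against 65535 (i.e. -1 as i16).
  // Per the comment above, this selects ldrsh + cmn #1 instead of
  // movz + ldrh + cmp.
  bool is_all_ones(const unsigned short *p) { return *p == 0xFFFFu; }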
@@ -1333,8 +1476,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- // The data thing is not used.
- // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
@@ -1349,6 +1491,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
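The same mask assembly, restated as a tiny standalone sketch (the parameters mirror LowerPREFETCH's locals, and the Locality value is assumed to have already been remapped by the elided lines above):

  unsigned encodePrfOp(bool IsWrite, bool IsData, unsigned Locality,
                       bool IsStream) {
    return (IsWrite << 4) |       // Load/Store bit
           (!IsData << 3) |       // IsDataCache bit
           (Locality << 1) |      // Cache level bits
           (unsigned)IsStream;    // Stream bit
  }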
@@ -1400,7 +1543,10 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
SDLoc dl(Op);
- SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
+ MVT ExtVT =
+ MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
+ VT.getVectorNumElements());
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
@@ -1505,7 +1651,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
@@ -1529,6 +1675,197 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
0);
}
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
+static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
+}
+
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ EVT VT = N->getValueType(0);
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ if (!isIntN(HalfSize, C->getSExtValue()))
+ return false;
+ } else {
+ if (!isUIntN(HalfSize, C->getZExtValue()))
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
+}
+
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = AArch64ISD::SMULL;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = AArch64ISD::UMULL;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = AArch64ISD::SMULL;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = AArch64ISD::UMULL;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = AArch64ISD::UMULL;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to an S/UMULL instruction.
+ SDLoc DL(Op);
+ SDValue Op0;
+ SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
+ if (!isMLA) {
+ Op0 = skipExtensionForVectorMULL(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+ // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
+ // isel lowering to take advantage of back-to-back s/umul + s/umla with no
+ // stall. This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
+ SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
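A loop that, once vectorized, yields the mul-of-sign-extends pattern this lowering detects (hedged: whether the vectorizer forms the 128-bit multiply depends on its cost model):

  // mul (sext v4i16 to v4i32), (sext v4i16 to v4i32) should now select
  // smull v0.4s, v0.4h, v1.4h instead of two widens plus a 32-bit multiply.
  void widening_mul(int *r, const short *a, const short *b, int n) {
    for (int i = 0; i < n; ++i)
      r[i] = (int)a[i] * (int)b[i];
  }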
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
@@ -1629,6 +1966,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFP_TO_INT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
+ case ISD::MUL:
+ return LowerMUL(Op, DAG);
}
}
@@ -1643,8 +1982,7 @@ unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
#include "AArch64GenCallingConv.inc"
-/// Selects the correct CCAssignFn for a the given CallingConvention
-/// value.
+/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
@@ -1669,8 +2007,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
@@ -1774,10 +2112,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
uint32_t BEAlign = 0;
- if (ArgSize < 8 && !Subtarget->isLittleEndian())
+ if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
+ !Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
@@ -1809,7 +2148,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- MemVT, false, false, false, nullptr);
+ MemVT, false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1941,8 +2280,8 @@ SDValue AArch64TargetLowering::LowerCallResult(
: RetCC_AArch64_AAPCS;
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
@@ -2011,6 +2350,21 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return false;
}
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on AArch64 when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ const Triple TT(getTargetMachine().getTargetTriple());
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ return false;
+ }
+
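Illustration of the excluded case (names hypothetical): per the AAELF behaviour described above, the linker may turn a BL to an undefined weak symbol into a NOP, but a tail-call branch has no such guarantee, so the call must remain a normal call followed by a return.

  extern void maybe_absent(void) __attribute__((weak));
  void wrapper(void) {
    maybe_absent();   // kept as bl + ret, not converted to a b tail call
  }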
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
@@ -2028,8 +2382,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -2041,13 +2395,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
@@ -2072,8 +2426,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return true;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
@@ -2170,8 +2524,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -2316,9 +2670,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// common case. It should also work for fundamental types too.
uint32_t BEAlign = 0;
unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
- : VA.getLocVT().getSizeInBits();
+ : VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
- if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+ if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
+ !Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
@@ -2350,8 +2705,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVolatile = */ false,
- /*alwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false,
+ /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -2440,7 +2795,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
if (IsThisReturn) {
@@ -2494,7 +2850,7 @@ bool AArch64TargetLowering::CanLowerReturn(
? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
@@ -2508,8 +2864,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
@@ -2560,7 +2916,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -2575,6 +2932,25 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
}
+ if ((OpFlags & AArch64II::MO_CONSTPOOL) != 0) {
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ "use of MO_CONSTPOOL only supported on small model");
+ SDValue Hi = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, AArch64II::MO_PAGE);
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
+ SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags);
+ SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+ return GlobalAddr;
+ }
+
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
@@ -2651,7 +3027,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask();
@@ -2701,7 +3078,8 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask();
@@ -2916,11 +3294,6 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
}
@@ -2936,18 +3309,29 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBNZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
+ } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
+ // Don't combine AND since emitComparison converts the AND to an ANDS
+ // (a.k.a. TST) and the test in the test-bit-and-branch instruction
+ // becomes redundant. This would also increase register pressure.
+ uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
+ DAG.getConstant(Mask, MVT::i64), Dest);
}
}
+ if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
+ LHS.getOpcode() != ISD::AND) {
+ // Don't combine AND since emitComparison converts the AND to an ANDS
+ // (a.k.a. TST) and the test in the test-bit-and-branch instruction
+ // becomes redundant. This would also increase register pressure.
+ uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
+ DAG.getConstant(Mask, MVT::i64), Dest);
+ }
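The branch shapes these two blocks match, as a small illustration: a signed comparison against 0 or -1 is just a test of the sign bit, so it can branch with tbnz/tbz directly.

  extern void on_negative(void);
  extern void on_nonnegative(void);
  void branch_on_sign(long x) {
    if (x < 0)     // SETLT vs 0  -> tbnz x0, #63
      on_negative();
    if (x > -1)    // SETGT vs -1 -> tbz x0, #63
      on_nonnegative();
  }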
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
@@ -3062,6 +3446,9 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
return SDValue();
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
@@ -4013,8 +4400,10 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
- if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal))
+ if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
+ CVal = C->getSExtValue();
break;
+ }
return;
}
// The K and L constraints apply *only* to logical immediates, including
@@ -4138,10 +4527,30 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+ // Code should guarantee that element i in Vec starts at element
+ // "WindowBase + i * WindowScale" in ShuffleVec.
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
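A hypothetical one-line helper restating the invariant the struct documents; it matches the ExtractBase computation near the end of ReconstructShuffle:

  // Lane in ShuffleVec where element EltNo of the original Vec begins.
  int firstShuffleLane(int EltNo, int WindowScale, int WindowBase) {
    return EltNo * WindowScale + WindowBase;
  }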
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
@@ -4152,133 +4561,155 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
return SDValue();
}
- // Record this extraction against the appropriate vector if possible...
+ // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
- // involved.
- if (SourceVecs.size() > 2)
+ // are involved.
+ if (Sources.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = { 0, 0 };
- int OffsetMultipliers[2] = { 1, 1 };
-
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
- SDValue CurSource = SourceVecs[i];
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
- // Then bitcast it to the vector which holds asserted element type,
- // and record the multiplier of element width between SourceVecs and
- // Build_vector which is needed to extract the correct lanes later.
- EVT CastVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- SourceVecs[i].getValueSizeInBits() /
- VT.getVectorElementType().getSizeInBits());
-
- CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
- OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
- NumSrcElts *= OffsetMultipliers[i];
- MaxElts[i] *= OffsetMultipliers[i];
- MinElts[i] *= OffsetMultipliers[i];
+ // Find out the smallest element size among the result and the two sources,
+ // and use it as the element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy)) {
+ SmallestEltTy = SrcEltTy;
}
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
- if (CurSource.getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = CurSource;
- VEXTOffsets[i] = 0;
+ // If the source vector is too wide or too narrow, we may nevertheless be able
+ // to construct a compatible shuffle either by concatenating it with UNDEF or
+ // extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
continue;
- } else if (NumSrcElts < NumElts) {
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
+
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
- DAG.getUNDEF(CurSource.getValueType()));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
- // Since only 64-bit and 128-bit vectors are legal on ARM and
- // we've eliminated the other cases...
- assert(NumSrcElts == 2 * NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
+ assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
- if (MaxElts[i] - MinElts[i] >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (MinElts[i] >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
- } else if (MaxElts[i] < NumElts) {
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, MVT::i64));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, MVT::i64));
} else {
// An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
- unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
- ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(Imm, MVT::i32));
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, MVT::i64));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, MVT::i64));
+ unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
+
+ Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
}
}
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+ // The stars all align; our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
+ if (Entry.getOpcode() == ISD::UNDEF)
continue;
- }
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt =
- cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
- VEXTOffsets[1]);
- }
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+ // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
+ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
+ // segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
- return SDValue();
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
// check if an EXT instruction can handle the shuffle mask when the
@@ -4607,7 +5038,8 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
- if (VT.getVectorElementType() == MVT::i16)
+ if (VT.getVectorElementType() == MVT::i16 ||
+ VT.getVectorElementType() == MVT::f16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
@@ -4727,7 +5159,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
- if (EltType == MVT::i16)
+ if (EltType == MVT::i16 || EltType == MVT::f16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
@@ -4857,7 +5289,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
- if (ScalarVT.getSizeInBits() < 32)
+
+ if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
@@ -4945,7 +5378,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -4954,7 +5387,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -4963,7 +5396,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -4972,7 +5405,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -4981,7 +5414,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -4990,7 +5423,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5145,7 +5578,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5154,7 +5587,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5163,7 +5596,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5172,7 +5605,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5181,7 +5614,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5190,7 +5623,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5263,13 +5696,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
@@ -5278,7 +5711,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5287,7 +5720,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5296,7 +5729,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5305,7 +5738,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5314,7 +5747,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5323,7 +5756,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5332,7 +5765,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5341,7 +5774,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
@@ -5349,7 +5782,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The few faces of FMOV...
@@ -5358,7 +5791,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
@@ -5366,7 +5799,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The many faces of MVNI...
@@ -5377,7 +5810,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5386,7 +5819,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5395,7 +5828,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5404,7 +5837,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5413,7 +5846,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5422,7 +5855,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5431,7 +5864,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5440,7 +5873,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5616,11 +6049,12 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
+ VT == MVT::v8f16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
@@ -5649,11 +6083,12 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
+ VT == MVT::v8f16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
@@ -6187,7 +6622,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast)))
+ (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
return Size >= 8 ? MVT::i64 : MVT::i32;
@@ -6382,6 +6817,48 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
return performIntegerAbsCombine(N, DAG);
}
+SDValue
+AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ // fold (sdiv X, pow2)
+ EVT VT = N->getValueType(0);
+ if ((VT != MVT::i32 && VT != MVT::i64) ||
+ !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ unsigned Lg2 = Divisor.countTrailingZeros();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+
+ // Add (N0 < 0) ? Pow2 - 1 : 0;
+ SDValue CCVal;
+ SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
+
+ if (Created) {
+ Created->push_back(Cmp.getNode());
+ Created->push_back(Add.getNode());
+ Created->push_back(CSel.getNode());
+ }
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ if (Created)
+ Created->push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+}
+
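As a sketch of the fold this implements (standalone C++, not part of the patch; the real lowering emits the SUBS/CSEL/SRA nodes built above), signed division by a power of two becomes an add, a conditional select, and an arithmetic shift:

  #include <cstdint>

  // Scalar model of the BuildSDIVPow2 sequence for X / (1 << Lg2),
  // rounding toward zero as C and LLVM's sdiv require.
  int64_t sdivPow2(int64_t X, unsigned Lg2, bool DivisorIsNegative) {
    int64_t Biased = X + ((int64_t(1) << Lg2) - 1); // ADD N0, Pow2MinusOne
    int64_t Sel = (X < 0) ? Biased : X;             // CSEL on the compare with 0
    int64_t Quot = Sel >> Lg2;                      // SRA by Lg2
    return DivisorIsNegative ? -Quot : Quot;        // SUB 0, Quot if divisor < 0
  }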
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -6459,14 +6936,14 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
- // Now check that the other operand of the AND is a constant splat. We could
+ // Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that it would be a benefit, as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
- // Bail out if the vector isn't a constant splat.
- if (!BV->getConstantSplatNode())
+ // Bail out if the vector isn't a constant.
+ if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
@@ -6486,7 +6963,8 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6505,7 +6983,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
// conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an integer-to-vector-move UOP and improves throughput.
SDValue N0 = N->getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -7266,11 +7744,11 @@ static SDValue performExtendCombine(SDNode *N,
// If the vector type isn't a simple VT, it's beyond the scope of what
// we're worried about here. Let legalization do its thing and hope for
// the best.
- if (!ResVT.isSimple())
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src->getValueType(0);
+ if (!ResVT.isSimple() || !SrcVT.isSimple())
return SDValue();
- SDValue Src = N->getOperand(0);
- MVT SrcVT = Src->getValueType(0).getSimpleVT();
// If the source VT is a 64-bit vector, we can play games and get the
// better results we want.
if (SrcVT.getSizeInBits() != 64)
@@ -7294,9 +7772,9 @@ static SDValue performExtendCombine(SDNode *N,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(0));
+ DAG.getConstant(0, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -7418,9 +7896,9 @@ static SDValue performSTORECombine(SDNode *N,
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(0));
+ DAG.getConstant(0, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(NumElts));
+ DAG.getConstant(NumElts, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
@@ -7504,7 +7982,7 @@ static SDValue performPostLD1Combine(SDNode *N,
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, 3));
+ SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
@@ -7634,7 +8112,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
@@ -7655,10 +8133,272 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
return SDValue();
}
+// Checks to see if the value has the prescribed width and returns information
+// about its extension mode.
+static
+bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
+ ExtType = ISD::NON_EXTLOAD;
+ switch(V.getNode()->getOpcode()) {
+ default:
+ return false;
+ case ISD::LOAD: {
+ LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
+ if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
+ || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
+ ExtType = LoadNode->getExtensionType();
+ return true;
+ }
+ return false;
+ }
+ case ISD::AssertSext: {
+ VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+ if ((TypeNode->getVT() == MVT::i8 && width == 8)
+ || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ ExtType = ISD::SEXTLOAD;
+ return true;
+ }
+ return false;
+ }
+ case ISD::AssertZext: {
+ VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+ if ((TypeNode->getVT() == MVT::i8 && width == 8)
+ || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ ExtType = ISD::ZEXTLOAD;
+ return true;
+ }
+ return false;
+ }
+ case ISD::Constant:
+ case ISD::TargetConstant: {
+ if (std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
+ 1LL << (width - 1))
+ return true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
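A hypothetical caller (the SDValues A and B below are illustrative) would use this to prove that an i32 value really carries only 8 meaningful bits before doing 8-bit reasoning about it:

  ISD::LoadExtType ExtA, ExtB;
  // Both operands must be provably i8-sized (a narrow load, an
  // AssertSext/AssertZext, or a constant that fits) and, for loads and
  // asserts, agree on how they were extended to i32.
  if (checkValueWidth(A, 8, ExtA) && checkValueWidth(B, 8, ExtB) &&
      ExtA == ExtB) {
    // Safe to treat the wider operation as an 8-bit one.
  }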
+// This function does a whole lot of voodoo to determine whether the tests are
+// equivalent with and without a mask. Essentially, what happens is that given a
+// DAG resembling:
+//
+// +-------------+ +-------------+ +-------------+ +-------------+
+// | Input | | AddConstant | | CompConstant| | CC |
+// +-------------+ +-------------+ +-------------+ +-------------+
+// | | | |
+// V V | +----------+
+// +-------------+ +----+ | |
+// | ADD | |0xff| | |
+// +-------------+ +----+ | |
+// | | | |
+// V V | |
+// +-------------+ | |
+// | AND | | |
+// +-------------+ | |
+// | | |
+// +-----+ | |
+// | | |
+// V V V
+// +-------------+
+// | CMP |
+// +-------------+
+//
+// The AND node may be safely removed for some combinations of inputs. In
+// particular we need to take into account the extension type of the Input,
+// the exact values of AddConstant, CompConstant, and CC, along with the nominal
+// width of the input (this can work for inputs of any width; the graph above is
+// specific to 8 bits).
+//
+// The specific equations were worked out by generating output tables for each
+// AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
+// problem was simplified by working with 4-bit inputs, which means we only
+// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
+// extension (8,15), 8 patterns unique to sign extension (-8,-1), and 8
+// patterns present in both extensions (0,7). For every distinct pair of
+// AddConstant and CompConstant bit patterns we can consider the masked and
+// unmasked versions to be equivalent if the result of this function is true for
+// all 16 distinct bit patterns of the current extension type of Input (w0).
+//
+// sub w8, w0, w1
+// and w10, w8, #0x0f
+// cmp w8, w2
+// cset w9, AArch64CC
+// cmp w10, w2
+// cset w11, AArch64CC
+// cmp w9, w11
+// cset w0, eq
+// ret
+//
+// Since the function above shows when the outputs are equivalent it defines
+// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
+// would be expensive to run during compiles. The equations below were written
+// in a test harness that confirmed they gave outputs equivalent to the
+// function above for all inputs, so they can be used to determine whether the
+// removal is legal instead.
+//
+// isEquivalentMaskless() is the code for testing whether the AND can be
+// removed, factored out of the DAG recognition since the DAG can take several
+// forms.
+
+static
+bool isEquivalentMaskless(unsigned CC, unsigned width,
+ ISD::LoadExtType ExtType, signed AddConstant,
+ signed CompConstant) {
+ // By being careful about our equations and writing them only in terms of
+ // symbolic values and well-known constants (0, 1, -1, MaxUInt) we can
+ // make them generally applicable to all bit widths.
+ signed MaxUInt = (1 << width);
+
+ // For the purposes of these comparisons sign extending the type is
+ // equivalent to zero extending the add and displacing it by half the integer
+ // width. Provided we are careful and make sure our equations are valid over
+ // the whole range we can just adjust the input and avoid writing equations
+ // for sign extended inputs.
+ if (ExtType == ISD::SEXTLOAD)
+ AddConstant -= (1 << (width-1));
+
+ switch(CC) {
+ case AArch64CC::LE:
+ case AArch64CC::GT: {
+ if ((AddConstant == 0) ||
+ (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
+ (AddConstant >= 0 && CompConstant < 0) ||
+ (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
+ return true;
+ } break;
+ case AArch64CC::LT:
+ case AArch64CC::GE: {
+ if ((AddConstant == 0) ||
+ (AddConstant >= 0 && CompConstant <= 0) ||
+ (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
+ return true;
+ } break;
+ case AArch64CC::HI:
+ case AArch64CC::LS: {
+ if ((AddConstant >= 0 && CompConstant < 0) ||
+ (AddConstant <= 0 && CompConstant >= -1 &&
+ CompConstant < AddConstant + MaxUInt))
+ return true;
+ } break;
+ case AArch64CC::PL:
+ case AArch64CC::MI: {
+ if ((AddConstant == 0) ||
+ (AddConstant > 0 && CompConstant <= 0) ||
+ (AddConstant < 0 && CompConstant <= AddConstant))
+ return true;
+ } break;
+ case AArch64CC::LO:
+ case AArch64CC::HS: {
+ if ((AddConstant >= 0 && CompConstant <= 0) ||
+ (AddConstant <= 0 && CompConstant >= 0 &&
+ CompConstant <= AddConstant + MaxUInt))
+ return true;
+ } break;
+ case AArch64CC::EQ:
+ case AArch64CC::NE: {
+ if ((AddConstant > 0 && CompConstant < 0) ||
+ (AddConstant < 0 && CompConstant >= 0 &&
+ CompConstant < AddConstant + MaxUInt) ||
+ (AddConstant >= 0 && CompConstant >= 0 &&
+ CompConstant >= AddConstant) ||
+ (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
+
+ return true;
+ } break;
+ case AArch64CC::VS:
+ case AArch64CC::VC:
+ case AArch64CC::AL:
+ case AArch64CC::NV:
+ return true;
+ case AArch64CC::Invalid:
+ break;
+ }
+
+ return false;
+}
+
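The derivation can be re-checked off target with a brute-force loop; a self-contained sketch for the EQ/NE pair over zero-extended 8-bit inputs (not part of the patch):

  // Returns true when dropping the 0xff mask cannot change the NE test
  // for any 8-bit zero-extended input W0, given the two constants.
  bool nePreservedWithoutMask(int AddC, int CompC) {
    for (int W0 = 0; W0 < 256; ++W0) {
      bool Masked = ((W0 + AddC) & 0xff) != CompC; // with the AND
      bool Unmasked = (W0 + AddC) != CompC;        // AND removed
      if (Masked != Unmasked)
        return false;
    }
    return true;
  }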
+static
+SDValue performCONDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG, unsigned CCIndex,
+ unsigned CmpIndex) {
+ unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
+ SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
+ unsigned CondOpcode = SubsNode->getOpcode();
+
+ if (CondOpcode != AArch64ISD::SUBS)
+ return SDValue();
+
+ // There is a SUBS feeding this condition. Is it fed by a mask we can
+ // use?
+
+ SDNode *AndNode = SubsNode->getOperand(0).getNode();
+ unsigned MaskBits = 0;
+
+ if (AndNode->getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
+ uint32_t CNV = CN->getZExtValue();
+ if (CNV == 255)
+ MaskBits = 8;
+ else if (CNV == 65535)
+ MaskBits = 16;
+ }
+
+ if (!MaskBits)
+ return SDValue();
+
+ SDValue AddValue = AndNode->getOperand(0);
+
+ if (AddValue.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // The basic DAG structure is correct; grab the inputs and validate them.
+
+ SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
+ SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
+ SDValue SubsInputValue = SubsNode->getOperand(1);
+
+ // The mask is present and all of the values come from a smaller type, so
+ // let's see if the mask is superfluous.
+
+ if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
+ !isa<ConstantSDNode>(SubsInputValue.getNode()))
+ return SDValue();
+
+ ISD::LoadExtType ExtType;
+
+ if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
+ !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
+ !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
+ return SDValue();
+
+ if (!isEquivalentMaskless(CC, MaskBits, ExtType,
+ cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
+ cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
+ return SDValue();
+
+ // The AND is not necessary, remove it.
+
+ SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
+ SubsNode->getValueType(1));
+ SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
+
+ SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
+ DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
+
+ return SDValue(N, 0);
+}
+
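One source shape that matches this recognizer (constant add operand, constant compare operand; the values here are made up):

  // 'a' is known to be 8 bits wide, so the DAG is
  // SUBS(AND(ADD(a, 10), 0xff), 20); when isEquivalentMaskless() approves,
  // the AND is dropped and the SUBS compares ADD(a, 10) directly.
  int inRange(unsigned char a) {
    return ((a + 10) & 0xff) < 20;
  }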
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3);
+ if (NV.getNode())
+ N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
@@ -7747,21 +8487,29 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (!N->getOperand(1).getValueType().isVector())
+ if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
return SDValue();
- if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+ // If NumMaskElts == 0, the comparison is larger than the select result. The
+ // largest real NEON comparison is 64 bits per lane, which means the result is
+ // at most 32 bits and an illegal vector. Just bail out for now.
+ EVT SrcVT = N0.getOperand(0).getValueType();
+
+ // Don't try to do this optimization when the setcc itself has i1 operands.
+ // There are no legal vectors of i1, so this would be pointless.
+ if (SrcVT == MVT::i1)
return SDValue();
- SDLoc DL(N0);
+ int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
+ if (!ResVT.isVector() || NumMaskElts == 0)
+ return SDValue();
- EVT SrcVT = N0.getOperand(0).getValueType();
- SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT,
- ResVT.getSizeInBits() / SrcVT.getSizeInBits());
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
+ SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
@@ -7771,8 +8519,8 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
- Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
- Mask);
+ Mask = DAG.getNode(ISD::BITCAST, DL,
+ ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
@@ -7792,7 +8540,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- return performIntToFpCombine(N, DAG);
+ return performIntToFpCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -7813,6 +8561,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
+ case AArch64ISD::CSEL:
+ return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case ISD::INSERT_VECTOR_ELT:
@@ -7968,13 +8718,12 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
- if (N->getValueType(0) != MVT::i16)
- return;
-
SDLoc DL(N);
SDValue Op = N->getOperand(0);
- assert(Op.getValueType() == MVT::f16 &&
- "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+
+ if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+ return;
+
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
@@ -8000,17 +8749,14 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
}
-bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
- // Loads and stores less than 128-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 128;
+bool AArch64TargetLowering::useLoadStackGuardNode() const {
+ return true;
+}
- // For the real atomic operations, we have ldxr/stxr up to 128 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 128;
+bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are three or more FDIVs.
+ return NumUsers > 2;
}
TargetLoweringBase::LegalizeTypeAction
@@ -8025,12 +8771,37 @@ AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return Size == 128;
+}
+
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+ return Size == 128;
+}
+
+// For the real atomic operations, we have ldxr/stxr up to 128 bits.
+bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ return Size <= 128;
+}
+
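What the requested expansion looks like for an i128 atomic access (schematic only; the loop is stitched together by the atomic expansion pass from emitLoadLinked/emitStoreConditional below):

  // loop:
  //   lo, hi = ldxp [addr]           ; emitLoadLinked returns {i64, i64}
  //   ...compute new lo2, hi2...
  //   fail = stxp lo2, hi2, [addr]   ; emitStoreConditional
  //   if (fail) goto loop            ; cbnz on the status result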
+bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+ return true;
+}
+
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsAcquire = isAtLeastAcquire(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i64, i64} and we have to recombine them into a
@@ -8065,8 +8836,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsRelease = isAtLeastRelease(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
@@ -8093,3 +8863,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Val, Stxr->getFunctionType()->getParamType(0)),
Addr);
}
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ return Ty->isArrayTy();
+}
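The practical effect is that IR array arguments must be assigned to consecutive registers or passed entirely on the stack; a hypothetical signature:

  // With the hook returning true for array types, the calling-convention
  // code must place all four elements in consecutive FP registers
  // (e.g. d0..d3) or push the whole aggregate, never split it across both.
  //   define void @takesBlock([4 x double] %v)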
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index cb0b9ef261dc..cc25bede8d62 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H
-#define LLVM_TARGET_AArch64_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -162,6 +162,16 @@ enum {
SITOF,
UITOF,
+ /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
+ /// world w.r.t. vectors, which causes additional REV instructions to be
+ /// generated to compensate for the byte-swapping. But sometimes we do
+ /// need to re-interpret the data in SIMD vector registers in big-endian
+ /// mode without emitting such REV instructions.
+ NVCAST,
+
+ SMULL,
+ UMULL,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -197,10 +207,9 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(TargetMachine &TM);
+ explicit AArch64TargetLowering(const TargetMachine &TM);
- /// Selects the correct CCAssignFn for a the given CallingConvention
- /// value.
+ /// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
@@ -212,10 +221,11 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
- bool *Fast = nullptr) const override {
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override {
if (RequireStrictAlign)
return false;
// FIXME: True for Cyclone, but not necessarily for others.
@@ -317,13 +327,17 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool hasLoadLinkedStoreConditional() const override;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -424,6 +438,10 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const override;
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+
ConstraintType
getConstraintType(const std::string &Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -455,6 +473,10 @@ private:
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+
+ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+ CallingConv::ID CallConv,
+ bool isVarArg) const override;
};
namespace AArch64 {
@@ -464,4 +486,4 @@ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
} // end namespace llvm
-#endif // LLVM_TARGET_AArch64_ISELLOWERING_H
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 3b9e3c630596..4923a1161dfc 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -29,8 +29,7 @@ def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(Ordering != AcquireRelease && "unexpected load ordering");
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
+ return isAtLeastAcquire(Ordering);
}]>;
// An atomic load operation that does not need either acquire or release
@@ -38,7 +37,7 @@ class acquiring_load<PatFrag base>
class relaxed_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
+ return !isAtLeastAcquire(Ordering);
}]>;
// 8-bit loads
@@ -114,14 +113,14 @@ class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
assert(Ordering != AcquireRelease && "unexpected store ordering");
- return Ordering == Release || Ordering == SequentiallyConsistent;
+ return isAtLeastRelease(Ordering);
}]>;
// An atomic store operation that doesn't actually need to be atomic on AArch64.
class relaxed_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
+ return !isAtLeastRelease(Ordering);
}]>;
// 8-bit stores
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index e88c0c038c33..d295c028ec45 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -843,7 +843,7 @@ def MRSSystemRegisterOperand : AsmOperandClass {
let ParserMethod = "tryParseSysReg";
let DiagnosticType = "MRS";
}
-// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate.
+// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate.
def mrs_sysreg_op : Operand<i32> {
let ParserMatchClass = MRSSystemRegisterOperand;
let DecoderMethod = "DecodeMRSSystemRegister";
@@ -863,9 +863,8 @@ def msr_sysreg_op : Operand<i32> {
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
}
// FIXME: Some of these def NZCV, others don't. Best way to model that?
@@ -873,9 +872,8 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
// would do it, but feels like overkill at this point.
class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
"msr", "\t$systemreg, $Rt"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
}
def SystemPStateFieldOperand : AsmOperandClass {
@@ -1351,14 +1349,15 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
}
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+ // MADD/MSUB generation is decided by MachineCombiner.cpp
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
- [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
+ [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
- [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
+ [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -1636,7 +1635,7 @@ class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
multiclass AddSub<bit isSub, string mnemonic,
SDPatternOperator OpNode = null_frag> {
- let hasSideEffects = 0 in {
+ let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
@@ -1961,14 +1960,14 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
string Alias> {
- let AddedComplexity = 6 in
+ let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
[(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
logical_imm32:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
}
- let AddedComplexity = 6 in
+ let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
[(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
logical_imm64:$imm))]> {
@@ -2013,8 +2012,10 @@ class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
// Split from LogicalImm as not all instructions have both.
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
SDPatternOperator OpNode> {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+ }
def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
[(set GPR32:$Rd, (OpNode GPR32:$Rn,
@@ -2995,7 +2996,7 @@ class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePreIdx<sz, V, opc,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset), asm,
- "$Rn = $wback", []>,
+ "$Rn = $wback,@earlyclobber $wback", []>,
Sched<[WriteLD, WriteAdr]>;
let mayStore = 1, mayLoad = 0 in
@@ -3004,7 +3005,7 @@ class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePreIdx<sz, V, opc,
(outs GPR64sp:$wback),
(ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback",
+ asm, "$Rn = $wback,@earlyclobber $wback",
[(set GPR64sp:$wback,
(storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
Sched<[WriteAdr, WriteST]>;
@@ -3014,7 +3015,6 @@ class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
// Load/store post-indexed
//---
-// (pre-index) load/stores.
class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
string asm, string cstr, list<dag> pat>
: I<oops, iops, asm, "\t$Rt, [$Rn], $offset", cstr, pat> {
@@ -3042,7 +3042,7 @@ class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePostIdx<sz, V, opc,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback", []>,
+ asm, "$Rn = $wback,@earlyclobber $wback", []>,
Sched<[WriteLD, WriteI]>;
let mayStore = 1, mayLoad = 0 in
@@ -3051,7 +3051,7 @@ class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePostIdx<sz, V, opc,
(outs GPR64sp:$wback),
(ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback",
+ asm, "$Rn = $wback,@earlyclobber $wback",
[(set GPR64sp:$wback,
(storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
Sched<[WriteAdr, WriteST, ReadAdrBase]>;
@@ -3115,7 +3115,7 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
// (pre-indexed)
class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback", []> {
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback,@earlyclobber $wback", []> {
bits<5> Rt;
bits<5> Rt2;
bits<5> Rn;
@@ -3156,7 +3156,7 @@ class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback", []> {
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback,@earlyclobber $wback", []> {
bits<5> Rt;
bits<5> Rt2;
bits<5> Rn;
@@ -4383,7 +4383,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
}
multiclass SIMDVectorLShiftLongBySizeBHS {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
"shll", ".8h", ".8b", "8">;
def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
@@ -5260,6 +5260,10 @@ multiclass SIMDZipVector<bits<3>opc, string asm,
def v2i64 : BaseSIMDZipVector<0b111, opc, V128,
asm, ".2d", OpNode, v2i64>;
+ def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)),
+ (!cast<Instruction>(NAME#"v4i16") V64:$Rn, V64:$Rm)>;
+ def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v8i16") V128:$Rn, V128:$Rm)>;
def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)),
(!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)),
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ce85b2ceba95..e582ed4d0650 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineCombinerPattern.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -260,8 +261,9 @@ void AArch64InstrInfo::instantiateCondBranch(
BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
} else {
// Folded compare-and-branch
+ // Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
@@ -541,6 +543,51 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
CC);
}
+// FIXME: this implementation should be micro-architecture dependent, so a
+// micro-architecture target hook should be introduced here in future.
+bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
+ if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
+ return MI->isAsCheapAsAMove();
+
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+
+ // add/sub on register without shift
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ return (MI->getOperand(3).getImm() == 0);
+
+ // logical ops on immediate
+ case AArch64::ANDWri:
+ case AArch64::ANDXri:
+ case AArch64::EORWri:
+ case AArch64::EORXri:
+ case AArch64::ORRWri:
+ case AArch64::ORRXri:
+ return true;
+
+ // logical ops on register without shift
+ case AArch64::ANDWrr:
+ case AArch64::ANDXrr:
+ case AArch64::BICWrr:
+ case AArch64::BICXrr:
+ case AArch64::EONWrr:
+ case AArch64::EONXrr:
+ case AArch64::EORWrr:
+ case AArch64::EORXrr:
+ case AArch64::ORNWrr:
+ case AArch64::ORNXrr:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXrr:
+ return true;
+ }
+
+ llvm_unreachable("Unknown opcode to check as cheap as a move!");
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -561,6 +608,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
}
+bool
+AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb,
+ AliasAnalysis *AA) const {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned BaseRegA = 0, BaseRegB = 0;
+ int OffsetA = 0, OffsetB = 0;
+ int WidthA = 0, WidthB = 0;
+
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must be a store or a load");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must be a store or a load");
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Retrieve the base register, offset from the base register and width. Width
+ // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
+ // base registers are identical, and the offset of a lower memory access +
+ // the width doesn't overlap the offset of a higher memory access,
+ // then the memory accesses are different.
+ if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
+ getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
+ if (BaseRegA == BaseRegB) {
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+ }
+ return false;
+}
+
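A worked example of the disjointness test (register names illustrative):

  // str x1, [x0]      -> BaseRegA = X0, OffsetA = 0, WidthA = 8
  // ldr x2, [x0, #8]  -> BaseRegB = X0, OffsetB = 8, WidthB = 8
  // Same base, LowOffset + LowWidth = 0 + 8 <= 8 = HighOffset, so the
  // accesses are trivially disjoint and may be reordered.
  // With the store at [x0, #4] instead, 4 + 8 > 8 and the function
  // conservatively returns false.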
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -595,7 +678,8 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- CmpValue = MI->getOperand(2).getImm();
+ // FIXME: CmpValue is only ever compared against zero, so normalize it
+ // to 0 or 1 here.
+ CmpValue = (MI->getOperand(2).getImm() != 0);
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -604,9 +688,14 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- CmpValue = AArch64_AM::decodeLogicalImmediate(
- MI->getOperand(2).getImm(),
- MI->getOpcode() == AArch64::ANDSWri ? 32 : 64);
+ // FIXME: The return type of decodeLogicalImmediate is uint64_t,
+ // while the type of CmpValue is int. When converting uint64_t to int,
+ // the high 32 bits of uint64_t will be lost.
+ // In fact this caused a bug in spec2006-483.xalancbmk.
+ // CmpValue is only used to compare against zero in optimizeCompareInstr.
+ CmpValue = (AArch64_AM::decodeLogicalImmediate(
+ MI->getOperand(2).getImm(),
+ MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
return true;
}
@@ -619,8 +708,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
@@ -652,6 +741,87 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
return true;
}
+/// \brief Return the opcode that does not set flags when possible; otherwise
+/// return the original opcode. The caller is responsible for doing the actual
+/// substitution and legality checking.
+static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
+ // Don't convert all compare instructions, because for some the zero register
+ // encoding becomes the sp register.
+ bool MIDefinesZeroReg = false;
+ if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
+ MIDefinesZeroReg = true;
+
+ switch (MI->getOpcode()) {
+ default:
+ return MI->getOpcode();
+ case AArch64::ADDSWrr:
+ return AArch64::ADDWrr;
+ case AArch64::ADDSWri:
+ return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
+ case AArch64::ADDSWrs:
+ return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
+ case AArch64::ADDSWrx:
+ return AArch64::ADDWrx;
+ case AArch64::ADDSXrr:
+ return AArch64::ADDXrr;
+ case AArch64::ADDSXri:
+ return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
+ case AArch64::ADDSXrs:
+ return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
+ case AArch64::ADDSXrx:
+ return AArch64::ADDXrx;
+ case AArch64::SUBSWrr:
+ return AArch64::SUBWrr;
+ case AArch64::SUBSWri:
+ return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
+ case AArch64::SUBSWrs:
+ return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
+ case AArch64::SUBSWrx:
+ return AArch64::SUBWrx;
+ case AArch64::SUBSXrr:
+ return AArch64::SUBXrr;
+ case AArch64::SUBSXri:
+ return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
+ case AArch64::SUBSXrs:
+ return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
+ case AArch64::SUBSXrx:
+ return AArch64::SUBXrx;
+ }
+}
+
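A trimmed sketch of how optimizeCompareInstr below uses this helper (error handling omitted):

  unsigned Opc = CmpInstr->getOpcode();
  unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
  // Only rewrite when a non-flag-setting twin exists and NZCV is dead.
  if (NewOpc != Opc &&
      CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) {
    CmpInstr->setDesc(get(NewOpc)); // e.g. ADDSXrr -> ADDXrr
    // ...then remove the now-dead NZCV def operand.
  }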
+/// True when the condition code could be modified on the instruction
+/// trace starting at \p From and ending at \p To.
+static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
+ const bool CheckOnlyCCWrites,
+ const TargetRegisterInfo *TRI) {
+ // We iterate backward, starting at \p To, until we hit \p From.
+ MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
+
+ // Early exit if To is at the beginning of the BB.
+ if (I == B)
+ return true;
+
+ // Check whether the definition of SrcReg is in the same basic block as
+ // Compare. If not, assume the condition code gets modified on some path.
+ if (To->getParent() != From->getParent())
+ return true;
+
+ // Check that NZCV isn't set on the trace.
+ for (--I; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
+ (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
+ // This instruction modifies or uses NZCV after the one we want to
+ // change.
+ return true;
+ if (I == B)
+ // We currently don't allow the instruction trace to cross basic
+ // block boundaries
+ return true;
+ }
+ return false;
+}
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
@@ -661,28 +831,15 @@ bool AArch64InstrInfo::optimizeCompareInstr(
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
if (Cmp_NZCV != -1) {
- unsigned NewOpc;
- switch (CmpInstr->getOpcode()) {
- default:
- return false;
- case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
- case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break;
- case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break;
- case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break;
- case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break;
- case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break;
- case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break;
- case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break;
- case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break;
- case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break;
- case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break;
- case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break;
- case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break;
- case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break;
- case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
- case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
+ if (CmpInstr->definesRegister(AArch64::WZR) ||
+ CmpInstr->definesRegister(AArch64::XZR)) {
+ CmpInstr->eraseFromParent();
+ return true;
}
-
+ unsigned Opc = CmpInstr->getOpcode();
+ unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
+ if (NewOpc == Opc)
+ return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr->setDesc(MCID);
CmpInstr->RemoveOperand(Cmp_NZCV);
@@ -693,6 +850,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
}
// Continue only if we have a "ri" where immediate is zero.
+ // FIXME: CmpValue has already been converted to 0 or 1 in the
+ // analyzeCompare function.
+ assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (CmpValue != 0 || SrcReg2 != 0)
return false;
@@ -705,36 +865,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MI)
return false;
- // We iterate backward, starting from the instruction before CmpInstr and
- // stop when reaching the definition of the source register or done with the
- // basic block, to check whether NZCV is used or modified in between.
- MachineBasicBlock::iterator I = CmpInstr, E = MI,
- B = CmpInstr->getParent()->begin();
-
- // Early exit if CmpInstr is at the beginning of the BB.
- if (I == B)
- return false;
-
- // Check whether the definition of SrcReg is in the same basic block as
- // Compare. If not, we can't optimize away the Compare.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
- // Check that NZCV isn't set between the comparison instruction and the one we
- // want to change.
+ bool CheckOnlyCCWrites = false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- for (--I; I != E; --I) {
- const MachineInstr &Instr = *I;
-
- if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
- Instr.readsRegister(AArch64::NZCV, TRI))
- // This instruction modifies or uses NZCV after the one we want to
- // change. We can't do this transformation.
- return false;
- if (I == B)
- // The 'and' is below the comparison instruction.
- return false;
- }
+ if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
+ return false;
unsigned NewOpc = MI->getOpcode();
switch (MI->getOpcode()) {
@@ -848,6 +982,56 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+ return false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+ const unsigned char MO_NC = AArch64II::MO_NC;
+
+ if ((OpFlags & AArch64II::MO_GOT) != 0) {
+ BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill).addImm(0)
+ .addMemOperand(*MI->memoperands_begin());
+ } else if (TM.getCodeModel() == CodeModel::Large) {
+ BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill).addImm(0)
+ .addMemOperand(*MI->memoperands_begin());
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
+ .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+ unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI->memoperands_begin());
+ }
+
+ MBB.erase(MI);
+
+ return true;
+}
+
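For the common small-code-model case without GOT indirection, the pseudo therefore becomes a two-instruction sequence (schematic assembly; the guard symbol name depends on the platform):

  //   adrp x8, __stack_chk_guard              ; ADRP   + MO_PAGE
  //   ldr  x8, [x8, :lo12:__stack_chk_guard]  ; LDRXui + MO_PAGEOFF|MO_NC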
+/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -963,12 +1147,14 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
return true;
}
+ break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
if (MI->getOperand(2).getImm() == 0) {
assert(MI->getDesc().getNumOperands() == 4 &&
MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
return true;
}
+ break;
}
return false;
}
@@ -991,6 +1177,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
"invalid ORRv16i8 operands");
return true;
}
+ break;
}
return false;
}
@@ -1152,6 +1339,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
};
}
+bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
+ MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
+ const TargetRegisterInfo *TRI) const {
+ // Handle only loads/stores with base register followed by immediate offset.
+ if (LdSt->getNumOperands() != 3)
+ return false;
+ if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ return false;
+
+ // Offset is calculated as the immediate operand multiplied by the scaling
+ // factor. Unscaled instructions have the scaling factor set to 1.
+ int Scale = 0;
+ switch (LdSt->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ Width = 16;
+ Scale = 1;
+ break;
+ case AArch64::LDURXi:
+ case AArch64::LDURDi:
+ case AArch64::STURXi:
+ case AArch64::STURDi:
+ Width = 8;
+ Scale = 1;
+ break;
+ case AArch64::LDURWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::STURSi:
+ Width = 4;
+ Scale = 1;
+ break;
+ case AArch64::LDURHi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::STURHi:
+ case AArch64::STURHHi:
+ Width = 2;
+ Scale = 1;
+ break;
+ case AArch64::LDURBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::STURBi:
+ case AArch64::STURBBi:
+ Width = 1;
+ Scale = 1;
+ break;
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ Scale = Width = 8;
+ break;
+ case AArch64::LDRWui:
+ case AArch64::STRWui:
+ Scale = Width = 4;
+ break;
+ case AArch64::LDRBui:
+ case AArch64::STRBui:
+ Scale = Width = 1;
+ break;
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ Scale = Width = 2;
+ break;
+ case AArch64::LDRSui:
+ case AArch64::STRSui:
+ Scale = Width = 4;
+ break;
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ Scale = Width = 8;
+ break;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = Width = 16;
+ break;
+ case AArch64::LDRBBui:
+ case AArch64::STRBBui:
+ Scale = Width = 1;
+ break;
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ Scale = Width = 2;
+ break;
+ };
+
+ BaseReg = LdSt->getOperand(1).getReg();
+ Offset = LdSt->getOperand(2).getImm() * Scale;
+ return true;
+}
+
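For example, the scaled and unscaled forms of an 8-byte load decode differently (operands illustrative):

  // LDRXui x1, x0, 2  (ldr  x1, [x0, #16]) -> Scale = 8, Width = 8, Offset = 16
  // LDURXi x1, x0, 2  (ldur x1, [x0, #2])  -> Scale = 1, Width = 8, Offset = 2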
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
@@ -1194,16 +1477,15 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
}
}
-MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx,
- uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
+MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
+ MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
+ const MDNode *Expr, DebugLoc DL) const {
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
.addFrameIndex(FrameIx)
.addImm(0)
.addImm(Offset)
- .addMetadata(MDPtr);
+ .addMetadata(Var)
+ .addMetadata(Expr);
return &*MIB;
}
@@ -2087,3 +2369,592 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
NopInst.addOperand(MCOperand::CreateImm(0));
}
+/// useMachineCombiner - return true when a target supports MachineCombiner
+bool AArch64InstrInfo::useMachineCombiner() const {
+ // AArch64 supports the combiner
+ return true;
+}
+//
+// True when Opc sets flags
+static bool isCombineInstrSettingFlag(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// 32b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate32(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDWri:
+ case AArch64::SUBWrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWri:
+ case AArch64::SUBSWrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBWri:
+ case AArch64::SUBSWri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// 64b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate64(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDXrr:
+ case AArch64::ADDXri:
+ case AArch64::SUBXrr:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXri:
+ case AArch64::SUBSXrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBXri:
+ case AArch64::SUBSXri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate(unsigned Opc) {
+ return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
+}
+
+static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned MulOpc, unsigned ZeroReg) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineInstr *MI = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ // And it needs to be in the trace (otherwise, it won't have a depth).
+ if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
+ return false;
+
+ assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
+
+  // The third input reg must be the zero register.
+ if (MI->getOperand(3).getReg() != ZeroReg)
+ return false;
+
+  // The MUL may only be used by the instruction we combine with.
+ if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ return true;
+}
+
+/// hasPattern - return true when there is potentially a faster code sequence
+/// for an instruction chain ending in \p Root. All potential patterns are
+/// listed in the \p Pattern vector. Patterns should be sorted in priority
+/// order since the pattern evaluator stops checking as soon as it finds a
+/// faster sequence.
+bool AArch64InstrInfo::hasPattern(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+ unsigned Opc = Root.getOpcode();
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+  if (!isCombineInstrCandidate(Opc))
+    return false;
+  if (isCombineInstrSettingFlag(Opc)) {
+    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
+    // When NZCV is live, bail out.
+    if (Cmp_NZCV == -1)
+      return false;
+    unsigned NewOpc = convertFlagSettingOpcode(&Root);
+    // When the opcode can't be changed, bail out.
+    // CHECKME: do we miss any cases for opcode conversion?
+    if (NewOpc == Opc)
+      return false;
+    Opc = NewOpc;
+  }
+
+ switch (Opc) {
+ default:
+ break;
+ case AArch64::ADDWrr:
+ assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
+ "ADDWrr does not have register operands");
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDXrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBWrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBXrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDWri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDXri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBWri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBXri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+ Found = true;
+ }
+ break;
+ }
+ return Found;
+}
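hasPattern() fires on add/sub roots whose multiply operand is represented as a MADD with the zero register as addend, which is how ISel encodes a plain mul. A source-level sketch of code that should reach the MC_MULADDW_OP1 pattern; the madd shown in the comment is an assumption about the final selection:

#include <cstdint>

int32_t MulAdd(int32_t A, int32_t B, int32_t C) {
  return A * B + C; // expected to fold to: madd w0, w0, w1, w2
}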
+
+/// genMadd - Generate madd instruction and combine mul and add.
+/// Example:
+/// MUL I=A,B,0
+/// ADD R,I,C
+/// ==> MADD R,A,B,C
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is index of operand in Root that is the result of
+/// the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc,
+ const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+ unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
+ MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+ unsigned ResultReg = Root.getOperand(0).getReg();
+ unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ bool Src0IsKill = MUL->getOperand(1).isKill();
+ unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ bool Src1IsKill = MUL->getOperand(2).isKill();
+ unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
+ bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+ if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ MRI.constrainRegClass(ResultReg, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ MRI.constrainRegClass(SrcReg0, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ MRI.constrainRegClass(SrcReg1, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
+ MRI.constrainRegClass(SrcReg2, RC);
+
+ MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
+ ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(SrcReg2, getKillRegState(Src2IsKill));
+ // Insert the MADD
+ InsInstrs.push_back(MIB);
+ return MUL;
+}
+
+/// genMaddR - Generate madd instruction and combine mul and add using
+/// an extra virtual register
+/// Example - the ADD operand is an immediate that must first be
+/// materialized into a register:
+/// MUL I=A,B,0
+/// ADD R,I,Imm
+/// ==> ORR V, ZR, Imm
+/// ==> MADD R,A,B,V
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is index of operand in Root that is the result of
+/// the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+/// \param VR is a virtual register that holds the value of an ADD operand
+/// (V in the example above).
+static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc,
+ unsigned VR, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+ MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+ unsigned ResultReg = Root.getOperand(0).getReg();
+ unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ bool Src0IsKill = MUL->getOperand(1).isKill();
+ unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ bool Src1IsKill = MUL->getOperand(2).isKill();
+
+ if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ MRI.constrainRegClass(ResultReg, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ MRI.constrainRegClass(SrcReg0, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ MRI.constrainRegClass(SrcReg1, RC);
+ if (TargetRegisterInfo::isVirtualRegister(VR))
+ MRI.constrainRegClass(VR, RC);
+
+ MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
+ ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(VR);
+ // Insert the MADD
+ InsInstrs.push_back(MIB);
+ return MUL;
+}
+
+/// genAlternativeCodeSequence - when hasPattern() finds a pattern,
+/// this function generates the instructions that could replace the
+/// original code sequence.
+void AArch64InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineBasicBlock &MBB = *Root.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+  MachineInstr *MUL = nullptr;
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ switch (Pattern) {
+  default:
+    // No handled pattern; leave MUL null so nothing is recorded for deletion.
+    break;
+ case MachineCombinerPattern::MC_MULADDW_OP1:
+ case MachineCombinerPattern::MC_MULADDX_OP1:
+ // MUL I=A,B,0
+ // ADD R,I,C
+ // ==> MADD R,A,B,C
+ // --- Create(MADD);
+ if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULADDW_OP2:
+ case MachineCombinerPattern::MC_MULADDX_OP2:
+ // MUL I=A,B,0
+ // ADD R,C,I
+ // ==> MADD R,A,B,C
+ // --- Create(MADD);
+ if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULADDWI_OP1:
+ case MachineCombinerPattern::MC_MULADDXI_OP1: {
+ // MUL I=A,B,0
+ // ADD R,I,Imm
+ // ==> ORR V, ZR, Imm
+ // ==> MADD R,A,B,V
+ // --- Create(MADD);
+ const TargetRegisterClass *OrrRC;
+ unsigned BitSize, OrrOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
+ OrrOpc = AArch64::ORRWri;
+ OrrRC = &AArch64::GPR32spRegClass;
+ BitSize = 32;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ OrrOpc = AArch64::ORRXri;
+ OrrRC = &AArch64::GPR64spRegClass;
+ BitSize = 64;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ uint64_t Imm = Root.getOperand(2).getImm();
+
+    // The ADD immediate may carry an optional left shift (operand 3).
+    if (Root.getOperand(3).isImm()) {
+      unsigned Val = Root.getOperand(3).getImm();
+      Imm = Imm << Val;
+    }
+ uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ }
+ break;
+ }
+ case MachineCombinerPattern::MC_MULSUBW_OP1:
+ case MachineCombinerPattern::MC_MULSUBX_OP1: {
+ // MUL I=A,B,0
+ // SUB R,I, C
+ // ==> SUB V, 0, C
+ // ==> MADD R,A,B,V // = -C + A*B
+ // --- Create(MADD);
+ const TargetRegisterClass *SubRC;
+ unsigned SubOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
+ SubOpc = AArch64::SUBWrr;
+ SubRC = &AArch64::GPR32spRegClass;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ SubOpc = AArch64::SUBXrr;
+ SubRC = &AArch64::GPR64spRegClass;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(SubRC);
+ // SUB NewVR, 0, C
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
+ .addReg(ZeroReg)
+ .addOperand(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ break;
+ }
+ case MachineCombinerPattern::MC_MULSUBW_OP2:
+ case MachineCombinerPattern::MC_MULSUBX_OP2:
+ // MUL I=A,B,0
+ // SUB R,C,I
+ // ==> MSUB R,A,B,C (computes C - A*B)
+ // --- Create(MSUB);
+ if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
+ Opc = AArch64::MSUBWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MSUBXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULSUBWI_OP1:
+ case MachineCombinerPattern::MC_MULSUBXI_OP1: {
+ // MUL I=A,B,0
+ // SUB R,I, Imm
+ // ==> ORR V, ZR, -Imm
+ // ==> MADD R,A,B,V // = -Imm + A*B
+ // --- Create(MADD);
+ const TargetRegisterClass *OrrRC;
+ unsigned BitSize, OrrOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+ OrrOpc = AArch64::ORRWri;
+ OrrRC = &AArch64::GPR32spRegClass;
+ BitSize = 32;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ OrrOpc = AArch64::ORRXri;
+ OrrRC = &AArch64::GPR64spRegClass;
+ BitSize = 64;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ int Imm = Root.getOperand(2).getImm();
+    // The SUB immediate may carry an optional left shift (operand 3).
+    if (Root.getOperand(3).isImm()) {
+      unsigned Val = Root.getOperand(3).getImm();
+      Imm = Imm << Val;
+    }
+    // Negate first, then truncate to the low BitSize bits; the cast avoids
+    // undefined behaviour when shifting a 32-bit int by 32.
+    uint64_t UImm = uint64_t(-Imm) << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ }
+ break;
+ }
+ } // end switch (Pattern)
+  // Record the MUL and ADD/SUB for deletion. When no pattern was matched or
+  // no encodable immediate was found, MUL is still null and nothing is
+  // recorded.
+  if (MUL) {
+    DelInstrs.push_back(MUL);
+    DelInstrs.push_back(&Root);
+  }
+}
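Both immediate cases above rely on the same shift-left/shift-right trick to truncate the (possibly shifted, possibly negated) immediate to the low BitSize bits before asking processLogicalImmediate for an ORR encoding. A standalone sketch of that trick:

#include <cassert>
#include <cstdint>

// Keep only the low BitSize bits of Imm (BitSize is 32 or 64 here).
static uint64_t truncToBits(uint64_t Imm, unsigned BitSize) {
  return Imm << (64 - BitSize) >> (64 - BitSize);
}

int main() {
  assert(truncToBits(uint64_t(-16), 32) == 0xFFFFFFF0u); // -16 as 32-bit
  assert(truncToBits(uint64_t(1) << 40, 32) == 0);       // high bits dropped
  return 0;
}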
+
+/// \brief Replace a csinc-branch sequence with a simple conditional branch
+///
+/// Examples:
+/// 1.
+/// csinc w9, wzr, wzr, <condition code>
+/// tbnz w9, #0, 0x44
+/// to
+/// b.<inverted condition code>
+///
+/// 2.
+/// csinc w9, wzr, wzr, <condition code>
+/// tbz w9, #0, 0x44
+/// to
+/// b.<condition code>
+///
+/// \param MI Conditional Branch
+/// \return True when the simple conditional branch is generated
+///
+bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
+ bool IsNegativeBranch = false;
+ bool IsTestAndBranch = false;
+ unsigned TargetBBInMI = 0;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown branch instruction?");
+ case AArch64::Bcc:
+ return false;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ TargetBBInMI = 1;
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ TargetBBInMI = 1;
+ IsNegativeBranch = true;
+ break;
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ TargetBBInMI = 2;
+ IsTestAndBranch = true;
+ break;
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ TargetBBInMI = 2;
+ IsNegativeBranch = true;
+ IsTestAndBranch = true;
+ break;
+ }
+ // So we increment a zero register and test for bits other
+ // than bit 0? Conservatively bail out in case the verifier
+ // missed this case.
+ if (IsTestAndBranch && MI->getOperand(1).getImm())
+ return false;
+
+ // Find Definition.
+  assert(MI->getParent() && "Incomplete machine instruction!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ unsigned VReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+
+ // Look for CSINC
+ if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
+ DefMI->getOperand(1).getReg() == AArch64::WZR &&
+ DefMI->getOperand(2).getReg() == AArch64::WZR) &&
+ !(DefMI->getOpcode() == AArch64::CSINCXr &&
+ DefMI->getOperand(1).getReg() == AArch64::XZR &&
+ DefMI->getOperand(2).getReg() == AArch64::XZR))
+ return false;
+
+ if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
+ return false;
+
+ AArch64CC::CondCode CC =
+ (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
+ bool CheckOnlyCCWrites = true;
+ // Convert only when the condition code is not modified between
+ // the CSINC and the branch. The CC may be used by other
+ // instructions in between.
+ if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
+ return false;
+ MachineBasicBlock &RefToMBB = *MBB;
+ MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsNegativeBranch)
+ CC = AArch64CC::getInvertedCondCode(CC);
+ BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
+ MI->eraseFromParent();
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index f70b82b7c2da..d8f127498e54 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64INSTRINFO_H
-#define LLVM_TARGET_AArch64INSTRINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -46,9 +47,15 @@ public:
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ bool isAsCheapAsAMove(const MachineInstr *MI) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const override;
+ bool
+ areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -87,6 +94,10 @@ public:
unsigned &Offset,
const TargetRegisterInfo *TRI) const override;
+ bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
+ int &Offset, int &Width,
+ const TargetRegisterInfo *TRI) const;
+
bool enableClusterLoads() const override { return true; }
bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
@@ -96,8 +107,8 @@ public:
MachineInstr *Second) const override;
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
- uint64_t Offset, const MDNode *MDPtr,
- DebugLoc DL) const;
+ uint64_t Offset, const MDNode *Var,
+ const MDNode *Expr, DebugLoc DL) const;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode,
@@ -117,6 +128,7 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
@@ -153,7 +165,27 @@ public:
bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
-
+ bool optimizeCondBranch(MachineInstr *MI) const override;
+  /// hasPattern - return true when there is potentially a faster code sequence
+  /// for an instruction chain ending in <Root>. All potential patterns are
+  /// listed in the <Pattern> array.
+ bool hasPattern(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern)
+ const override;
+
+  /// genAlternativeCodeSequence - when hasPattern() finds a pattern,
+  /// this function generates the instructions that could replace the
+  /// original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+ /// useMachineCombiner - AArch64 supports MachineCombiner
+ bool useMachineCombiner() const override;
+
+  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 0ba069e99a86..e0fb90a9f621 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -24,6 +24,7 @@ def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
+def IsCyclone : Predicate<"Subtarget->isCyclone()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@@ -236,6 +237,12 @@ def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
+def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
+
+def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
+def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
//===----------------------------------------------------------------------===//
@@ -474,6 +481,24 @@ def trunc_imm : SDNodeXForm<imm, [{
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
+// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+}]>;
+
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+}]>;
+
+def : Pat<(f32 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
+def : Pat<(f64 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
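The two SDNodeXForms simply reinterpret the FP constant's bit pattern as an integer so the integer MOV pseudos can build it in a GPR before copying to an FPR. A host-side sketch of the same reinterpretation (the helper name is hypothetical):

#include <cstdint>
#include <cstring>

// Same reinterpretation as bitcastToAPInt(): read the double's raw bits.
static uint64_t bitsOf(double D) {
  uint64_t Bits;
  static_assert(sizeof(Bits) == sizeof(D), "size mismatch");
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}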
+
// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
@@ -632,6 +657,10 @@ def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
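The new patterns cover the (mul (ineg x), y) form alongside the existing (ineg (mul x, y)); both are algebraically -(x*y), so each selects a single MSUB with the zero register as the addend. A source-level sketch (the msub in the comment is an assumption about selection):

#include <cstdint>

int32_t NegMul(int32_t A, int32_t B) {
  return -A * B; // (-A)*B == -(A*B); expected: msub w0, w0, w1, wzr
}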
} // AddedComplexity = 7
let AddedComplexity = 5 in {
@@ -782,7 +811,7 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
@@ -797,7 +826,7 @@ def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
@@ -970,9 +999,9 @@ def : InstAlias<"cneg $dst, $src, $cc",
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
-let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel, []>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
[(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
@@ -1173,6 +1202,9 @@ defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
+
defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
@@ -1213,6 +1245,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
@@ -1226,6 +1259,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
@@ -1355,6 +1389,8 @@ let Predicates = [IsLE] in {
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
@@ -1376,6 +1412,8 @@ let Predicates = [IsLE] in {
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
@@ -1512,6 +1550,8 @@ let Predicates = [IsLE] in {
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
@@ -1532,6 +1572,8 @@ let Predicates = [IsLE] in {
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
@@ -1828,6 +1870,7 @@ let Predicates = [IsLE] in {
defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
@@ -1842,9 +1885,37 @@ let Predicates = [IsLE] in {
defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>;
+}
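Storing lane 0 needs no lane-extract instruction: the patterns above select a plain register-offset scalar store of the matching subregister. An intrinsics-level sketch, assuming compilation for AArch64:

#include <arm_neon.h>

void StoreLane0(int32x4_t V, int32_t *Base, long Idx) {
  Base[Idx] = vgetq_lane_s32(V, 0); // register-offset scalar STR, no lane move
}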
+
//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
@@ -1892,6 +1963,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
@@ -1921,6 +1995,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(store (f128 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
@@ -1983,6 +2060,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i32 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
@@ -2013,6 +2093,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2f64 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
// unscaled i64 truncating stores
@@ -2089,6 +2172,8 @@ def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
@@ -2102,6 +2187,8 @@ def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//---
// (immediate post-indexed)
@@ -2139,6 +2226,8 @@ def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
@@ -2152,6 +2241,8 @@ def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
@@ -2384,6 +2475,28 @@ defm FMOV : FPMoveImmediate<"fmov">;
//===----------------------------------------------------------------------===//
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
+def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
+ (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
+ (ABSv8i8 V64:$src)>;
+def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
+ (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
+ (ABSv4i16 V64:$src)>;
+def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
+ (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
+ (ABSv2i32 V64:$src)>;
+def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
+ (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
+ (ABSv16i8 V128:$src)>;
+def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
+ (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
+ (ABSv8i16 V128:$src)>;
+def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
+ (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
+ (ABSv4i32 V128:$src)>;
+def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
+ (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
+ (ABSv2i64 V128:$src)>;
+
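The DAG shape matched above is the classic branch-free absolute value: with s = x >> (bits - 1) (arithmetic shift), |x| = (x + s) ^ s. A scalar sketch of the idiom the vector patterns recognize:

#include <cstdint>

static int8_t absIdiom(int8_t X) {
  int8_t Sign = X >> 7;       // all-ones when negative, zero otherwise
  return (X + Sign) ^ Sign;   // == |X| (wraps for INT8_MIN, as abs does)
}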
defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
@@ -2412,6 +2525,11 @@ def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
(i64 2))))),
(FCVTLv4i32 V128:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
+ (i64 4))))),
+ (FCVTLv8i16 V128:$Rn)>;
+
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
@@ -2423,6 +2541,7 @@ def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (fround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
@@ -2505,6 +2624,10 @@ defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
@@ -3101,6 +3224,46 @@ defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+// Additional patterns for SMULL and UMULL
+multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
+ SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
+defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
+ UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
+
+// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
+multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+
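These TriOpFrags match a widening multiply whose result is added to or subtracted from an accumulator, selecting the single-instruction SMLAL/UMLAL/SMLSL/UMLSL forms. An intrinsics-level sketch of one covered shape:

#include <arm_neon.h>

int32x4_t WideningMulAcc(int32x4_t Acc, int16x4_t A, int16x4_t B) {
  return vmlal_s16(Acc, A, B); // Acc + sext(A) * sext(B) -> smlal
}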
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
@@ -3183,6 +3346,10 @@ def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
// We use EXT to handle extract_subvector to copy the upper 64-bits of a
// 128-bit vector.
@@ -3194,6 +3361,8 @@ def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
@@ -3306,6 +3475,19 @@ def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
(v2f64 (DUPv2i64lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
(i64 0)))>;
+def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v4f16 (DUPv4i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v8f16 (DUPv8i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+
+def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
@@ -3427,6 +3609,23 @@ def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16lane
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0)),
+ dsub)>;
+
+def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
+ (INSvi16lane
+ V128:$Rn, VectorIndexH:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0))>;
+
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
@@ -3507,6 +3706,7 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
dsub)>;
}
+defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
@@ -3522,6 +3722,8 @@ def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
(f64 (EXTRACT_SUBREG
(INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
@@ -3532,6 +3734,11 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
(INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
V128:$Rn, VectorIndexS:$idx),
ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
+ (f16 (EXTRACT_SUBREG
+ (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
+ V128:$Rn, VectorIndexH:$idx),
+ hsub))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -3546,6 +3753,7 @@ def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v16i8, v8i8>;
// If the high lanes are undef, though, we can just ignore them:
@@ -3965,7 +4173,7 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
// AdvSIMD indexed element
//----------------------------------------------------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
}
@@ -4386,7 +4594,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
0),
dsub)),
0),
- ssub)))>, Requires<[NotForCodeSize]>;
+ ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -4439,8 +4647,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)),
0),
- dsub)))>, Requires<[NotForCodeSize]>;
-
+ dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
@@ -4519,7 +4727,7 @@ defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
@@ -4563,6 +4771,10 @@ def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
+def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
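As with the existing f32/f64 cases, a dup of a loaded scalar now selects LD1R (load one element and replicate to all lanes) for f16 vectors too. A sketch of the analogous f32 shape; the ld1r in the comment is an assumption about selection:

#include <arm_neon.h>

float32x4_t SplatLoad(const float *P) {
  return vld1q_dup_f32(P); // expected: ld1r { v0.4s }, [x0]
}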
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -4576,6 +4788,7 @@ def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -4590,6 +4803,7 @@ def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
@@ -4603,7 +4817,7 @@ defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
@@ -4617,8 +4831,9 @@ def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
@@ -4631,6 +4846,7 @@ def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -4655,6 +4871,7 @@ defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -4678,8 +4895,9 @@ defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
@@ -4856,10 +5074,77 @@ def : Pat<(trap), (BRK 1)>;
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
+// Natural vector casts (64 bit)
+def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+// Natural vector casts (128 bit)
+def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
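AArch64NvCast reinterprets the same FPR under a different vector type, so every pattern above folds to a register copy with no instruction emitted. The C-level analogue, assuming AArch64 NEON:

#include <arm_neon.h>

uint16x8_t Recast(uint32x4_t V) {
  return vreinterpretq_u16_u32(V); // same register, no instruction
}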
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
@@ -4868,6 +5153,8 @@ def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
@@ -4880,6 +5167,8 @@ def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
@@ -4889,6 +5178,8 @@ def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
@@ -4917,6 +5208,7 @@ let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
@@ -4926,6 +5218,8 @@ def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
(v1i64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
}
@@ -4938,6 +5232,7 @@ def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
@@ -4950,6 +5245,8 @@ def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
+ (v2i32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
@@ -4958,6 +5255,7 @@ def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
@@ -4970,6 +5268,8 @@ def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
(v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
(v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
@@ -4977,12 +5277,41 @@ def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
}
let Predicates = [IsLE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
+ (v4f16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+}
+
+let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
@@ -4997,6 +5326,8 @@ def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
(v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
@@ -5004,6 +5335,7 @@ def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
@@ -5014,6 +5346,8 @@ def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
(f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
(f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
@@ -5023,6 +5357,7 @@ def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
@@ -5033,6 +5368,8 @@ def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
(v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
@@ -5043,6 +5380,7 @@ def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
@@ -5055,6 +5393,8 @@ def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
+ (v2f32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
@@ -5064,6 +5404,7 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
@@ -5075,6 +5416,9 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
@@ -5089,6 +5433,7 @@ let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
@@ -5100,6 +5445,8 @@ def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
(v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
(v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
@@ -5110,6 +5457,7 @@ def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -5120,6 +5468,8 @@ def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
(v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
@@ -5135,6 +5485,7 @@ def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
@@ -5148,6 +5499,8 @@ def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
(v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
@@ -5157,6 +5510,7 @@ def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
@@ -5171,6 +5525,8 @@ def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
(v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
@@ -5181,6 +5537,7 @@ def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
@@ -5197,6 +5554,36 @@ def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
(v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))),
+ (v8i16 (REV64v8i16 FPR128:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
+ (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
+ (v8f16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
}
let Predicates = [IsLE] in {
@@ -5206,6 +5593,7 @@ def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
@@ -5222,6 +5610,8 @@ def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
(v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
@@ -5245,6 +5635,8 @@ def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 3df9c4f23aa1..81579810b648 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -13,20 +13,21 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
@@ -108,7 +109,7 @@ private:
int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
-}
+} // namespace
static bool isUnscaledLdst(unsigned Opc) {
switch (Opc) {
@@ -931,8 +932,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
bool Modified = false;
for (auto &MBB : Fn)
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 75a17b9dc999..e57b0f4dbb09 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -25,8 +25,7 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang,
- AsmPrinter &printer)
+AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
MCSymbol *
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index ba50ba9e2fe5..1e29b80c2d62 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64_MCINSTLOWER_H
-#define AArch64_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
@@ -33,7 +33,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower {
Triple TargetTriple;
public:
- AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
+ AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
new file mode 100644
index 000000000000..4164b3364559
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
@@ -0,0 +1,42 @@
+//===- AArch64MachineCombinerPattern.h - AArch64 combiner patterns -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the instruction patterns supported by the machine
+// combiner.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
+
+namespace llvm {
+
+/// Enumeration of the instruction patterns supported by the machine
+/// combiner.
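+///
+/// For example, MC_MULADDW_OP1 describes a 32-bit multiply whose result is
+/// the first source operand of an add (an illustrative sequence, not the
+/// matcher itself):
+///   mul  w8, w0, w1
+///   add  w9, w8, w2
+/// which the machine combiner can turn into:
+///   madd w9, w0, w1, w2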
+namespace MachineCombinerPattern {
+enum MC_PATTERN : int {
+ MC_NONE = 0,
+ MC_MULADDW_OP1 = 1,
+ MC_MULADDW_OP2 = 2,
+ MC_MULSUBW_OP1 = 3,
+ MC_MULSUBW_OP2 = 4,
+ MC_MULADDWI_OP1 = 5,
+ MC_MULSUBWI_OP1 = 6,
+ MC_MULADDX_OP1 = 7,
+ MC_MULADDX_OP2 = 8,
+ MC_MULSUBX_OP1 = 9,
+ MC_MULSUBX_OP2 = 10,
+ MC_MULADDXI_OP1 = 11,
+ MC_MULSUBXI_OP1 = 12
+};
+} // end namespace MachineCombinerPattern
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 7c257ba9116f..536a8d0f97a0 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64MACHINEFUNCTIONINFO_H
-#define AArch64MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,4 +160,4 @@ private:
};
} // End llvm namespace
-#endif // AArch64MACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
new file mode 100644
index 000000000000..f942c4e98a6f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -0,0 +1,383 @@
+//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains the AArch64 / Cortex-A57 specific register allocation
+// constraints for use by the PBQP register allocator.
+//
+// It is essentially a transcription of what is contained in
+// AArch64A57FPLoadBalancing, which tries to use a balanced
+// mix of odd and even D-registers when performing a critical sequence of
+// independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
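+//
+// As an illustration (not code from the pass): given two independent
+// accumulator chains
+//   fmadd d0, d2, d4, d0   // chain A, even accumulator
+//   fmadd d1, d3, d5, d1   // chain B, odd accumulator
+// the constraint keeps each chain's accumulator on a single parity and
+// steers concurrent chains toward opposite parities, so their
+// multiply-accumulates can be balanced across the FP pipelines.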
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-pbqp"
+
+#include "AArch64.h"
+#include "AArch64PBQPRegAlloc.h"
+#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+#ifndef NDEBUG
+bool isFPReg(unsigned reg) {
+ return AArch64::FPR32RegClass.contains(reg) ||
+ AArch64::FPR64RegClass.contains(reg) ||
+ AArch64::FPR128RegClass.contains(reg);
+}
+#endif
+
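+// Returns true if \p reg is an odd-numbered S, D or Q register. The
+// chaining constraints below key on this parity.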
+bool isOdd(unsigned reg) {
+ switch (reg) {
+ default:
+ llvm_unreachable("Register is not from the expected class!");
+ case AArch64::S1:
+ case AArch64::S3:
+ case AArch64::S5:
+ case AArch64::S7:
+ case AArch64::S9:
+ case AArch64::S11:
+ case AArch64::S13:
+ case AArch64::S15:
+ case AArch64::S17:
+ case AArch64::S19:
+ case AArch64::S21:
+ case AArch64::S23:
+ case AArch64::S25:
+ case AArch64::S27:
+ case AArch64::S29:
+ case AArch64::S31:
+ case AArch64::D1:
+ case AArch64::D3:
+ case AArch64::D5:
+ case AArch64::D7:
+ case AArch64::D9:
+ case AArch64::D11:
+ case AArch64::D13:
+ case AArch64::D15:
+ case AArch64::D17:
+ case AArch64::D19:
+ case AArch64::D21:
+ case AArch64::D23:
+ case AArch64::D25:
+ case AArch64::D27:
+ case AArch64::D29:
+ case AArch64::D31:
+ case AArch64::Q1:
+ case AArch64::Q3:
+ case AArch64::Q5:
+ case AArch64::Q7:
+ case AArch64::Q9:
+ case AArch64::Q11:
+ case AArch64::Q13:
+ case AArch64::Q15:
+ case AArch64::Q17:
+ case AArch64::Q19:
+ case AArch64::Q21:
+ case AArch64::Q23:
+ case AArch64::Q25:
+ case AArch64::Q27:
+ case AArch64::Q29:
+ case AArch64::Q31:
+ return true;
+ case AArch64::S0:
+ case AArch64::S2:
+ case AArch64::S4:
+ case AArch64::S6:
+ case AArch64::S8:
+ case AArch64::S10:
+ case AArch64::S12:
+ case AArch64::S14:
+ case AArch64::S16:
+ case AArch64::S18:
+ case AArch64::S20:
+ case AArch64::S22:
+ case AArch64::S24:
+ case AArch64::S26:
+ case AArch64::S28:
+ case AArch64::S30:
+ case AArch64::D0:
+ case AArch64::D2:
+ case AArch64::D4:
+ case AArch64::D6:
+ case AArch64::D8:
+ case AArch64::D10:
+ case AArch64::D12:
+ case AArch64::D14:
+ case AArch64::D16:
+ case AArch64::D18:
+ case AArch64::D20:
+ case AArch64::D22:
+ case AArch64::D24:
+ case AArch64::D26:
+ case AArch64::D28:
+ case AArch64::D30:
+ case AArch64::Q0:
+ case AArch64::Q2:
+ case AArch64::Q4:
+ case AArch64::Q6:
+ case AArch64::Q8:
+ case AArch64::Q10:
+ case AArch64::Q12:
+ case AArch64::Q14:
+ case AArch64::Q16:
+ case AArch64::Q18:
+ case AArch64::Q20:
+ case AArch64::Q22:
+ case AArch64::Q24:
+ case AArch64::Q26:
+ case AArch64::Q28:
+ case AArch64::Q30:
+ return false;
+
+ }
+}
+
+bool haveSameParity(unsigned reg1, unsigned reg2) {
+ assert(isFPReg(reg1) && "Expecting an FP register for reg1");
+ assert(isFPReg(reg2) && "Expecting an FP register for reg2");
+
+ return isOdd(reg1) == isOdd(reg2);
+}
+
+} // namespace
+
+bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
+ unsigned Ra) {
+ if (Rd == Ra)
+ return false;
+
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
+ DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
+ << '\n');
+ DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
+ << '\n');
+ return false;
+ }
+
+ PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
+ PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra);
+
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
+ &G.getNodeMetadata(node1).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed =
+ &G.getNodeMetadata(node2).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
+
+ // The edge does not exist. Create one with the appropriate interference
+ // costs.
+ if (edge == G.invalidEdgeId()) {
+ const LiveInterval &ld = LIs.getInterval(Rd);
+ const LiveInterval &la = LIs.getInterval(Ra);
+ bool livesOverlap = ld.overlaps(la);
+
+ PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1,
+ vRaAllowed->size() + 1, 0);
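+ // Row and column 0 of the cost matrix are reserved for the spill
+ // option, so allowed register i maps to matrix index i + 1 below.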
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (livesOverlap && TRI->regsOverlap(pRd, pRa))
+ costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ else
+ costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
+ }
+ }
+ G.addEdge(node1, node2, std::move(costs));
+ return true;
+ }
+
+ if (G.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRaAllowed);
+ }
+
+ // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
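+ // For instance, if the most expensive finite same-parity cost is 2.0,
+ // any different-parity entry below that is raised to 3.0.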
+ PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge));
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable registers) over the
+ // same-parity registers
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with a different parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ G.setEdgeCosts(edge, std::move(costs));
+
+ return true;
+}
+
+void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
+ unsigned Ra) {
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ // Update the chain bookkeeping: a write to Rd either renames the chain
+ // currently rooted at Ra or starts a new chain.
+ if (Chains.count(Ra)) {
+ if (Rd != Ra) {
+ DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
+ << PrintReg(Rd, TRI) << '\n';);
+ Chains.remove(Ra);
+ Chains.insert(Rd);
+ }
+ } else {
+ DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
+ << '\n';);
+ Chains.insert(Rd);
+ }
+
+ PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
+
+ const LiveInterval &ld = LIs.getInterval(Rd);
+ for (auto r : Chains) {
+ // Skip self
+ if (r == Rd)
+ continue;
+
+ const LiveInterval &lr = LIs.getInterval(r);
+ if (ld.overlaps(lr)) {
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
+ &G.getNodeMetadata(node1).getAllowedRegs();
+
+ PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed =
+ &G.getNodeMetadata(node2).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
+ assert(edge != G.invalidEdgeId() &&
+ "PBQP error ! The edge should exist !");
+
+ DEBUG(dbgs() << "Refining constraint !\n";);
+
+ if (G.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRrAllowed);
+ }
+
+ // Enforce a higher cost for registers sharing parity with other chains
+ PBQP::Matrix costs(G.getEdgeCosts(edge));
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable registers) over the
+ // registers of the other parity
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with same parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ G.setEdgeCosts(edge, std::move(costs));
+ }
+ }
+}
+
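+// Returns true if the live interval of \p reg has already ended (the
+// register was killed) by the time \p MI executes.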
+static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
+ const MachineInstr &MI) {
+ const LiveInterval &LI = LIs.getInterval(reg);
+ SlotIndex SI = LIs.getInstructionIndex(&MI);
+ return LI.expiredAt(SI);
+}
+
+void A57ChainingConstraint::apply(PBQPRAGraph &G) {
+ const MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ DEBUG(MF.dump());
+
+ for (const auto &MBB: MF) {
+ Chains.clear(); // FIXME: Is this really needed, or could it work at the MF level?
+
+ for (const auto &MI: MBB) {
+
+ // Forget chains which have expired. Collect them first so that Chains
+ // is not modified while it is being iterated.
+ SmallVector<unsigned, 8> toDel;
+ for (auto r : Chains) {
+ if (regJustKilledBefore(LIs, r, MI)) {
+ DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+ MI.print(dbgs()););
+ toDel.push_back(r);
+ }
+ }
+
+ while (!toDel.empty()) {
+ Chains.remove(toDel.back());
+ toDel.pop_back();
+ }
+
+ switch (MI.getOpcode()) {
+ case AArch64::FMSUBSrrr:
+ case AArch64::FMADDSrrr:
+ case AArch64::FNMSUBSrrr:
+ case AArch64::FNMADDSrrr:
+ case AArch64::FMSUBDrrr:
+ case AArch64::FMADDDrrr:
+ case AArch64::FNMSUBDrrr:
+ case AArch64::FNMADDDrrr: {
+ unsigned Rd = MI.getOperand(0).getReg();
+ unsigned Ra = MI.getOperand(3).getReg();
+
+ if (addIntraChainConstraint(G, Rd, Ra))
+ addInterChainConstraint(G, Rd, Ra);
+ break;
+ }
+
+ case AArch64::FMLAv2f32:
+ case AArch64::FMLSv2f32: {
+ unsigned Rd = MI.getOperand(0).getReg();
+ addInterChainConstraint(G, Rd, Rd);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+}
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
new file mode 100644
index 000000000000..4f656f94ea12
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -0,0 +1,38 @@
+//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALLOC_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALLOC_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+
+namespace llvm {
+
+/// Add the accumulator chaining constraint to a PBQP graph
+class A57ChainingConstraint : public PBQPRAConstraint {
+public:
+ // Add A57 specific constraints to the PBQP graph.
+ void apply(PBQPRAGraph &G) override;
+
+private:
+ SmallSetVector<unsigned, 32> Chains;
+ const TargetRegisterInfo *TRI;
+
+ // Add the accumulator chaining constraint, inside the chain, i.e. so that
+ // parity(Rd) == parity(Ra).
+ // \return true if a constraint was added
+ bool addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
+
+ // Add constraints between existing chains
+ void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALLOC_H
diff --git a/lib/Target/AArch64/AArch64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
index b22fa2424d5c..9e9eec48c555 100644
--- a/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 242 entries have cost 1
// 1447 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 4723cc4978e5..97b0f0eba408 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -21,18 +21,18 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -193,7 +193,7 @@ private:
// Inserting into the DenseMap may invalidate existing iterator.
// Keep a copy of the key to find the iterator to erase.
Instruction *OldInstr = IPI->first;
- InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
+ InsertPts[NewPt] = std::move(IPI->second);
// Erase IPI.
IPI = InsertPts.find(OldInstr);
InsertPts.erase(IPI);
@@ -569,7 +569,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
// global. Do not promote constant expressions either, as they may
// require some code expansion.
if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
- AlreadyChecked.insert(Cst))
+ AlreadyChecked.insert(Cst).second)
LocalChange |= promoteConstant(Cst);
}
}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 01b9587b3174..d734d436add7 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -76,7 +76,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -105,7 +105,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (Reg) {
default:
@@ -169,7 +169,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
}
@@ -236,7 +236,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
@@ -326,7 +326,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
- MF.getTarget().getFrameLowering());
+ MF.getSubtarget().getFrameLowering());
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
@@ -364,7 +364,7 @@ namespace llvm {
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (RC->getID()) {
default:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 76af1edce723..51a503485bbe 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64REGISTERINFO_H
-#define LLVM_TARGET_AArch64REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERINFO_H
#define GET_REGINFO_HEADER
#include "AArch64GenRegisterInfo.inc"
@@ -98,4 +98,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TARGET_AArch64REGISTERINFO_H
+#endif
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index a30e4ad0b5d8..d5ff3f1f3373 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -390,13 +390,14 @@ def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
}
def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
- v1i64],
+ v1i64, v4f16],
64, (sequence "D%u", 0, 31)>;
// We don't (yet) have an f128 legal type, so don't use that here. We
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
// that here.
def FPR128 : RegisterClass<"AArch64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
+ v8f16],
128, (sequence "Q%u", 0, 31)>;
// The lower 16 vector registers. Some instructions can only take registers
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 8209f965cfd5..3ec41578a94c 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -12,11 +12,24 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with
+// the much wider out-of-order issue stage, this creates a need to schedule
+// micro-ops carefully so that all three decoded each cycle can also issue,
+// since the reservation stations do not stay occupied for long. Therefore,
+// IssueWidth is set to the narrower of the two at three, while the machine
+// is still modeled as out-of-order.
+//===----------------------------------------------------------------------===//
+
def CortexA57Model : SchedMachineModel {
- let IssueWidth = 8; // 3-way decode and 8-way issue
+ let IssueWidth = 3; // 3-way decode and dispatch
let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
let LoadLatency = 4; // Optimistic load latency
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling. The magic number is chosen based on
+ // experiments and benchmarking data.
+ let LoopMicroOpBufferSize = 16;
}
//===----------------------------------------------------------------------===//
@@ -24,18 +37,17 @@ def CortexA57Model : SchedMachineModel {
// The Cortex-A57 has 8 pipelines, each with its own 8-entry queue where
// micro-ops wait for their operands and then issue out-of-order.
-def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops
-def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops
-def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops
-def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops
-def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops
-def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops
-def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+def A57UnitX : ProcResource<1>; // Type X micro-ops
+def A57UnitW : ProcResource<1>; // Type W micro-ops
let SchedModel = CortexA57Model in {
def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
}
-
let SchedModel = CortexA57Model in {
//===----------------------------------------------------------------------===//
@@ -71,7 +83,7 @@ def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
def : SchedAlias<WriteF, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
-def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
@@ -85,13 +97,12 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
-// Forwarding logic is not [yet] explicitly modeled beyond what is captured
-// in the latencies of the A57 Generic SchedWriteRes's.
+// Forwarding logic is only modeled for multiply and accumulate
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
-def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
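+// The accumulator operand of a multiply-accumulate is read two cycles late,
+// so a multiply feeding it sees its effective latency reduced by two. This
+// models the A57's multiply-accumulate forwarding path.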
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
@@ -134,7 +145,13 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
-def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>;
// Vector Load
@@ -301,4 +318,330 @@ def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_PO
def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+// Vector - Integer
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+// ASIMD multiply, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
+def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
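+// (The same SchedWriteRes/SchedReadAdvance pairing is used below to model
+// the accumulator forwarding of the pairwise-add/shift-accumulate and FP
+// multiply-accumulate groups.)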
+
+// ASIMD multiply long
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>;
+
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v2f32
+// Q form - v4f32, v2f64
+// D form - 32, 64
+// D form - v1i32, v1i64
+// D form - v2i32
+// Q form - v4i32, v2i64
+
+// ASIMD FP arith, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>;
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>;
+
+// ASIMD FP arith, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>;
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>;
+
+// ASIMD FP compare, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>;
+// ASIMD FP compare, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[A57Write_18cyc_1X], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[A57Write_36cyc_2X], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[A57Write_64cyc_2X], (instregex "FDIVv2f64")>;
+
+// Note: These were simply duplicated from ASIMD FDIV because of missing documentation
+// ASIMD FP square root, D-form, F32
+def : InstRW<[A57Write_18cyc_1X], (instregex "FSQRTv2f32")>;
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[A57Write_36cyc_2X], (instregex "FSQRTv4f32")>;
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[A57Write_64cyc_2X], (instregex "FSQRTv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>;
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>;
+// ASIMD FP max/min, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>;
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>;
+// ASIMD FP max/min, reduce
+def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
+
+// ASIMD FP multiply, D-form, FZ
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD FP multiply, Q-form, FZ
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
+def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
+def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+
+// Vector - Miscellaneous
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+
+// ASIMD bitwise insert, Q-form
+def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
+
+// ASIMD duplicate, gen reg, D-form and Q-form
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>;
+
+// ASIMD reciprocal estimate, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>;
+
+// ASIMD reciprocal step, D-form, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD reciprocal step, Q-form, FZ
+def : InstRW<[A57Write_9cyc_2V], (instregex "^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Remainder
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
+
+def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
+def A57ReadFPM : SchedReadAdvance<0>;
+def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>;
+
+def : InstRW<[A57Write_32cyc_1X], (instrs FDIVDrr)>;
+def : InstRW<[A57Write_18cyc_1X], (instrs FDIVSrr)>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>;
+
+def : InstRW<[A57Write_32cyc_1X], (instrs FSQRTDr)>;
+def : InstRW<[A57Write_18cyc_1X], (instrs FSQRTSr)>;
+
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>;
+
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>;
+
} // SchedModel = CortexA57Model
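
The InstRW entries above pin per-opcode latencies and pipeline units to the Cortex-A57 model. As a hedged illustration (not part of this patch; MI, ST and TII are assumed to be a MachineInstr, subtarget and instruction info already in scope), a machine pass reads these numbers back through TargetSchedModel, using the same init call this patch updates in AArch64StorePairSuppress.cpp:

    // Illustrative fragment only; assumes surrounding LLVM pass boilerplate.
    TargetSchedModel SchedModel;
    SchedModel.init(ST.getSchedModel(), &ST, TII);          // tables generated from the .td
    unsigned Cycles = SchedModel.computeInstrLatency(&MI);  // e.g. 5 for LDRDui on A57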
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index a8f421bf38d2..6f30108a23e3 100644
--- a/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -28,14 +28,18 @@ def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
-def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; }
-def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+ let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
-def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; }
-def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
@@ -53,6 +57,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 64;
let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
}
def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
@@ -137,6 +142,7 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 36;
let NumMicroOps = 2;
+ let ResourceCycles = [18, 18];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
@@ -153,6 +159,10 @@ def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
let Latency = 3;
let NumMicroOps = 2;
}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
let Latency = 4;
@@ -295,6 +305,11 @@ def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
let Latency = 9;
let NumMicroOps = 4;
}
+def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
//===----------------------------------------------------------------------===//
@@ -334,6 +349,11 @@ def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
let Latency = 9;
let NumMicroOps = 5;
}
+def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
//===----------------------------------------------------------------------===//
@@ -399,7 +419,7 @@ def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
let Latency = 4;
let NumMicroOps = 7;
}
-def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
A57UnitS, A57UnitS, A57UnitS,
A57UnitS, A57UnitS, A57UnitS]> {
let Latency = 6;
@@ -412,6 +432,12 @@ def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
let Latency = 9;
let NumMicroOps = 7;
}
+def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 7;
+}
//===----------------------------------------------------------------------===//
@@ -443,11 +469,11 @@ def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
//===----------------------------------------------------------------------===//
// Define Generic 9 micro-op types
-def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS]> {
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
let Latency = 8;
let NumMicroOps = 9;
}
@@ -459,6 +485,12 @@ def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
let Latency = 11;
let NumMicroOps = 9;
}
+def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 15;
+ let NumMicroOps = 9;
+}
//===----------------------------------------------------------------------===//
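
The ResourceCycles additions separate result latency from unit occupancy: by default a write holds its unit for a single cycle even when Latency is larger, whereas the A57 divide units are unpipelined and stay busy for the whole operation, which is what the [18], [19], [32] and [35] values encode. A hypothetical entry in the same style (the 20-cycle write and its name are invented for illustration):

    def A57Write_20cyc_1X_sketch : SchedWriteRes<[A57UnitX]> {
      let Latency = 20;          // result available after 20 cycles
      let ResourceCycles = [20]; // X unit blocked for all 20 (not pipelined)
    }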
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 1bf64fc4310b..0cfd5826aadf 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -36,8 +36,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
const AArch64TargetLowering &TLI =
- *static_cast<const AArch64TargetLowering *>(
- DAG.getTarget().getTargetLowering());
+ *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering();
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 1180eea6f4c1..11932d2b1c22 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64SELECTIONDAGINFO_H
-#define AArch64SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 45f8ddbd2d85..0c36e8f29263 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -39,7 +39,7 @@ public:
static char ID;
AArch64StorePairSuppress() : MachineFunctionPass(ID) {}
- virtual const char *getPassName() const override {
+ const char *getPassName() const override {
return "AArch64 Store Pair Suppression";
}
@@ -50,7 +50,7 @@ private:
bool isNarrowFPStore(const MachineInstr &MI);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
@@ -85,8 +85,7 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
// If a subtarget does not define resources for STPQi, bail here.
if (SCDesc->isValid() && !SCDesc->isVariant()) {
- unsigned ResLenWithSTP = BBTrace.getResourceLength(
- ArrayRef<const MachineBasicBlock *>(), SCDesc);
+ unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc);
if (ResLenWithSTP > ResLength) {
DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
<< " resources " << ResLength << " -> " << ResLenWithSTP
@@ -118,12 +117,13 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo());
- TRI = MF->getTarget().getRegisterInfo();
+ TII =
+ static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
+ TRI = MF->getSubtarget().getRegisterInfo();
MRI = &MF->getRegInfo();
const TargetSubtargetInfo &ST =
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index bb0b72c585b8..47b5d546955f 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64PBQPRegAlloc.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineScheduler.h"
@@ -43,8 +44,8 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &CPU,
- const std::string &FS, TargetMachine &TM,
- bool LittleEndian)
+ const std::string &FS,
+ const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
@@ -64,13 +65,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const {
-
- // Determine whether this is a reference to a definition or a declaration.
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
// MachO large model always goes via a GOT, simply to get a single 8-byte
// absolute relocation on all global addresses.
@@ -78,10 +73,15 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
return AArch64II::MO_GOT;
  // The small code model's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB). Therefore they must use the
- // GOT.
- if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl)
- return AArch64II::MO_GOT;
+ // produce the value 0 (if the code is above 4GB).
+ if (TM.getCodeModel() == CodeModel::Small &&
+ GV->isWeakForLinker() && isDecl) {
+ // In PIC mode use the GOT, but in absolute mode use a constant pool load.
+ if (TM.getRelocationModel() == Reloc::Static)
+ return AArch64II::MO_CONSTPOOL;
+ else
+ return AArch64II::MO_GOT;
+ }
// If symbol visibility is hidden, the extra load is not needed if
// the symbol is definitely defined in the current translation unit.
@@ -128,3 +128,11 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
bool AArch64Subtarget::enableEarlyIfConversion() const {
return EnableEarlyIfConvert;
}
+
+std::unique_ptr<PBQPRAConstraint>
+AArch64Subtarget::getCustomPBQPConstraints() const {
+ if (!isCortexA57())
+ return nullptr;
+
+ return llvm::make_unique<A57ChainingConstraint>();
+}
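
The ClassifyGlobalReference change routes weak declarations through a constant-pool load (MO_CONSTPOOL) rather than the GOT when relocations are static: a weak symbol may legitimately resolve to address 0, which an ADRP-based direct access cannot materialize once code sits above 4GB. A minimal C++ illustration of the pattern being kept correct (the symbol name is hypothetical):

    // May bind to address 0 at link time if no definition exists.
    extern int maybe_missing __attribute__((weak));

    bool have_it() {
      // The null check only works if &maybe_missing can really be 0,
      // hence the GOT or constant-pool indirection chosen above.
      return &maybe_missing != nullptr;
    }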
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 52124f650c7a..e2740f137c26 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64SUBTARGET_H
-#define AArch64SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
-#include "AArch64InstrInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -69,18 +69,27 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM, bool LittleEndian);
+ const std::string &FS, const TargetMachine &TM,
+ bool LittleEndian);
- const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const AArch64FrameLowering *getFrameLowering() const {
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const AArch64FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const AArch64TargetLowering *getTargetLowering() const {
+ const AArch64TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const AArch64RegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
bool enableMachineScheduler() const override { return true; }
+ bool enablePostMachineScheduler() const override {
+ return isCortexA53() || isCortexA57();
+ }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -94,12 +103,19 @@ public:
bool isLittleEndian() const { return DL.isLittleEndian(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
+ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
-
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isCyclone() const { return CPUString == "cyclone"; }
+ bool isCortexA57() const { return CPUString == "cortex-a57"; }
+ bool isCortexA53() const { return CPUString == "cortex-a53"; }
+
+ bool useAA() const override { return isCortexA53(); }
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -126,7 +142,9 @@ public:
unsigned NumRegionInstrs) const override;
bool enableEarlyIfConversion() const override;
+
+ std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
};
} // End llvm namespace
-#endif // AArch64SUBTARGET_H
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index f99b90b800f4..d4f19d2abd80 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,8 +12,11 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
-#include "llvm/PassManager.h"
+#include "AArch64TargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -24,6 +27,10 @@ static cl::opt<bool>
EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableMCR("aarch64-mcr",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
static cl::opt<bool>
EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
cl::init(true), cl::Hidden);
@@ -59,13 +66,41 @@ EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
" to make use of cmpxchg flow-based information"),
cl::init(true));
+static cl::opt<bool>
+EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableCondOpt("aarch64-condopt",
+ cl::desc("Enable the condition optimizer pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
+ cl::desc("Work around Cortex-A53 erratum 835769"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
+ RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target);
+}
+
+//===----------------------------------------------------------------------===//
+// AArch64 Lowering public interface.
+//===----------------------------------------------------------------------===//
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
+ return make_unique<AArch64_MachoTargetObjectFile>();
- RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64leTarget);
- RegisterTargetMachine<AArch64beTargetMachine> W(TheARM64beTarget);
+ return make_unique<AArch64_ELFTargetObjectFile>();
}
/// TargetMachine ctor - Create an AArch64 architecture model.
@@ -77,10 +112,39 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, LittleEndian) {
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
initAsmInfo();
}
+AArch64TargetMachine::~AArch64TargetMachine() {}
+
+const AArch64Subtarget *
+AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ }
+ return I.get();
+}
+
void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::
@@ -104,7 +168,10 @@ namespace {
class AArch64PassConfig : public TargetPassConfig {
public:
AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ }
AArch64TargetMachine &getAArch64TargetMachine() const {
return getTM<AArch64TargetMachine>();
@@ -114,10 +181,10 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -136,7 +203,7 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ addPass(createAtomicExpandPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -145,6 +212,19 @@ void AArch64PassConfig::addIRPasses() {
addPass(createCFGSimplificationPass());
TargetPassConfig::addIRPasses();
+
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ // Call SeparateConstOffsetFromGEP pass to extract constants within indices
+ // and lower a GEP with multiple indices to either arithmetic operations or
+    // multiple GEPs with a single index.
+ addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+    // Call the EarlyCSE pass to find and remove redundant subexpressions in
+    // the lowered result.
+ addPass(createEarlyCSEPass());
+ // Do loop invariant code motion in case part of the lowered result is
+ // invariant.
+ addPass(createLICMPass());
+ }
}
// Pass Pipeline Configuration
@@ -174,43 +254,56 @@ bool AArch64PassConfig::addInstSelector() {
}
bool AArch64PassConfig::addILPOpts() {
+ if (EnableCondOpt)
+ addPass(createAArch64ConditionOptimizerPass());
if (EnableCCMP)
addPass(createAArch64ConditionalCompares());
- addPass(&EarlyIfConverterID);
+ if (EnableMCR)
+ addPass(&MachineCombinerID);
+ if (EnableEarlyIfConversion)
+ addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
return true;
}
-bool AArch64PassConfig::addPreRegAlloc() {
+void AArch64PassConfig::addPreRegAlloc() {
// Use AdvSIMD scalar instructions whenever profitable.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
- return true;
+    // The AdvSIMD pass may produce copies that can be rewritten to
+    // be register-coalescer friendly.
+ addPass(&PeepholeOptimizerID);
+ }
}
-bool AArch64PassConfig::addPostRegAlloc() {
+void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- return true;
+ if (TM->getOptLevel() != CodeGenOpt::None &&
+ (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
+ usingDefaultRegAlloc())
+    // Improve performance of some FP/SIMD code on Cortex-A57.
+ addPass(createAArch64A57FPLoadBalancing());
}
-bool AArch64PassConfig::addPreSched2() {
+void AArch64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
- return true;
}
-bool AArch64PassConfig::addPreEmitPass() {
+void AArch64PassConfig::addPreEmitPass() {
+ if (EnableA53Fix835769)
+ addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
addPass(createAArch64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getSubtarget<AArch64Subtarget>().isTargetMachO())
addPass(createAArch64CollectLOHPass());
- return true;
}
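
getSubtargetImpl(const Function &) makes the subtarget a per-function property keyed on the target-cpu and target-features attribute strings, cached so that identical pairs share one object. Stripped of LLVM specifics (all names here are hypothetical), the caching pattern is:

    #include <map>
    #include <memory>
    #include <string>

    struct Subtarget { /* CPU- and feature-specific tables */ };

    static std::map<std::string, std::unique_ptr<Subtarget>> Cache;

    Subtarget *getSubtargetFor(const std::string &CPU, const std::string &FS) {
      auto &Slot = Cache[CPU + FS];  // one subtarget per distinct CPU+FS pair
      if (!Slot)
        Slot.reset(new Subtarget()); // built lazily, reused afterwards
      return Slot.get();
    }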
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 852cb3f8d2e9..75c65c57905f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64TARGETMACHINE_H
-#define AArch64TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
@@ -23,7 +23,9 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
AArch64Subtarget Subtarget;
+ mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
@@ -31,33 +33,25 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool IsLittleEndian);
+ ~AArch64TargetMachine() override;
+
const AArch64Subtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const AArch64TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const AArch64FrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const AArch64InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const AArch64RegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+ const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
/// \brief Register AArch64 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile* getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+private:
+ bool isLittle;
};
// AArch64leTargetMachine - AArch64 little endian target machine.
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index de63cb42542a..2e595f91961a 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
-#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1dac14b96af9..b1a2914236ba 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -51,7 +51,7 @@ public:
AArch64TTI(const AArch64TargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
}
@@ -104,7 +104,7 @@ public:
return 64;
}
- unsigned getMaximumUnrollFactor() const override { return 2; }
+ unsigned getMaxInterleaveFactor() const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
override;
@@ -112,10 +112,11 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue) const
- override;
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
@@ -124,6 +125,13 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
+
+ void getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const override;
+
+
/// @}
};
@@ -400,18 +408,42 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
-unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info,
- OperandValueKind Opd2Info) const {
+unsigned AArch64TTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (ISD == ISD::SDIV &&
+ Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+    // On AArch64, scalar signed division by a constant power of two is
+    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
+    // The OperandValue properties may not be the same as those of the
+    // previous operation; conservatively assume OP_None.
+ unsigned Cost =
+ getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ return Cost;
+ }
+
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
- Opd2Info);
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -498,3 +530,27 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
+
+unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+ unsigned Cost = 0;
+ for (auto *I : Tys) {
+ if (!I->isVectorTy())
+ continue;
+ if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
+ Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
+ getMemoryOpCost(Instruction::Load, I, 128, 0);
+ }
+ return Cost;
+}
+
+unsigned AArch64TTI::getMaxInterleaveFactor() const {
+ if (ST->isCortexA57())
+ return 4;
+ return 2;
+}
+
+void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ // Disable partial & runtime unrolling on -Os.
+ UP.PartialOptSizeThreshold = 0;
+}
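
The new SDIV case prices a signed divide by a power of two as the sum of four cheap operations (ADD, SUB standing in for the compare, SELECT, SRA) rather than a full division. For reference, the scalar expansion itself looks like this for divisor 8 (illustrative, round-toward-zero; x >> 31 is the usual arithmetic sign mask on a 32-bit int):

    int sdiv_by_8(int x) {
      int bias = (x >> 31) & 7; // 7 for negative x, 0 otherwise (CMP + CSEL)
      return (x + bias) >> 3;   // ADD, then arithmetic shift right
    }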
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 37e92961ff07..8eb906b3d94f 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -10,27 +10,28 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
#include <cstdio>
using namespace llvm;
@@ -42,7 +43,6 @@ class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
// Map of register aliases registers via the .req directive.
StringMap<std::pair<bool, unsigned> > RegisterReqs;
@@ -52,10 +52,7 @@ private:
return static_cast<AArch64TargetStreamer &>(TS);
}
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- SMLoc getLoc() const { return Parser.getTok().getLoc(); }
+ SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
@@ -69,11 +66,13 @@ private:
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool showMatchError(SMLoc Loc, unsigned ErrCode);
bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveInst(SMLoc L);
+
bool parseDirectiveTLSDescCall(SMLoc L);
bool parseDirectiveLOH(StringRef LOH, SMLoc L);
@@ -85,7 +84,7 @@ private:
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
/// @name Auto-generated Match Functions
/// {
@@ -117,10 +116,11 @@ public:
AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ : MCTargetAsmParser(), STI(_STI) {
MCAsmParserExtension::Initialize(_Parser);
- if (Parser.getStreamer().getTargetStreamer() == nullptr)
- new AArch64TargetStreamer(Parser.getStreamer());
+ MCStreamer &S = getParser().getStreamer();
+ if (S.getTargetStreamer() == nullptr)
+ new AArch64TargetStreamer(S);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1875,6 +1875,7 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
int AArch64AsmParser::tryParseRegister() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -1899,6 +1900,7 @@ int AArch64AsmParser::tryParseRegister() {
/// tryMatchVectorRegister - Try to parse a vector register name with optional
/// kind specifier. If it is a register specifier, eat the token and return it.
int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
TokError("vector register expected");
return -1;
@@ -1931,6 +1933,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
@@ -1960,6 +1963,7 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
/// tryParsePrefetch - Try to parse a prefetch operand.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const AsmToken &Tok = Parser.getTok();
// Either an identifier for named values or a 5-bit immediate.
@@ -2007,6 +2011,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
/// instruction.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr;
@@ -2057,6 +2062,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
/// instruction.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr;
@@ -2076,6 +2082,7 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
/// tryParseFPImm - A floating point immediate expression operand.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
bool Hash = false;
@@ -2138,6 +2145,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
/// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
if (Parser.getTok().is(AsmToken::Hash))
@@ -2229,6 +2237,7 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
/// parseCondCode - Parse a Condition Code operand.
bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
bool invertCondCode) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -2254,6 +2263,7 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
/// them if present.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
std::string LowerID = Tok.getString().lower();
AArch64_AM::ShiftExtendType ShOp =
@@ -2299,10 +2309,11 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
if (Hash)
Parser.Lex(); // Eat the '#'.
- // Make sure we do actually have a number
- if (!Parser.getTok().is(AsmToken::Integer)) {
- Error(Parser.getTok().getLoc(),
- "expected integer shift amount");
+ // Make sure we do actually have a number or a parenthesized expression.
+ SMLoc E = Parser.getTok().getLoc();
+ if (!Parser.getTok().is(AsmToken::Integer) &&
+ !Parser.getTok().is(AsmToken::LParen)) {
+ Error(E, "expected integer shift amount");
return MatchOperand_ParseFail;
}
@@ -2312,11 +2323,11 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE) {
- TokError("expected #imm after shift specifier");
+ Error(E, "expected constant '#imm' after shift specifier");
return MatchOperand_ParseFail;
}
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateShiftExtend(
ShOp, MCE->getValue(), true, S, E, getContext()));
return MatchOperand_Success;
@@ -2333,6 +2344,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Operands.push_back(
AArch64Operand::CreateToken("sys", false, NameLoc, getContext()));
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
@@ -2571,6 +2583,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
// Can be either a #imm style literal or an option name
@@ -2624,6 +2637,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -2638,6 +2652,7 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
/// tryParseVectorRegister - Parse a vector register operand.
bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier))
return true;
@@ -2686,6 +2701,7 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
// Try for a vector register.
if (!tryParseVectorRegister(Operands))
@@ -2728,6 +2744,7 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
}
bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
+ MCAsmParser &Parser = getParser();
bool HasELFModifier = false;
AArch64MCExpr::VariantKind RefKind;
@@ -2806,6 +2823,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket");
SMLoc S = getLoc();
Parser.Lex(); // Eat left bracket token.
@@ -2904,6 +2922,7 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -2949,6 +2968,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
/// operand regardless of the mnemonic.
bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode) {
+ MCAsmParser &Parser = getParser();
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -3114,6 +3134,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
Name = StringSwitch<StringRef>(Name.lower())
.Case("beq", "b.eq")
.Case("bne", "b.ne")
@@ -3562,12 +3583,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpected empty operand list!");
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
@@ -3817,7 +3838,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
  // a single subtarget feature is missing (e.g. NEON).
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -3831,7 +3852,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return showMatchError(IDLoc, MatchResult);
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -3906,11 +3927,15 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// ParseDirective parses the arm specific directives
bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ const MCObjectFileInfo::Environment Format =
+ getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
+
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
if (IDVal == ".hword")
@@ -3926,12 +3951,18 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".unreq")
return parseDirectiveUnreq(DirectiveID.getLoc());
+ if (!IsMachO && !IsCOFF) {
+ if (IDVal == ".inst")
+ return parseDirectiveInst(Loc);
+ }
+
return parseDirectiveLOH(IDVal, Loc);
}
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -3954,6 +3985,47 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// parseDirectiveInst
+/// ::= .inst opcode [, ...]
+bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ Error(Loc, "expected expression following directive");
+ return false;
+ }
+
+ for (;;) {
+ const MCExpr *Expr;
+
+ if (getParser().parseExpression(Expr)) {
+ Error(Loc, "expected expression");
+ return false;
+ }
+
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (!Value) {
+ Error(Loc, "expected constant expression");
+ return false;
+ }
+
+ getTargetStreamer().emitInst(Value->getValue());
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Loc, "unexpected token in directive");
+ return false;
+ }
+
+ Parser.Lex(); // Eat comma.
+ }
+
+ Parser.Lex();
+ return false;
+}
+
// parseDirectiveTLSDescCall:
// ::= .tlsdesccall symbol
bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
@@ -3985,10 +4057,9 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
// We successfully get a numeric value for the identifier.
// Check if it is valid.
int64_t Id = getParser().getTok().getIntVal();
- Kind = (MCLOHType)Id;
- // Check that Id does not overflow MCLOHType.
- if (!isValidMCLOHType(Kind) || Id != Kind)
+ if (Id <= -1U && !isValidMCLOHType(Id))
return TokError("invalid numeric identifier in directive");
+ Kind = (MCLOHType)Id;
} else {
StringRef Name = getTok().getIdentifier();
// We successfully parse an identifier.
@@ -4036,6 +4107,7 @@ bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
unsigned RegNum = tryParseRegister();
@@ -4067,7 +4139,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
auto pair = std::make_pair(IsVector, RegNum);
- if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ if (!RegisterReqs.insert(std::make_pair(Name, pair)).second)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
return true;
@@ -4076,6 +4148,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
Parser.eatToEndOfStatement();
@@ -4140,9 +4213,7 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
extern "C" void LLVMInitializeAArch64AsmParser() {
RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget);
RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget);
-
- RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64leTarget);
- RegisterMCAsmParser<AArch64AsmParser> W(TheARM64beTarget);
+ RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64Target);
}
#define GET_REGISTER_MATCHER
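
The new .inst directive (ELF-only, per the IsMachO/IsCOFF guard in ParseDirective) accepts one or more constant expressions and emits each as a raw 32-bit instruction word via emitInst. A usage sketch, using standard AArch64 encodings:

        .inst 0xd503201f              // NOP emitted as a raw opcode
        .inst 0xd65f03c0, 0xd503201f  // comma-separated list: RET, then NOP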
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index 789d549bb156..f26327ff84ad 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -2,7 +2,7 @@ set(LLVM_TARGET_DEFINITIONS AArch64.td)
tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
@@ -15,6 +15,7 @@ tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(AArch64CommonTableGen)
add_llvm_target(AArch64CodeGen
+ AArch64A57FPLoadBalancing.cpp
AArch64AddressTypePromotion.cpp
AArch64AdvSIMDScalarPass.cpp
AArch64AsmPrinter.cpp
@@ -25,13 +26,16 @@ add_llvm_target(AArch64CodeGen
AArch64DeadRegisterDefinitionsPass.cpp
AArch64ExpandPseudoInsts.cpp
AArch64FastISel.cpp
+ AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
+ AArch64ConditionOptimizer.cpp
AArch64ISelDAGToDAG.cpp
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
AArch64MCInstLower.cpp
AArch64PromoteConstant.cpp
+ AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
AArch64SelectionDAGInfo.cpp
AArch64StorePairSuppress.cpp
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 6de27d6d51a5..878e29c09744 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -15,12 +15,11 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -200,26 +199,24 @@ static MCDisassembler *createAArch64Disassembler(const Target &T,
}
DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
Size = 0;
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
+ if (Bytes.size() < 4)
return Fail;
Size = 4;
  // Encoded as a little-endian 32-bit word in the stream.
- uint32_t insn =
- (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
+ uint32_t Insn =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
// Calling the auto-generated decoder function.
- return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
+ return decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
}
static MCSymbolizer *
@@ -243,13 +240,9 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget,
createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCDisassembler(TheARM64leTarget,
- createAArch64Disassembler);
- TargetRegistry::RegisterMCDisassembler(TheARM64beTarget,
+ TargetRegistry::RegisterMCDisassembler(TheARM64Target,
createAArch64Disassembler);
- TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget,
- createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget,
+ TargetRegistry::RegisterMCSymbolizer(TheARM64Target,
createAArch64ExternalSymbolizer);
}
@@ -592,7 +585,7 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
// scale{5} is asserted as 1 in tblgen.
- Imm |= 0x20;
+ Imm |= 0x20;
Inst.addOperand(MCOperand::CreateImm(64 - Imm));
return Success;
}
@@ -614,7 +607,7 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
if (ImmVal & (1 << (19 - 1)))
ImmVal |= ~((1LL << 19) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr,
+ if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr,
Inst.getOpcode() != AArch64::LDRXl, 0, 4))
Inst.addOperand(MCOperand::CreateImm(ImmVal));
return Success;
@@ -630,35 +623,19 @@ static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
- const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
-
- Imm |= 0x8000;
Inst.addOperand(MCOperand::CreateImm(Imm));
- bool ValidNamed;
- (void)AArch64SysReg::MRSMapper(STI.getFeatureBits())
- .toString(Imm, ValidNamed);
-
- return ValidNamed ? Success : Fail;
+ // Every system register in the encoding space is valid with the syntax
+ // S<op0>_<op1>_<Cn>_<Cm>_<op2>, so decoding system registers always succeeds.
+ return Success;
}
static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
- const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
-
- Imm |= 0x8000;
Inst.addOperand(MCOperand::CreateImm(Imm));
- bool ValidNamed;
- (void)AArch64SysReg::MSRMapper(STI.getFeatureBits())
- .toString(Imm, ValidNamed);
-
- return ValidNamed ? Success : Fail;
+ return Success;
}
static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
@@ -1510,7 +1487,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
if (imm & (1 << (26 - 1)))
imm |= ~((1LL << 26) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
+ if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(imm));
return Success;
@@ -1530,7 +1507,7 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
bool ValidNamed;
(void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
-
+
return ValidNamed ? Success : Fail;
}
@@ -1552,7 +1529,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
Inst.addOperand(MCOperand::CreateImm(bit));
- if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
+ if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(dst));
return Success;
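
Two details in the disassembler hunk are easy to miss: getInstruction now takes its input as an ArrayRef of bytes and assembles the little-endian word explicitly, and the offset scaling switched from << 2 to * 4, which keeps the arithmetic well defined in C++ once the 19- or 26-bit field has been sign-extended to a negative value. A standalone sketch of both steps:

    #include <cstdint>

    // Assemble a little-endian 32-bit instruction word from raw bytes.
    uint32_t readLE32(const uint8_t B[4]) {
      return (uint32_t(B[3]) << 24) | (uint32_t(B[2]) << 16) |
             (uint32_t(B[1]) << 8) | uint32_t(B[0]);
    }

    // Sign-extend a 19-bit PC-relative field and scale it to a byte offset.
    int64_t branchOffsetBytes(uint32_t Imm19) {
      int64_t Off = Imm19;
      if (Off & (1 << 18))         // bit 18 is the field's sign bit
        Off |= ~((1LL << 19) - 1); // extend into the upper bits
      return Off * 4;              // targets are word-aligned
    }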
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 68d4867977b0..7fb57adfeeba 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64DISASSEMBLER_H
-#define AArch64DISASSEMBLER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
+#define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
#include "llvm/MC/MCDisassembler.h"
@@ -28,11 +28,10 @@ public:
~AArch64Disassembler() {}
- /// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
- getInstruction(MCInst &instr, uint64_t &size, const MemoryObject &region,
- uint64_t address, raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // namespace llvm
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index 171d31c48cd7..12b8450b13c6 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64EXTERNALSYMBOLIZER_H
-#define AArch64EXTERNALSYMBOLIZER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H
+#define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H
#include "llvm/MC/MCExternalSymbolizer.h"
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
index a4224f4a2f53..62827e8f50eb 100644
--- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = AArch64Disassembler
parent = AArch64
-required_libraries = AArch64Info AArch64Utils MC Support
+required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 8a21f0650cdc..46a1d7978729 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -16,8 +16,8 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -1223,7 +1223,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << (Op.getImm() << 2);
+ O << "#" << (Op.getImm() * 4);
return;
}
@@ -1247,7 +1247,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << (Op.getImm() << 12);
+ O << "#" << (Op.getImm() * (1 << 12));
return;
}
@@ -1276,24 +1276,20 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- bool Valid;
auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val, Valid);
+ std::string Name = Mapper.toString(Val);
- if (Valid)
- O << StringRef(Name).upper();
+ O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- bool Valid;
auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val, Valid);
+ std::string Name = Mapper.toString(Val);
- if (Valid)
- O << StringRef(Name).upper();
+ O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index fe7666e5cadb..5f51621d674c 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64INSTPRINTER_H
-#define AArch64INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
+#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
@@ -127,8 +127,9 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
- virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
}
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 642c18394a67..573fa10561cf 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = AArch64CodeGen
parent = AArch64
-required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 8b1e44e26e93..4db9dea241a9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
-#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -51,7 +51,7 @@ enum ShiftExtendType {
/// getShiftName - Get the string encoding for the shift type.
static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) {
switch (ST) {
- default: assert(false && "unhandled shift type!");
+ default: llvm_unreachable("unhandled shift type!");
case AArch64_AM::LSL: return "lsl";
case AArch64_AM::LSR: return "lsr";
case AArch64_AM::ASR: return "asr";
@@ -210,67 +210,63 @@ static inline uint64_t ror(uint64_t elt, unsigned size) {
/// as the immediate operand of a logical instruction for the given register
/// size. If so, return true with "encoding" set to the encoded value in
/// the form N:immr:imms.
-static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
- uint64_t &encoding) {
- if (imm == 0ULL || imm == ~0ULL ||
- (regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
+static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
+ uint64_t &Encoding) {
+ if (Imm == 0ULL || Imm == ~0ULL ||
+ (RegSize != 64 && (Imm >> RegSize != 0 || Imm == ~0U)))
return false;
- unsigned size = 2;
- uint64_t eltVal = imm;
-
// First, determine the element size.
- while (size < regSize) {
- unsigned numElts = regSize / size;
- unsigned mask = (1ULL << size) - 1;
- uint64_t lowestEltVal = imm & mask;
-
- bool allMatched = true;
- for (unsigned i = 1; i < numElts; ++i) {
- uint64_t currEltVal = (imm >> (i*size)) & mask;
- if (currEltVal != lowestEltVal) {
- allMatched = false;
- break;
- }
- }
+ unsigned Size = RegSize;
+
+ do {
+ Size /= 2;
+ uint64_t Mask = (1ULL << Size) - 1;
- if (allMatched) {
- eltVal = lowestEltVal;
+ if ((Imm & Mask) != ((Imm >> Size) & Mask)) {
+ Size *= 2;
break;
}
-
- size *= 2;
- }
+ } while (Size > 2);
// Second, determine the rotation to make the element be: 0^m 1^n.
- for (unsigned i = 0; i < size; ++i) {
- eltVal = ror(eltVal, size);
- uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
- uint32_t cto = CountTrailingOnes_64(eltVal);
-
- if (clz + cto == size) {
- // Encode in immr the number of RORs it would take to get *from* this
- // element value to our target value, where i+1 is the number of RORs
- // to go the opposite direction.
- unsigned immr = size - (i + 1);
-
- // If size has a 1 in the n'th bit, create a value that has zeroes in
- // bits [0, n] and ones above that.
- uint64_t nimms = ~(size-1) << 1;
-
- // Or the CTO value into the low bits, which must be below the Nth bit
- // bit mentioned above.
- nimms |= (cto-1);
-
- // Extract the seventh bit and toggle it to create the N field.
- unsigned N = ((nimms >> 6) & 1) ^ 1;
-
- encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
- return true;
- }
+ uint32_t CTO, I;
+ uint64_t Mask = ((uint64_t)-1LL) >> (64 - Size);
+ Imm &= Mask;
+
+ if (isShiftedMask_64(Imm)) {
+ I = countTrailingZeros(Imm);
+ assert(I < 64 && "undefined behavior");
+ CTO = CountTrailingOnes_64(Imm >> I);
+ } else {
+ Imm |= ~Mask;
+ if (!isShiftedMask_64(~Imm))
+ return false;
+
+ unsigned CLO = CountLeadingOnes_64(Imm);
+ I = 64 - CLO;
+ CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size);
}
- return false;
+ // Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
+ // to our target value, where I is the number of RORs to go the opposite
+ // direction.
+ assert(Size > I && "I should be smaller than element size");
+ unsigned Immr = (Size - I) & (Size - 1);
+
+ // If size has a 1 in the n'th bit, create a value that has zeroes in
+ // bits [0, n] and ones above that.
+ uint64_t NImms = ~(Size-1) << 1;
+
+ // Or the CTO value into the low bits, which must be below the Nth
+ // bit mentioned above.
+ NImms |= (CTO-1);
+
+ // Extract the seventh bit and toggle it to create the N field.
+ unsigned N = ((NImms >> 6) & 1) ^ 1;
+
+ Encoding = (N << 12) | (Immr << 6) | (NImms & 0x3f);
+ return true;
}
/// isLogicalImmediate - Return true if the immediate is valid for a logical
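To sanity-check what the rewritten processLogicalImmediate emits, the inverse transform is short enough to sketch standalone. The helper below is hypothetical (not part of this patch); it follows the DecodeBitMasks pseudocode from the ARMv8 ARM and assumes a GCC/Clang __builtin_clz:

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper: expand an N:immr:imms logical-immediate encoding
    // back into the 64-bit value it represents.
    static uint64_t decodeLogicalImm(unsigned N, unsigned Immr, unsigned Imms) {
      unsigned HighBits = (N << 6) | (~Imms & 0x3f);
      assert(HighBits != 0 && "all-ones element is not encodable");
      unsigned Len = 31 - __builtin_clz(HighBits); // log2 of the element size
      unsigned Size = 1u << Len;                   // 2, 4, 8, 16, 32 or 64
      unsigned R = Immr & (Size - 1);              // rotate amount
      unsigned S = Imms & (Size - 1);              // S+1 trailing ones
      uint64_t Elt = ~0ULL >> (64 - (S + 1));      // the 0^m 1^n element
      uint64_t EltMask = Size == 64 ? ~0ULL : (1ULL << Size) - 1;
      if (R) // rotate right within the element
        Elt = ((Elt >> R) | (Elt << (Size - R))) & EltMask;
      uint64_t Imm = 0; // replicate the element across all 64 bits
      for (unsigned I = 0; I < 64; I += Size)
        Imm |= Elt << I;
      return Imm;
    }

    int main() {
      // 16-bit elements of 0x00ff (8 ones, no rotate): N=0 immr=0 imms=0b100111.
      assert(decodeLogicalImm(0, 0, 0x27) == 0x00ff00ff00ff00ffULL);
      // 2-bit elements 0b10 (one bit rotated by 1): N=0 immr=1 imms=0b111100.
      assert(decodeLogicalImm(0, 1, 0x3c) == 0xaaaaaaaaaaaaaaaaULL);
      return 0;
    }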
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a917616f142d..423da6500c48 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -13,10 +13,11 @@
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -131,7 +132,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
- assert(false && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
if (SignedValue > 2097151 || SignedValue < -2097152)
report_fatal_error("fixup value out of range");
@@ -238,7 +239,7 @@ bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
void AArch64AsmBackend::relaxInstruction(const MCInst &Inst,
MCInst &Res) const {
- assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented");
+ llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented");
}
bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
@@ -316,42 +317,6 @@ public:
MachO::CPU_SUBTYPE_ARM64_ALL);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Any section for which the linker breaks things into atoms needs to
- // preserve symbols, including assembler local symbols, to identify
- // those atoms. These sections are:
- // Sections of type:
- //
- // S_CSTRING_LITERALS (e.g. __cstring)
- // S_LITERAL_POINTERS (e.g. objc selector pointers)
- // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
- //
- // Sections named:
- //
- // __TEXT,__eh_frame
- // __TEXT,__ustring
- // __DATA,__cfstring
- // __DATA,__objc_classrefs
- // __DATA,__objc_catlist
- //
- // FIXME: It would be better if the compiler used actual linker local
- // symbols for each of these sections rather than preserving what
- // are ostensibly assembler local symbols.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
- return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
- SMO.getType() == MachO::S_4BYTE_LITERALS ||
- SMO.getType() == MachO::S_8BYTE_LITERALS ||
- SMO.getType() == MachO::S_16BYTE_LITERALS ||
- SMO.getType() == MachO::S_LITERAL_POINTERS ||
- (SMO.getSegmentName() == "__TEXT" &&
- (SMO.getSectionName() == "__eh_frame" ||
- SMO.getSectionName() == "__ustring")) ||
- (SMO.getSegmentName() == "__DATA" &&
- (SMO.getSectionName() == "__cfstring" ||
- SMO.getSectionName() == "__objc_classrefs" ||
- SMO.getSectionName() == "__objc_catlist")));
- }
-
/// \brief Generate the compact unwind encoding from the CFI directives.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
@@ -534,8 +499,8 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = static_cast<const MCSectionELF *>(Sec);
- if (SecELF->getSectionName() == ".eh_frame")
+ const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
+ if (SecELF && SecELF->getSectionName() == ".eh_frame")
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
@@ -551,7 +516,8 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
return new DarwinAArch64AsmBackend(T, MRI);
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
- return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true);
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
@@ -561,6 +527,7 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
- return new ELFAArch64AsmBackend(T, TheTriple.getOS(),
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFAArch64AsmBackend(T, OSABI,
/*IsLittleEndian=*/false);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index e05191eaf3e0..5ea49c3b1a79 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC)
return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC)
- return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ return ELF::R_AARCH64_TLSDESC_ADR_PAGE21;
llvm_unreachable("invalid symbol kind for ADRP relocation");
case AArch64::fixup_aarch64_pcrel_branch26:
return ELF::R_AARCH64_JUMP26;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index a79406d9d1fe..8dc6c305b981 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -15,8 +15,10 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -34,12 +36,42 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
+class AArch64ELFStreamer;
+
+class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
+ formatted_raw_ostream &OS;
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+};
+
+AArch64TargetAsmStreamer::AArch64TargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS)
+ : AArch64TargetStreamer(S), OS(OS) {}
+
+void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
+ OS << "\t.inst\t0x" << utohexstr(Inst) << "\n";
+}
+
+class AArch64TargetELFStreamer : public AArch64TargetStreamer {
+private:
+ AArch64ELFStreamer &getStreamer();
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
+};
+
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
@@ -55,6 +87,8 @@ namespace {
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
+ friend class AArch64TargetELFStreamer;
+
AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
@@ -82,6 +116,18 @@ public:
MCELFStreamer::EmitInstruction(Inst, STI);
}
+ void emitInst(uint32_t Inst) {
+ char Buffer[4];
+ const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian();
+
+ EmitA64MappingSymbol();
+ for (unsigned II = 0; II != 4; ++II) {
+ const unsigned I = LittleEndian ? (4 - II - 1) : II;
+ Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+ MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
+ }
+
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
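The byte loop in the new emitInst is the subtle part of this hunk: the buffer is filled back-to-front while the shift index runs forwards or backwards with the target endianness, so the instruction word lands in target memory order either way. A standalone sketch (hypothetical encodeInst helper, plain C++):

    #include <climits>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical standalone copy of the loop in emitInst above.
    static void encodeInst(uint32_t Inst, bool LittleEndian, uint8_t Buffer[4]) {
      for (unsigned II = 0; II != 4; ++II) {
        const unsigned I = LittleEndian ? (4 - II - 1) : II;
        Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
      }
    }

    int main() {
      uint8_t LE[4], BE[4];
      encodeInst(0xD503201F /* AArch64 NOP */, true, LE);
      encodeInst(0xD503201F, false, BE);
      printf("%02x %02x %02x %02x\n", LE[0], LE[1], LE[2], LE[3]); // 1f 20 03 d5
      printf("%02x %02x %02x %02x\n", BE[0], BE[1], BE[2], BE[3]); // d5 03 20 1f
      return 0;
    }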
@@ -131,7 +177,9 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ auto Sec = getCurrentSection().first;
+ assert(Sec && "need a section");
+ Symbol->setSection(*Sec);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -144,17 +192,35 @@ private:
/// @}
};
+} // end anonymous namespace
+
+AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
+ return static_cast<AArch64ELFStreamer &>(Streamer);
+}
+
+void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
+ getStreamer().emitInst(Inst);
}
namespace llvm {
+MCStreamer *
+createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
+ new AArch64TargetAsmStreamer(*S, OS);
+ return S;
+}
+
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
+ bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
return S;
}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index bc6973bd5f8b..71b05ccdc6a8 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64_ELF_STREAMER_H
-#define LLVM_AARCH64_ELF_STREAMER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ELFSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ELFSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
@@ -20,7 +20,7 @@ namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
+ bool RelaxAll);
}
-#endif // AArch64_ELF_STREAMER_H
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index bf405fbac77b..0f5b765c7697 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AArch64FIXUPKINDS_H
-#define LLVM_AArch64FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64FIXUPKINDS_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 1763b40e2d48..f048474fd4a9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -13,8 +13,8 @@
#include "AArch64MCAsmInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -37,6 +37,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
PrivateGlobalPrefix = "L";
+ PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -66,7 +67,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
Triple T(TT);
- if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be)
+ if (T.getArch() == Triple::aarch64_be)
IsLittleEndian = false;
// We prefer NEON instructions to be printed in the short form.
@@ -79,6 +80,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
CommentString = "//";
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
Code32Directive = ".code\t32";
Data16bitsDirective = "\t.hword\t";
@@ -89,7 +91,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
WeakRefDirective = "\t.weak\t";
- HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 42a031d7c2ca..5d03c213c782 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64TARGETASMINFO_H
-#define AArch64TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index f0513575edb4..4756a1924198 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -15,13 +15,13 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -437,8 +437,7 @@ AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
return 3;
}
- assert(false && "Invalid value for vector shift amount!");
- return 0;
+ llvm_unreachable("Invalid value for vector shift amount!");
}
uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 42a6787da48e..e396df82197b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -90,8 +90,9 @@ const MCSection *AArch64MCExpr::FindAssociatedSection() const {
}
bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup))
return false;
Res =
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 5422f9d7067e..db48ac9f5692 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AArch64MCEXPR_H
-#define LLVM_AArch64MCEXPR_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
@@ -152,7 +152,8 @@ public:
const MCSection *FindAssociatedSection() const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index ae698c59f6ce..0f7a6b81b41d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -126,15 +126,14 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
/*LabelSections*/ true);
- return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
// Force static initialization.
@@ -142,17 +141,14 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
createAArch64MCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
createAArch64MCCodeGenInfo);
// Register the MC instruction info.
@@ -160,9 +156,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
createAArch64MCInstrInfo);
// Register the MC register info.
@@ -170,9 +164,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCRegInfo(TheARM64Target,
createAArch64MCRegisterInfo);
// Register the MC subtarget info.
@@ -180,9 +172,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
createAArch64MCSubtargetInfo);
// Register the asm backend.
@@ -190,19 +180,15 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget,
+ TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
createAArch64leAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget,
- createAArch64beAsmBackend);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
createAArch64MCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget,
+ TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
createAArch64MCCodeEmitter);
// Register the object streamer.
@@ -210,16 +196,21 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
+ createAArch64MCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
+ createAArch64MCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheARM64Target,
+ createAArch64MCAsmStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
createAArch64MCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget,
+ TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
createAArch64MCInstPrinter);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index d886ea23c13e..1553115781c3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -11,19 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64MCTARGETDESC_H
-#define AArch64MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCInstPrinter;
class MCRegisterInfo;
class MCObjectWriter;
+class MCStreamer;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -31,8 +34,7 @@ class raw_ostream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
-extern Target TheARM64leTarget;
-extern Target TheARM64beTarget;
+extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -51,6 +53,11 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
+MCStreamer *
+createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst);
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index ba9536676e12..f6fab5d6b6fe 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -9,15 +9,16 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -33,7 +34,7 @@ public:
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -112,8 +113,25 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
}
}
+static bool canUseLocalRelocation(const MCSectionMachO &Section,
+ const MCSymbol &Symbol, unsigned Log2Size) {
+ // Debug info sections can use local relocations.
+ if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
+ return true;
+
+ // Otherwise, only pointer sized relocations are supported.
+ if (Log2Size != 3)
+ return false;
+
+ // But only if they don't point to a cstring.
+ if (!Symbol.isInSection())
+ return true;
+ const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ return RefSec.getType() != MachO::S_CSTRING_LITERALS;
+}
+
void AArch64MachObjectWriter::RecordRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -123,9 +141,9 @@ void AArch64MachObjectWriter::RecordRelocation(
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
+ const MCSymbolData *RelSymbol = nullptr;
FixupOffset += Fixup.getOffset();
@@ -171,10 +189,8 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
- Index = 0;
if (IsPCRel) {
- IsExtern = 1;
Asm.getContext().FatalError(Fixup.getLoc(),
"PC relative absolute relocation!");
@@ -198,15 +214,12 @@ void AArch64MachObjectWriter::RecordRelocation(
Layout.getSymbolOffset(&B_SD) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
@@ -252,26 +265,31 @@ void AArch64MachObjectWriter::RecordRelocation(
? 0
: Writer->getSymbolAddress(B_Base, Layout));
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
- Index = B_Base->getIndex();
- IsExtern = 1;
+ RelSymbol = B_Base;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());
+ bool CanUseLocalRelocation =
+ canUseLocalRelocation(Section, *Symbol, Log2Size);
+ if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
+
+ const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section) resolve
// the relocation based its expansion instead.
@@ -288,7 +306,8 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Will the Target we already have ever have any data in it
// we need to preserve and merge with the new Target? How about
// the FixedValue?
- if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout))
+ if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout,
+ &Fixup))
Asm.getContext().FatalError(Fixup.getLoc(),
"unable to resolve variable '" +
Symbol->getName() + "'");
@@ -309,16 +328,13 @@ void AArch64MachObjectWriter::RecordRelocation(
// sections, and for pointer-sized relocations (.quad), we allow section
// relocations. It's code sections that run into trouble.
if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
+ RelSymbol = Base;
// Add the local offset, if needed.
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection()) {
- // Pointer-sized relocations can use a local relocation. Otherwise,
- // we have to be in a debug info section.
- if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
+ if (!CanUseLocalRelocation)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
@@ -328,7 +344,6 @@ void AArch64MachObjectWriter::RecordRelocation(
const MCSectionData &SymSD =
Asm.getSectionData(SD.getSymbol().getSection());
Index = SymSD.getOrdinal() + 1;
- IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);
if (IsPCRel)
@@ -361,16 +376,16 @@ void AArch64MachObjectWriter::RecordRelocation(
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
+ RelSymbol = nullptr;
IsPCRel = 0;
Log2Size = 2;
- IsExtern = 0;
// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;
@@ -382,9 +397,9 @@ void AArch64MachObjectWriter::RecordRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
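For reference, the r_word1 packing this function now builds in three places drops the explicit r_extern bit; presumably the new addRelocation overload derives the extern-ness from RelSymbol being non-null. A standalone sketch of the field layout (hypothetical packRWord1 helper, mirroring the shift amounts used above):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical helper mirroring the r_word1 packing in the hunk above.
    static uint32_t packRWord1(unsigned Index, unsigned IsPCRel,
                               unsigned Log2Size, unsigned Type) {
      return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
    }

    int main() {
      // An 8-byte, non-pcrel ARM64_RELOC_UNSIGNED (type 0) against section 2:
      printf("0x%08x\n", packRWord1(2, 0, 3, 0)); // 0x06000002
      return 0;
    }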
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index dcc1a3c9f05e..e3112fa76d3b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -39,3 +39,5 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+
+void AArch64TargetStreamer::emitInst(uint32_t Inst) {}
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 3a382c165e7c..f42ecb1677de 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -14,18 +14,19 @@ using namespace llvm;
namespace llvm {
Target TheAArch64leTarget;
Target TheAArch64beTarget;
-Target TheARM64leTarget;
-Target TheARM64beTarget;
+Target TheARM64Target;
} // end namespace llvm
extern "C" void LLVMInitializeAArch64TargetInfo() {
- RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64leTarget, "arm64",
- "AArch64 (little endian)");
- RegisterTarget<Triple::arm64_be, /*HasJIT=*/true> Y(TheARM64beTarget, "arm64_be",
- "AArch64 (big endian)");
+ // Now register the "arm64" name for use with "-march". We don't want it to
+ // take possession of the Triple::aarch64 tag though.
+ TargetRegistry::RegisterTarget(TheARM64Target, "arm64",
+ "ARM64 (little endian)",
+ [](Triple::ArchType) { return false; }, true);
RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
TheAArch64leTarget, "aarch64", "AArch64 (little endian)");
RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)");
+
}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 3c24bb30a26d..bc6c7a96f85e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -791,22 +791,22 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
}
- // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
- // are: 11 xxx 1x11 xxxx xxx
- Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name
+ Regex GenericRegPattern("^s([0-3])_([0-7])_c([0-9]|1[0-5])_c([0-9]|1[0-5])_([0-7])$");
- SmallVector<StringRef, 4> Ops;
+ SmallVector<StringRef, 5> Ops;
if (!GenericRegPattern.match(NameLower, &Ops)) {
Valid = false;
return -1;
}
- uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Op0 = 0, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
uint32_t Bits;
- Ops[1].getAsInteger(10, Op1);
- Ops[2].getAsInteger(10, CRn);
- Ops[3].getAsInteger(10, CRm);
- Ops[4].getAsInteger(10, Op2);
+ Ops[1].getAsInteger(10, Op0);
+ Ops[2].getAsInteger(10, Op1);
+ Ops[3].getAsInteger(10, CRn);
+ Ops[4].getAsInteger(10, CRm);
+ Ops[5].getAsInteger(10, Op2);
Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
Valid = true;
@@ -814,11 +814,10 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
if (SysRegPairs[i].Value == Bits) {
- Valid = true;
return SysRegPairs[i].Name;
}
}
@@ -827,7 +826,6 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
if (FeatureBits & AArch64::ProcCyclone) {
for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
if (CycloneSysRegPairs[i].Value == Bits) {
- Valid = true;
return CycloneSysRegPairs[i].Name;
}
}
@@ -837,28 +835,18 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
// write-only).
for (unsigned i = 0; i < NumInstPairs; ++i) {
if (InstPairs[i].Value == Bits) {
- Valid = true;
return InstPairs[i].Name;
}
}
+ assert(Bits < 0x10000);
uint32_t Op0 = (Bits >> 14) & 0x3;
uint32_t Op1 = (Bits >> 11) & 0x7;
uint32_t CRn = (Bits >> 7) & 0xf;
uint32_t CRm = (Bits >> 3) & 0xf;
uint32_t Op2 = Bits & 0x7;
- // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
- // name.
- if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
- Valid = false;
- return "";
- }
-
- assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
-
- Valid = true;
- return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ return "s" + utostr(Op0)+ "_" + utostr(Op1) + "_c" + utostr(CRn)
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
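Together with the relaxed GenericRegPattern in fromString, every value in the 16-bit encoding space now round-trips through a generic name. A standalone sketch of that round trip (hypothetical helpers, using the same bit layout as the code above):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Hypothetical helpers mirroring the fromString/toString bit layout.
    static uint32_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                               unsigned CRm, unsigned Op2) {
      return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
    }

    static std::string formatSysReg(uint32_t Bits) {
      assert(Bits < 0x10000);
      char Buf[32];
      snprintf(Buf, sizeof(Buf), "s%u_%u_c%u_c%u_%u", (Bits >> 14) & 0x3,
               (Bits >> 11) & 0x7, (Bits >> 7) & 0xf, (Bits >> 3) & 0xf,
               Bits & 0x7);
      return Buf;
    }

    int main() {
      // MIDR_EL1 is op0=3, op1=0, CRn=0, CRm=0, op2=0.
      assert(formatSysReg(packSysReg(3, 0, 0, 0, 0)) == "s3_0_c0_c0_0");
      return 0;
    }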
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9d2ce21c9626..c60b09a5f119 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64BASEINFO_H
-#define AArch64BASEINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64BASEINFO_H
+#define LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64BASEINFO_H
// FIXME: Is it easiest to fix this layering violation by moving the .inc
// #includes from AArch64MCTargetDesc.h to here?
@@ -1143,7 +1143,7 @@ namespace AArch64SysReg {
SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits, bool &Valid) const;
+ std::string toString(uint32_t Bits) const;
};
struct MSRMapper : SysRegMapper {
@@ -1271,7 +1271,12 @@ namespace AArch64II {
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20
+ MO_TLS = 0x20,
+
+ /// MO_CONSTPOOL - This flag indicates that a symbol operand represents
+ /// the address of a constant pool entry for the symbol, rather than the
+ /// address of the symbol itself.
+ MO_CONSTPOOL = 0x40
};
} // end namespace AArch64II
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 92eaf9e1c9b3..387f1f64e836 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -34,6 +34,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
#include <set>
using namespace llvm;
@@ -676,8 +678,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
- TRI = Fn.getTarget().getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 55df29c1499a..02db53a27455 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_ARM_H
-#define TARGET_ARM_H
+#ifndef LLVM_LIB_TARGET_ARM_ARM_H
+#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
@@ -23,7 +23,6 @@ class ARMAsmPrinter;
class ARMBaseTargetMachine;
class FunctionPass;
class ImmutablePass;
-class JITCodeEmitter;
class MachineInstr;
class MCInst;
class TargetLowering;
@@ -31,10 +30,6 @@ class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
-
-FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
- JITCodeEmitter &JCE);
-
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 7916ccc180c8..244014b5c29f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -228,6 +228,15 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
FeatureAvoidPartialCPSR,
FeatureTrustZone, FeatureVirtualization]>;
+def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
+ "Cortex-A17 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureVFP4,
+ FeatureHWDiv, FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureTrustZone]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureHWDiv, FeatureHWDivARM,
@@ -261,13 +270,6 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
FeatureHWDivARM]>;
-def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS",
- "Use the APCS ABI">;
-
-def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS",
- "Use the AAPCS ABI">;
-
-
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -347,12 +349,8 @@ def : ProcessorModel<"cortex-a8", CortexA8Model,
FeatureAClass]>;
def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
+ FeatureDSPThumb2, FeatureHasRAS, FeatureMP,
FeatureAClass]>;
-def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
- [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
// FIXME: A12 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a12", CortexA9Model,
@@ -366,6 +364,12 @@ def : ProcessorModel<"cortex-a15", CortexA9Model,
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: A17 has currently the same Schedule model as A9
+def : ProcessorModel<"cortex-a17", CortexA9Model,
+ [ProcA17, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS, FeatureAClass]>;
+
// FIXME: krait has currently the same Schedule model as A9
def : ProcessorModel<"krait", CortexA9Model,
[ProcKrait, HasV7Ops,
@@ -392,6 +396,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureD16,
FeatureMClass]>;
+def : ProcNoItin<"cortex-m7", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureFPARMv8,
+ FeatureD16, FeatureMClass]>;
+
// Swift uArch Processors.
def : ProcessorModel<"swift", SwiftModel,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 28d2610c3903..b17d4aa19f70 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -76,7 +76,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -119,6 +120,23 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // If we need V4T thumb mode Register Indirect Jump pads, emit them.
+ // These are created per function, rather than per TU, since it's
+ // relatively easy to exceed the thumb branch range within a TU.
+ if (!ThumbIndirectPads.empty()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ EmitAlignment(1);
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ OutStreamer.EmitLabel(ThumbIndirectPads[i].second);
+ EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
+ .addReg(ThumbIndirectPads[i].first)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+ ThumbIndirectPads.clear();
+ }
+
// We didn't modify anything.
return false;
}
@@ -136,7 +154,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
assert(!MO.getSubReg() && "Subregs should be eliminated!");
if(ARM::GPRPairRegClass.contains(Reg)) {
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
Reg = TRI->getSubReg(Reg, ARM::gsub_0);
}
O << ARMInstPrinter::getRegisterName(Reg);
@@ -182,7 +200,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
@@ -191,7 +209,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -229,7 +247,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'y': // Print a VFP single precision register as indexed double.
if (MI->getOperand(OpNum).isReg()) {
unsigned Reg = MI->getOperand(OpNum).getReg();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
@@ -261,7 +279,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// inline asm statement.
O << "{";
if (ARM::GPRPairRegClass.contains(RegBegin)) {
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
@@ -317,7 +335,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
@@ -343,7 +361,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
if (!ARM::QPRRegClass.contains(Reg))
return true;
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
ARM::dsub_0 : ARM::dsub_1);
O << ARMInstPrinter::getRegisterName(SubReg);
@@ -358,7 +376,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned Reg = MO.getReg();
if(!ARM::GPRPairRegClass.contains(Reg))
return false;
@@ -478,6 +496,9 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emit ARM Build Attributes
if (Subtarget->isTargetELF())
emitAttributes();
+
+ if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb())
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
}
static void
@@ -558,7 +579,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (auto &stub: Stubs) {
OutStreamer.EmitLabel(stub.first);
@@ -608,6 +629,8 @@ void ARMAsmPrinter::emitAttributes() {
MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09");
+
ATS.switchVendor("aeabi");
std::string CPUString = Subtarget->getCPUString();
@@ -663,7 +686,9 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::AllowNeonARMv8);
} else {
if (Subtarget->hasFPARMv8())
- ATS.emitFPU(ARM::FP_ARMV8);
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8);
else if (Subtarget->hasVFP4())
ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
else if (Subtarget->hasVFP3())
@@ -688,11 +713,44 @@ void ARMAsmPrinter::emitAttributes() {
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::IEEEDenormals);
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
+
+ // If the user has permitted this code to choose the IEEE 754
+ // rounding at run-time, emit the rounding attribute.
+ if (TM.Options.HonorSignDependentRoundingFPMathOption)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding,
+ ARMBuildAttrs::Allowed);
+ } else {
+ if (!Subtarget->hasVFP2()) {
+ // When the target doesn't have an FPU (by design or
+ // intention), the assumptions made on the software support
+ // mirror that of the equivalent hardware support *if it
+ // existed*. For v7 and better we indicate that denormals are
+ // flushed preserving sign, and for v6 we indicate that
+ // denormals are flushed to positive zero.
+ if (Subtarget->hasV7Ops())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ } else if (Subtarget->hasVFP3()) {
+ // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
+ // the sign bit of the zero matches the sign bit of the input or
+ // result that is being flushed to zero.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ }
+ // For VFPv2 implementations it is implementation defined as
+ // to whether denormals are flushed to positive zero or to
+ // whatever the sign of zero is (ARM v7AR ARM 2.7.5). Historically
+ // LLVM has chosen to flush this to positive zero (most likely for
+ // GCC compatibility), so that's the chosen value here (the
+ // absence of its emission implies zero).
}
+ // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
+ // equivalent of GCC's -ffinite-math-only flag.
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
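The denormal handling above reduces to a small decision table. A hedged, self-contained restatement (the enum values stand in for the real ARMBuildAttrs constants; NotEmitted means the tag is simply omitted, which readers interpret as flush-to-positive-zero):

    enum FPDenormal { NotEmitted, IEEEDenormals, PreserveFPSign };

    FPDenormal denormalAttr(bool UnsafeFPMath, bool HasVFP2, bool HasVFP3,
                            bool HasV7Ops) {
      if (!UnsafeFPMath)
        return IEEEDenormals;             // full IEEE 754 denormal behaviour
      if (!HasVFP2)                       // no FPU: mirror the hardware that
        return HasV7Ops ? PreserveFPSign  // *would* exist at this arch level
                        : NotEmitted;     // v6 and below
      if (HasVFP3)
        return PreserveFPSign;            // VFPv3/VFPv4 flush preserving sign
      return NotEmitted;                  // VFPv2: implementation defined
    }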
@@ -700,6 +758,13 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEE754);
+ if (Subtarget->allowsUnalignedMem())
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
@@ -719,6 +784,13 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasFP16())
ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+ // FIXME: To support emitting this build attribute as GCC does, the
+ // -mfp16-format option and associated plumbing must be
+ // supported. For now the __fp16 type is exposed by default, so this
+ // attribute should be emitted with value 1.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
+ ARMBuildAttrs::FP16FormatIEEE);
+
if (Subtarget->hasMPExtension())
ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
@@ -735,7 +807,7 @@ void ARMAsmPrinter::emitAttributes() {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
// FIXME: There is no way to emit value 0 (wchar_t prohibited).
- if (auto WCharWidthValue = cast_or_null<ConstantInt>(
+ if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("wchar_size"))) {
int WCharWidth = WCharWidthValue->getZExtValue();
assert((WCharWidth == 2 || WCharWidth == 4) &&
@@ -746,7 +818,7 @@ void ARMAsmPrinter::emitAttributes() {
// ABI_enum_size to indicate enum width
// FIXME: There is no way to emit value 0 (enums prohibited) or value 3
// (all enums contain a value needing 32 bits to encode).
- if (auto EnumWidthValue = cast_or_null<ConstantInt>(
+ if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("min_enum_size"))) {
int EnumWidth = EnumWidthValue->getZExtValue();
assert((EnumWidth == 1 || EnumWidth == 4) &&
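Both flag reads above swap cast_or_null for mdconst::extract_or_null because module flags now carry constants wrapped as metadata, so the raw cast no longer type-checks. A hedged sketch of the access pattern, assuming a Module M (emitWCharAttribute is a hypothetical helper):

    // A module flag such as "wchar_size" holds a ConstantAsMetadata node;
    // mdconst::extract_or_null unwraps it to the underlying ConstantInt,
    // or returns null when the flag is absent.
    if (auto *WCharWidth = mdconst::extract_or_null<ConstantInt>(
            M.getModuleFlag("wchar_size")))
      emitWCharAttribute(WCharWidth->getZExtValue()); // 2 or 4 on ARM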
@@ -757,6 +829,17 @@ void ARMAsmPrinter::emitAttributes() {
}
}
+ // TODO: We currently only support either reserving the register, or treating
+ // it as another callee-saved register, but not as SB or a TLS pointer; it
+ // would instead be nicer to push this from the frontend as metadata, as we
+ // do for the wchar and enum size tags.
+ if (Subtarget->isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9Reserved);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsGPR);
+
if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowTZVirtualization);
@@ -834,8 +917,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getDataLayout();
- int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ int Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -1013,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -1151,7 +1235,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1226,18 +1310,34 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::tBX_CALL: {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr)
- .addReg(ARM::LR)
- .addReg(ARM::PC)
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
+ if (Subtarget->hasV5TOps())
+ llvm_unreachable("Expected BLX to be selected for v5t+");
+
+ // On ARM v4t, when doing a call from thumb mode, we need to ensure
+ // that the saved lr has its LSB set correctly (the arch doesn't
+ // have blx).
+ // So here we generate a bl to a small jump pad that does bx rN.
+ // The jump pads are emitted after the function body.
+
+ unsigned TReg = MI->getOperand(0).getReg();
+ MCSymbol *TRegSym = nullptr;
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ if (ThumbIndirectPads[i].first == TReg) {
+ TRegSym = ThumbIndirectPads[i].second;
+ break;
+ }
+ }
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
- .addReg(MI->getOperand(0).getReg())
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
+ if (!TRegSym) {
+ TRegSym = OutContext.CreateTempSymbol();
+ ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym));
+ }
+
+ // Create a link-saving branch to the Reg Indirect Jump Pad.
+ EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL)
+ // Predicate comes first here.
+ .addImm(ARMCC::AL).addReg(0)
+ .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext)));
return;
}
case ARM::BMOVPCRX_CALL: {
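The jump-pad scheme above has a second half not visible in this hunk: the pads themselves are emitted once the function body ends. A hedged sketch of that emission loop (the commit places it in EmitFunctionBodyEnd):

    // One pad per distinct target register:   .Ltmp:  bx rN
    // The "bl .Ltmp" emitted above leaves a correctly Thumb-marked return
    // address in LR, which the old "mov lr, pc" sequence could not do on v4t.
    for (auto &Pad : ThumbIndirectPads) {
      OutStreamer.EmitLabel(Pad.second);
      EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
                                      .addReg(Pad.first)
                                      .addImm(ARMCC::AL)
                                      .addReg(0));
    }
    ThumbIndirectPads.clear();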
@@ -1567,6 +1667,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitJumpTable(MI);
return;
}
+ case ARM::SPACE:
+ OutStreamer.EmitZeros(MI->getOperand(1).getImm());
+ return;
case ARM::TRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7c103c6763f2..a6214911a783 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMASMPRINTER_H
-#define ARMASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
+#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
#include "ARMSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -20,6 +20,7 @@ class ARMFunctionInfo;
class MCOperand;
class MachineConstantPool;
class MachineOperand;
+class MCSymbol;
namespace ARM {
enum DW_ISA {
@@ -45,6 +46,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// InConstantPool - Maintain state when emitting a sequence of constant
/// pool entries so we can properly mark them as data regions.
bool InConstantPool;
+
+ /// ThumbIndirectPads - These maintain a per-function list of jump pad
+ /// labels used for ARMv4t thumb code to make register indirect calls.
+ SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr),
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0288db91dcbf..e0397cce962d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -108,7 +108,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
const InstrItineraryData *II =
- &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
+ static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
@@ -518,6 +518,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
return Found;
}
+static bool isCPSRDefined(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+ return true;
+ return false;
+}
+
+static bool isEligibleForITBlock(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return true;
+ case ARM::tADC: // ADC (register) T1
+ case ARM::tADDi3: // ADD (immediate) T1
+ case ARM::tADDi8: // ADD (immediate) T2
+ case ARM::tADDrr: // ADD (register) T1
+ case ARM::tAND: // AND (register) T1
+ case ARM::tASRri: // ASR (immediate) T1
+ case ARM::tASRrr: // ASR (register) T1
+ case ARM::tBIC: // BIC (register) T1
+ case ARM::tEOR: // EOR (register) T1
+ case ARM::tLSLri: // LSL (immediate) T1
+ case ARM::tLSLrr: // LSL (register) T1
+ case ARM::tLSRri: // LSR (immediate) T1
+ case ARM::tLSRrr: // LSR (register) T1
+ case ARM::tMUL: // MUL T1
+ case ARM::tMVN: // MVN (register) T1
+ case ARM::tORR: // ORR (register) T1
+ case ARM::tROR: // ROR (register) T1
+ case ARM::tRSB: // RSB (immediate) T1
+ case ARM::tSBC: // SBC (register) T1
+ case ARM::tSUBi3: // SUB (immediate) T1
+ case ARM::tSUBi8: // SUB (immediate) T2
+ case ARM::tSUBrr: // SUB (register) T1
+ return !isCPSRDefined(MI);
+ }
+}
+
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
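The opcode table above lists exactly the 16-bit Thumb encodings that implicitly set flags when executed outside an IT block; inside one they do not, so predicating such an instruction is only safe when the CPSR it defines is dead. An illustration in assembly terms (hedged, hypothetical registers):

    // A tADDrr whose flags feed a later branch must stay unpredicated:
    //   adds  r0, r0, r1     @ CPSR def is live -> isEligibleForITBlock fails
    //   bne   .Lout
    // The same tADDrr with a dead CPSR def may legally be predicated:
    //   it    eq
    //   addeq r0, r0, r1     @ inside IT the T1 encoding sets no flags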
@@ -525,6 +561,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
+ if (!isEligibleForITBlock(MI))
+ return false;
+
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
@@ -555,16 +594,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
}
}
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
-LLVM_ATTRIBUTE_NOINLINE
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI);
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) {
- assert(JTI < JT.size());
- return JT[JTI].MBBs.size();
-}
-
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
@@ -637,7 +666,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// bytes, we can use 16-bit entries instead. Then there won't be an
// alignment issue.
unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = getNumJTEntries(JT, JTI);
+ unsigned NumEntries = JT[JTI].MBBs.size();
if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction
@@ -645,6 +674,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
++NumEntries;
return NumEntries * EntrySize + InstSize;
}
+ case ARM::SPACE:
+ return MI->getOperand(1).getImm();
}
}
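A worked example of the padding above, with hypothetical numbers: a t2TBB_JT with five byte-sized entries would leave the following instruction on an odd boundary, so the count is rounded up before the size is computed.

    unsigned NumEntries = 5;                 // jump table targets
    const unsigned EntrySize = 1;            // TBB entries are byte offsets
    const unsigned InstSize = 4;             // the t2TBB_JT instruction itself
    if (NumEntries & 1)                      // odd entry count -> pad one byte
      ++NumEntries;                          // now 6
    unsigned Bytes = NumEntries * EntrySize + InstSize; // 6*1 + 4 = 10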
@@ -659,6 +690,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
return Size;
}
+void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
+ : ARM::MRS;
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
+
+ // There is only 1 A/R class MRS instruction, and it always refers to
+ // APSR. However, there are lots of other possibilities on M-class cores.
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+}
+
+void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
+ : ARM::MSR;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+ else
+ MIB.addImm(8);
+
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -682,7 +756,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
@@ -742,6 +816,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ Opc = ARM::VMOVS;
+ BeginIdx = ARM::ssub_0;
+ SubRegs = 2;
+ } else if (SrcReg == ARM::CPSR) {
+ copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
+ return;
+ } else if (DestReg == ARM::CPSR) {
+ copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
+ return;
}
assert(Opc && "Impossible reg-to-reg copy");
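The new FPOnlySP branch covers single-precision-only FPUs, where VMOVD does not exist: a D-register copy is decomposed into its two S subregisters by the generic subregister-copy loop that follows in copyPhysReg. Illustrative output for copying d1 into d0 (hedged):

    //   vmov.f32 s0, s2    @ ssub_0 of d0 <- ssub_0 of d1
    //   vmov.f32 s1, s3    @ ssub_1 of d0 <- ssub_1 of d1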
@@ -1174,12 +1258,26 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
-bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+bool
+ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ Reloc::Model RM = MF.getTarget().getRelocationModel();
+
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
+ assert(getSubtarget().getTargetTriple().getObjectFormat() ==
+ Triple::MachO &&
+ "LOAD_STACK_GUARD currently supported only for MachO.");
+ expandLoadStackGuard(MI, RM);
+ MI->getParent()->erase(MI);
+ return true;
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
+ Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -1738,8 +1836,10 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
return false;
}
-MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
- bool PreferFalse) const {
+MachineInstr *
+ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -1787,6 +1887,10 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+ // Update SeenMIs set: register newly created MI and erase removed DefMI.
+ SeenMIs.insert(NewMI);
+ SeenMIs.erase(DefMI);
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
@@ -2832,7 +2936,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
-// operands. However, using MachineMemOperand is a the best solution now for
+// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
@@ -3933,6 +4037,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
return true;
}
+// LoadStackGuard has so far only been implemented for MachO. A different
+// code sequence is needed for other targets.
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc,
+ unsigned LoadOpc,
+ Reloc::Model RM) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ MachineInstrBuilder MIB;
+
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+
+ if (Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->
+ getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ }
+
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
+
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
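Concretely, on a MachO target with an indirect (non-lazy) guard symbol, the base expansion above yields a three-step sequence; a hedged illustration, with LoadImmOpc/LoadOpc supplied by the ARM and Thumb2 subclasses:

    //   movw r0, :lower16:(L___stack_chk_guard$non_lazy_ptr)   @ LoadImmOpc
    //   movt r0, :upper16:(L___stack_chk_guard$non_lazy_ptr)
    //   ldr  r0, [r0]    @ only if GVIsIndirectSymbol: load the GOT slot
    //   ldr  r0, [r0]    @ load the guard value itself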
@@ -4361,29 +4497,6 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
MI->addRegisterKilled(DReg, TRI, true);
}
-void ARMBaseInstrInfo::getUnconditionalBranch(
- MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
- if (Subtarget.isThumb())
- Branch.setOpcode(ARM::tB);
- else if (Subtarget.isThumb2())
- Branch.setOpcode(ARM::t2B);
- else
- Branch.setOpcode(ARM::Bcc);
-
- Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
- Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
- Branch.addOperand(MCOperand::CreateReg(0));
-}
-
-void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
- if (Subtarget.isThumb())
- MI.setOpcode(ARM::tTRAP);
- else if (Subtarget.useNaClTrap())
- MI.setOpcode(ARM::TRAPNaCl);
- else
- MI.setOpcode(ARM::TRAP);
-}
-
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
@@ -4401,3 +4514,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
return false;
}
+
+bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVDRR:
+ // dX = VMOVDRR rY, rZ
+ // is the same as:
+ // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
+ // Populate the InputRegs accordingly.
+ // rY
+ const MachineOperand *MOReg = &MI.getOperand(1);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
+ // rZ
+ MOReg = &MI.getOperand(2);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVRRD:
+ // rX, rY = VMOVRRD dZ
+ // is the same as:
+ // rX = EXTRACT_SUBREG dZ, ssub_0
+ // rY = EXTRACT_SUBREG dZ, ssub_1
+ const MachineOperand &MOReg = MI.getOperand(2);
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VSETLNi32:
+ // dX = VSETLNi32 dY, rZ, imm
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOIndex = MI.getOperand(3);
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
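These three hooks let generic copy-chasing passes (notably the peephole optimizer) look through VMOVDRR, VMOVRRD, and VSETLNi32 as if they were REG_SEQUENCE, EXTRACT_SUBREG, and INSERT_SUBREG respectively. A hedged usage sketch:

    SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 2> Inputs;
    if (MI.isRegSequenceLike() &&
        TII->getRegSequenceLikeInputs(MI, /*DefIdx=*/0, Inputs)) {
      // For dX = VMOVDRR rY, rZ this yields {rY, ssub_0} and {rZ, ssub_1},
      // exactly what REG_SEQUENCE rY, ssub_0, rZ, ssub_1 would expose; a
      // real caller would rewrite copies through these inputs.
    }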
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b8d675806b25..ecbcf5c0f96a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEINSTRUCTIONINFO_H
-#define ARMBASEINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -34,6 +35,57 @@ protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+ void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc, unsigned LoadOpc,
+ Reloc::Model RM) const;
+
+ /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
+ /// the list is modeled as <Reg:SubReg, SubIdx>.
+ /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce
+ /// two elements:
+ /// - vreg1:sub1, sub0
+ /// - vreg2<:0>, sub1
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isRegSequenceLike().
+ bool getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override;
+
+ /// Build the equivalent inputs of an EXTRACT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputReg of the equivalent EXTRACT_SUBREG.
+ /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce:
+ /// - vreg1:sub1, sub0
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isExtractSubregLike().
+ bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const override;
+
+ /// Build the equivalent inputs of an INSERT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] BaseReg and \p [out] InsertedReg contain
+ /// the equivalent inputs of INSERT_SUBREG.
+ /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce:
+ /// - BaseReg: vreg0:sub0
+ /// - InsertedReg: vreg1:sub1, sub3
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isInsertSubregLike().
+ bool
+ getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -104,6 +156,13 @@ public:
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const override;
+ void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+ void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
@@ -202,7 +261,9 @@ public:
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const override;
- MachineInstr *optimizeSelect(MachineInstr *MI, bool) const override;
+ MachineInstr *optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
@@ -230,12 +291,6 @@ public:
void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
const TargetRegisterInfo *TRI) const override;
- void
- getUnconditionalBranch(MCInst &Branch,
- const MCSymbolRefExpr *BranchTarget) const override;
-
- void getTrap(MCInst &MI) const override;
-
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
@@ -286,6 +341,9 @@ private:
bool verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const override;
+ virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const = 0;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 32b5f4aa2942..8744f1c62217 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -60,9 +60,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_SaveList
- : CSR_AAPCS_SaveList;
+ const MCPhysReg *RegList =
+ STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
if (!MF) return RegList;
@@ -95,8 +94,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+ return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
const uint32_t*
@@ -117,13 +115,13 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return nullptr;
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+ return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask
+ : CSR_AAPCS_ThisReturn_RegMask;
}
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -182,14 +180,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
const TargetRegisterClass *
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &ARM::CCRRegClass)
- return nullptr; // Can't copy CCR registers.
+ return &ARM::rGPRRegClass; // Can't copy CCR registers.
return RC;
}
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (RC->getID()) {
default:
@@ -311,7 +309,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
@@ -356,7 +354,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ if (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -367,7 +368,10 @@ bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -387,7 +391,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF))
return FramePtr;
@@ -404,7 +408,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred,
unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
@@ -417,11 +421,6 @@ emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
-bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const {
- // The native linux build hits a downstream codegen bug when this is enabled.
- return STI.isTargetDarwin();
-}
-
bool ARMBaseRegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
@@ -531,7 +530,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -584,7 +583,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
- (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -593,15 +592,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
- MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx).addImm(Offset));
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(MIB);
+ AddDefaultCC(AddDefaultPred(MIB));
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -609,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -708,9 +707,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
- const ARMFrameLowering *TFI =
- static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 833d3f218480..e9bc412e99e2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEREGISTERINFO_H
-#define ARMBASEREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -174,8 +174,6 @@ public:
unsigned MIFlags = MachineInstr::NoFlags)const;
/// Code Generation virtual methods...
- bool mayOverrideLocalAssignment() const override;
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index dc41c1c14bbb..e0d0559ba986 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMCALLINGCONV_H
-#define ARMCALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
+#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
@@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+
// AAPCS HFAs must have 1-4 elements, all of the same type
- assert(PendingHAMembers.size() < 8);
+ assert(PendingHAMembers.size() < 4);
if (PendingHAMembers.size() > 0)
assert(PendingHAMembers[0].getLocVT() == LocVT);
@@ -188,26 +189,21 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (ArgFlags.isInConsecutiveRegsLast()) {
- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
"Homogeneous aggregates must have between 1 and 4 members");
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- const uint16_t *RegList;
- unsigned NumRegs;
+ ArrayRef<uint16_t> RegList;
switch (LocVT.SimpleTy) {
- case MVT::i32:
case MVT::f32:
RegList = SRegList;
- NumRegs = 16;
break;
case MVT::f64:
RegList = DRegList;
- NumRegs = 8;
break;
case MVT::v2f64:
RegList = QRegList;
- NumRegs = 4;
break;
default:
llvm_unreachable("Unexpected member type for HA");
@@ -215,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
unsigned RegResult =
- State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+ State.AllocateRegBlock(RegList, PendingHAMembers.size());
if (RegResult) {
for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
@@ -235,20 +231,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.AllocateReg(SRegList[regNo]);
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned Align = Size;
-
- if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
- // Vectors are always aligned to 8 bytes. If we've seen an i32 here
- // it's because it's been split from a larger type, also with align 8.
- Align = 8;
- }
+ unsigned Align = std::min(Size, 8U);
for (auto It : PendingHAMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // Only the first member needs to be aligned.
- Align = 1;
}
// All pending members have now been allocated
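A worked example of the allocation above: an HFA of three floats requests a contiguous block of three S registers and lands in s0-s2 when they are free. If no block fits, every remaining S register is marked used (AAPCS: once an HFA spills, later FP arguments may not take registers) and each member gets a stack slot aligned to min(member size, 8):

    // f32 member:   Size = 4  -> Align = min(4, 8)  = 4
    // f64 member:   Size = 8  -> Align = min(8, 8)  = 8
    // v2f64 member: Size = 16 -> Align = min(16, 8) = 8 (AAPCS caps stack
    //                                                    alignment at 8)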
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
deleted file mode 100644
index 5fb6ebfeaae1..000000000000
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ /dev/null
@@ -1,1909 +0,0 @@
-//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the ARM machine instructions into
-// relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMConstantPoolValue.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMRelocations.h"
-#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#ifndef NDEBUG
-#include <iomanip>
-#endif
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
- class ARMCodeEmitter : public MachineFunctionPass {
- ARMJITInfo *JTI;
- const ARMBaseInstrInfo *II;
- const DataLayout *TD;
- const ARMSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- MachineModuleInfo *MMI;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- const std::vector<MachineJumpTableEntry> *MJTEs;
- bool IsPIC;
- bool IsThumb;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
- public:
- ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr),
- II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
- TD(tm.getDataLayout()), TM(tm),
- MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "ARM Machine Code Emitter";
- }
-
- void emitInstruction(const MachineInstr &MI);
-
- private:
-
- void emitWordLE(unsigned Binary);
- void emitDWordLE(uint64_t Binary);
- void emitConstPoolInstruction(const MachineInstr &MI);
- void emitMOVi32immInstruction(const MachineInstr &MI);
- void emitMOVi2piecesInstruction(const MachineInstr &MI);
- void emitLEApcrelJTInstruction(const MachineInstr &MI);
- void emitPseudoMoveInstruction(const MachineInstr &MI);
- void addPCLabel(unsigned LabelID);
- void emitPseudoInstruction(const MachineInstr &MI);
- unsigned getMachineSoRegOpValue(const MachineInstr &MI,
- const MCInstrDesc &MCID,
- const MachineOperand &MO,
- unsigned OpIdx);
-
- unsigned getMachineSoImmOpValue(unsigned SoImm);
- unsigned getAddrModeSBit(const MachineInstr &MI,
- const MCInstrDesc &MCID) const;
-
- void emitDataProcessingInstruction(const MachineInstr &MI,
- unsigned ImplicitRd = 0,
- unsigned ImplicitRn = 0);
-
- void emitLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRd = 0,
- unsigned ImplicitRn = 0);
-
- void emitMiscLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRn = 0);
-
- void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
-
- void emitMulFrmInstruction(const MachineInstr &MI);
-
- void emitExtendInstruction(const MachineInstr &MI);
-
- void emitMiscArithInstruction(const MachineInstr &MI);
-
- void emitSaturateInstruction(const MachineInstr &MI);
-
- void emitBranchInstruction(const MachineInstr &MI);
-
- void emitInlineJumpTable(unsigned JTIndex);
-
- void emitMiscBranchInstruction(const MachineInstr &MI);
-
- void emitVFPArithInstruction(const MachineInstr &MI);
-
- void emitVFPConversionInstruction(const MachineInstr &MI);
-
- void emitVFPLoadStoreInstruction(const MachineInstr &MI);
-
- void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
-
- void emitNEONLaneInstruction(const MachineInstr &MI);
- void emitNEONDupInstruction(const MachineInstr &MI);
- void emitNEON1RegModImmInstruction(const MachineInstr &MI);
- void emitNEON2RegInstruction(const MachineInstr &MI);
- void emitNEON3RegInstruction(const MachineInstr &MI);
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
- unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
- return getMachineOpValue(MI, MI.getOperand(OpIdx));
- }
-
- // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
- // TableGen'erated getBinaryCodeForInstr() function to encode any
- // operand values, instead querying getMachineOpValue() directly for
- // each operand it needs to encode. Thus, any of the new encoder
- // helper functions can simply return 0 as the values the return
- // are already handled elsewhere. They are placeholders to allow this
- // encoder to continue to function until the MC encoder is sufficiently
- // far along that this one can be eliminated entirely.
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
- const { return 0; }
- unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
- const { return 0; }
- unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
- unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
- unsigned Op)
- const { return 0; }
- unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
- uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0; }
-
- unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
- const {
- // {17-13} = reg
- // {12} = (U)nsigned (add == '1', sub == '0')
- // {11-0} = imm12
- const MachineOperand &MO = MI.getOperand(Op);
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (!MO.isReg()) {
- emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
- return 0;
- }
- unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
- int32_t Imm12 = MO1.getImm();
- uint32_t Binary;
- Binary = Imm12 & 0xfff;
- if (Imm12 >= 0)
- Binary |= (1 << 12);
- Binary |= (Reg << 13);
- return Binary;
- }
-
- unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
- return 0;
- }
-
- uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
- // {17-13} = reg
- // {12} = (U)nsigned (add == '1', sub == '0')
- // {11-0} = imm12
- const MachineOperand &MO = MI.getOperand(Op);
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (!MO.isReg()) {
- emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
- return 0;
- }
- unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
- int32_t Imm12 = MO1.getImm();
-
- // Special value for #-0
- if (Imm12 == INT32_MIN)
- Imm12 = 0;
-
- // Immediate is always encoded as positive. The 'U' bit controls add vs
- // sub.
- bool isAdd = true;
- if (Imm12 < 0) {
- Imm12 = -Imm12;
- isAdd = false;
- }
-
- uint32_t Binary = Imm12 & 0xfff;
- if (isAdd)
- Binary |= (1 << 12);
- Binary |= (Reg << 13);
- return Binary;
- }
- unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
- /// machine operand requires relocation, record the relocation and return
- /// zero.
- unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
- unsigned Reloc);
-
- /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
- ///
- unsigned getShiftOp(unsigned Imm) const ;
-
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV = 0) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
- intptr_t JTBase = 0) const;
- unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
- };
-}
-
-char ARMCodeEmitter::ID = 0;
-
-/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
-/// code to the specified MCE object.
-FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new ARMCodeEmitter(TM, JCE);
-}
-
-bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
-
- assert((Target.getRelocationModel() != Reloc::Default ||
- Target.getRelocationModel() != Reloc::Static) &&
- "JIT relocation model must be set to static or default!");
-
- JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
- II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
- TD = Target.getDataLayout();
-
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = nullptr;
- if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
- IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
- JTI->Initialize(MF, IsPIC);
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I)
- emitInstruction(*I);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
-///
-unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
- switch (ARM_AM::getAM2ShiftOpc(Imm)) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::asr: return 2;
- case ARM_AM::lsl: return 0;
- case ARM_AM::lsr: return 1;
- case ARM_AM::ror:
- case ARM_AM::rrx: return 3;
- }
-}
-
-/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
-/// machine operand requires relocation, record the relocation and return zero.
-unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
- const MachineOperand &MO,
- unsigned Reloc) {
- assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
- && "Relocation to this function should be for movt or movw");
-
- if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), Reloc);
- else {
-#ifndef NDEBUG
- errs() << MO;
-#endif
- llvm_unreachable("Unsupported operand type for movw/movt");
- }
- return 0;
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return II->getRegisterInfo().getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
- else if (MO.isCPI()) {
- const MCInstrDesc &MCID = MI.getDesc();
- // For VFP load, the immediate offset is multiplied by 4.
- unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
- ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
- emitConstPoolAddress(MO.getIndex(), Reloc);
- } else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-
-/// emitGlobalAddress - Emit the specified address to the code stream.
-///
-void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV) const {
- MachineRelocation MR = Indirect
- ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV),
- ACPV, MayNeedFarStub)
- : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), ACPV,
- MayNeedFarStub);
- MCE.addRelocation(MR);
-}
-
-/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-void ARMCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES));
-}
-
- /// emitConstPoolAddress - Arrange for the address of a constant pool entry
-/// to be emitted to the current location in the function, and allow it to be PC
-/// relative.
-void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- // Tell JIT emitter we'll resolve the address.
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, true));
-}
-
-/// emitJumpTableAddress - Arrange for the address of a jump table to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-void ARMCodeEmitter::
-emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTIndex, 0, true));
-}
-
- /// emitMachineBasicBlock - Emit the address of the specified basic block.
-void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc,
- intptr_t JTBase) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB, JTBase));
-}
-
-void ARMCodeEmitter::emitWordLE(unsigned Binary) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Binary) << "\n");
- MCE.emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Binary) << "\n");
- MCE.emitDWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- ++NumEmitted; // Keep track of the # of mi's emitted
- switch (MI.getDesc().TSFlags & ARMII::FormMask) {
- default: {
- llvm_unreachable("Unhandled instruction encoding format!");
- }
- case ARMII::MiscFrm:
- if (MI.getOpcode() == ARM::LEApcrelJT) {
- // Materialize jumptable address.
- emitLEApcrelJTInstruction(MI);
- break;
- }
- llvm_unreachable("Unhandled instruction encoding!");
- case ARMII::Pseudo:
- emitPseudoInstruction(MI);
- break;
- case ARMII::DPFrm:
- case ARMII::DPSoRegFrm:
- emitDataProcessingInstruction(MI);
- break;
- case ARMII::LdFrm:
- case ARMII::StFrm:
- emitLoadStoreInstruction(MI);
- break;
- case ARMII::LdMiscFrm:
- case ARMII::StMiscFrm:
- emitMiscLoadStoreInstruction(MI);
- break;
- case ARMII::LdStMulFrm:
- emitLoadStoreMultipleInstruction(MI);
- break;
- case ARMII::MulFrm:
- emitMulFrmInstruction(MI);
- break;
- case ARMII::ExtFrm:
- emitExtendInstruction(MI);
- break;
- case ARMII::ArithMiscFrm:
- emitMiscArithInstruction(MI);
- break;
- case ARMII::SatFrm:
- emitSaturateInstruction(MI);
- break;
- case ARMII::BrFrm:
- emitBranchInstruction(MI);
- break;
- case ARMII::BrMiscFrm:
- emitMiscBranchInstruction(MI);
- break;
- // VFP instructions.
- case ARMII::VFPUnaryFrm:
- case ARMII::VFPBinaryFrm:
- emitVFPArithInstruction(MI);
- break;
- case ARMII::VFPConv1Frm:
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- case ARMII::VFPConv4Frm:
- case ARMII::VFPConv5Frm:
- emitVFPConversionInstruction(MI);
- break;
- case ARMII::VFPLdStFrm:
- emitVFPLoadStoreInstruction(MI);
- break;
- case ARMII::VFPLdStMulFrm:
- emitVFPLoadStoreMultipleInstruction(MI);
- break;
-
- // NEON instructions.
- case ARMII::NGetLnFrm:
- case ARMII::NSetLnFrm:
- emitNEONLaneInstruction(MI);
- break;
- case ARMII::NDupFrm:
- emitNEONDupInstruction(MI);
- break;
- case ARMII::N1RegModImmFrm:
- emitNEON1RegModImmInstruction(MI);
- break;
- case ARMII::N2RegFrm:
- emitNEON2RegInstruction(MI);
- break;
- case ARMII::N3RegFrm:
- emitNEON3RegInstruction(MI);
- break;
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
-}
-
-void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
- unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
- unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
- const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
-
- // Remember the CONSTPOOL_ENTRY address for later relocation.
- JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
-
- // Emit constpool island entry. In most cases, the actual values will be
- // resolved and relocated after code emission.
- if (MCPE.isMachineConstantPoolEntry()) {
- ARMConstantPoolValue *ACPV =
- static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
-
- DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
-
- assert(ACPV->isGlobalValue() && "unsupported constant pool value");
- const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
- if (GV) {
- Reloc::Model RelocM = TM.getRelocationModel();
- emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
- isa<Function>(GV),
- Subtarget->GVIsIndirectSymbol(GV, RelocM),
- (intptr_t)ACPV);
- } else {
- const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
- emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
- }
- emitWordLE(0);
- } else {
- const Constant *CV = MCPE.Val.ConstVal;
-
- DEBUG({
- errs() << " ** Constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " ";
- if (const Function *F = dyn_cast<Function>(CV))
- errs() << F->getName();
- else
- errs() << *CV;
- errs() << '\n';
- });
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
- emitWordLE(0);
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- uint32_t Val = uint32_t(*CI->getValue().getRawData());
- emitWordLE(Val);
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (CFP->getType()->isFloatTy())
- emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- else if (CFP->getType()->isDoubleTy())
- emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- else {
- llvm_unreachable("Unable to handle this constantpool entry!");
- }
- } else {
- llvm_unreachable("Unable to handle this constantpool entry!");
- }
- }
-}
-
-void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
- const MachineOperand &MO0 = MI.getOperand(0);
- const MachineOperand &MO1 = MI.getOperand(1);
-
- // Emit the 'movw' instruction.
- unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
-
- unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode imm16 as imm4:imm12
- Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
- Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
- emitWordLE(Binary);
-
- unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
- // Emit the 'movt' instruction.
- Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode imm16 as imm4:imm12, same as movw above.
- Binary |= Hi16 & 0xFFF;
- Binary |= ((Hi16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
- const MachineOperand &MO0 = MI.getOperand(0);
- const MachineOperand &MO1 = MI.getOperand(1);
- assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
- "Not a valid so_imm value!");
- unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
- unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
-
- // Emit the 'mov' instruction.
- unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode so_imm.
- // Set bit I(25) to indicate that this is the immediate form of <shifter_op>.
- Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(V1);
- emitWordLE(Binary);
-
- // Now the 'orr' instruction.
- Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
-
- // Encode so_imm.
- // Set bit I(25) to indicate that this is the immediate form of <shifter_op>.
- Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(V2);
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
- // It's basically add r, pc, (LJTI - $+8)
-
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Emit the 'add' instruction.
- unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode Rn which is PC.
- Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
-
- // Encode the displacement.
- Binary |= 1 << ARMII::I_BitShift;
- emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
- unsigned Opcode = MI.getDesc().Opcode;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
- Binary |= 1 << ARMII::S_BitShift;
-
- // Encode register def if there is one.
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode the shift operation.
- switch (Opcode) {
- default: break;
- case ARM::RRX:
- // rrx
- Binary |= 0x6 << 4;
- break;
- case ARM::MOVsrl_flag:
- // lsr #1
- Binary |= (0x2 << 4) | (1 << 7);
- break;
- case ARM::MOVsra_flag:
- // asr #1
- Binary |= (0x4 << 4) | (1 << 7);
- break;
- }
-
- // Encode register Rm.
- Binary |= getMachineOpValue(MI, 1);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
- DEBUG(errs() << " ** LPC" << LabelID << " @ "
- << (void*)MCE.getCurrentPCValue() << '\n');
- JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
-}
-
-void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
- unsigned Opcode = MI.getDesc().Opcode;
- switch (Opcode) {
- default:
- llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL: {
- // First emit mov lr, pc
- unsigned Binary = 0x01a0e00f;
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
- emitWordLE(Binary);
-
- // and then emit the branch.
- emitMiscBranchInstruction(MI);
- break;
- }
- case TargetOpcode::INLINEASM: {
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- }
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- // Do nothing.
- break;
- case ARM::CONSTPOOL_ENTRY:
- emitConstPoolInstruction(MI);
- break;
- case ARM::PICADD: {
- // Remember the address of the PC label for relocation later.
- addPCLabel(MI.getOperand(2).getImm());
- // PICADD is just an add instruction that implicitly reads pc.
- emitDataProcessingInstruction(MI, 0, ARM::PC);
- break;
- }
- case ARM::PICLDR:
- case ARM::PICLDRB:
- case ARM::PICSTR:
- case ARM::PICSTRB: {
- // Remember the address of the PC label for relocation later.
- addPCLabel(MI.getOperand(2).getImm());
- // These are just load / store instructions that implicitly read pc.
- emitLoadStoreInstruction(MI, 0, ARM::PC);
- break;
- }
- case ARM::PICLDRH:
- case ARM::PICLDRSH:
- case ARM::PICLDRSB:
- case ARM::PICSTRH: {
- // Remember the address of the PC label for relocation later.
- addPCLabel(MI.getOperand(2).getImm());
- // These are just load / store instructions that implicitly read pc.
- emitMiscLoadStoreInstruction(MI, ARM::PC);
- break;
- }
-
- case ARM::MOVi32imm:
- // Two instructions to materialize a constant.
- if (Subtarget->hasV6T2Ops())
- emitMOVi32immInstruction(MI);
- else
- emitMOVi2piecesInstruction(MI);
- break;
-
- case ARM::LEApcrelJT:
- // Materialize jumptable address.
- emitLEApcrelJTInstruction(MI);
- break;
- case ARM::RRX:
- case ARM::MOVsrl_flag:
- case ARM::MOVsra_flag:
- emitPseudoMoveInstruction(MI);
- break;
- }
-}
-
-unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
- const MCInstrDesc &MCID,
- const MachineOperand &MO,
- unsigned OpIdx) {
- unsigned Binary = getMachineOpValue(MI, MO);
-
- const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
- const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
- ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
-
- // Encode the shift opcode.
- unsigned SBits = 0;
- unsigned Rs = MO1.getReg();
- if (Rs) {
- // Set shift operand (bit[7:4]).
- // LSL - 0001
- // LSR - 0011
- // ASR - 0101
- // ROR - 0111
- // RRX - 0110 and bit[11:8] clear.
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x1; break;
- case ARM_AM::lsr: SBits = 0x3; break;
- case ARM_AM::asr: SBits = 0x5; break;
- case ARM_AM::ror: SBits = 0x7; break;
- case ARM_AM::rrx: SBits = 0x6; break;
- }
- } else {
- // Set shift operand (bit[6:4]).
- // LSL - 000
- // LSR - 010
- // ASR - 100
- // ROR - 110
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x0; break;
- case ARM_AM::lsr: SBits = 0x2; break;
- case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::ror: SBits = 0x6; break;
- }
- }
- Binary |= SBits << 4;
- if (SOpc == ARM_AM::rrx)
- return Binary;
-
- // Encode the shift operation Rs or shift_imm (except rrx).
- if (Rs) {
- // Encode Rs bit[11:8].
- assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
- }
-
- // Encode shift_imm bit[11:7].
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
-}
-
-unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
- int SoImmVal = ARM_AM::getSOImmVal(SoImm);
- assert(SoImmVal != -1 && "Not a valid so_imm value!");
-
- // Encode rotate_imm.
- unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
- << ARMII::SoRotImmShift;
-
- // Encode immed_8.
- Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
- return Binary;
-}
-
-unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
- const MCInstrDesc &MCID) const {
- for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i) {
- const MachineOperand &MO = MI.getOperand(i-1);
- if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
- return 1 << ARMII::S_BitShift;
- }
- return 0;
-}
-
-void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
- unsigned ImplicitRd,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // Encode register def if there is one.
- unsigned NumDefs = MCID.getNumDefs();
- unsigned OpIdx = 0;
- if (NumDefs)
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
- else if (ImplicitRd)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
-
- if (MCID.Opcode == ARM::MOVi16) {
- // Get immediate from MI.
- unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
- ARM::reloc_arm_movw);
- // Encode imm which is the same as in emitMOVi32immInstruction().
- Binary |= Lo16 & 0xFFF;
- Binary |= ((Lo16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
- return;
- } else if (MCID.Opcode == ARM::MOVTi16) {
- unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
- ARM::reloc_arm_movt) >> 16);
- Binary |= Hi16 & 0xFFF;
- Binary |= ((Hi16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
- return;
- } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
- uint32_t v = ~MI.getOperand(2).getImm();
- int32_t lsb = countTrailingZeros(v);
- int32_t msb = (32 - countLeadingZeros(v)) - 1;
- // Instr{20-16} = msb, Instr{11-7} = lsb
- Binary |= (msb & 0x1F) << 16;
- Binary |= (lsb & 0x1F) << 7;
- emitWordLE(Binary);
- return;
- } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
- // Encode Rn in Instr{0-3}
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- uint32_t lsb = MI.getOperand(OpIdx++).getImm();
- uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
-
- // Instr{20-16} = widthm1, Instr{11-7} = lsb
- Binary |= (widthm1 & 0x1F) << 16;
- Binary |= (lsb & 0x1F) << 7;
- emitWordLE(Binary);
- return;
- }
-
- // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- // Encode first non-shifter register operand if there is one.
- bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
- if (!isUnary) {
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else {
- Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
- ++OpIdx;
- }
- }
-
- // Encode shifter operand.
- const MachineOperand &MO = MI.getOperand(OpIdx);
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
- // Encode SoReg.
- emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
- return;
- }
-
- if (MO.isReg()) {
- // Encode register Rm.
- emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
- return;
- }
-
- // Encode so_imm.
- Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRd,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // If this is an LDRi12, STRi12 or LDRcp, nothing more needs to be done.
- if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
- MI.getOpcode() == ARM::STRi12) {
- emitWordLE(Binary);
- return;
- }
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Operand 0 of a pre- and post-indexed store is the address base
- // writeback. Skip it.
- bool Skipped = false;
- if (IsPrePost && Form == ARMII::StFrm) {
- ++OpIdx;
- Skipped = true;
- }
-
- // Set first operand
- if (ImplicitRd)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- // Set second operand
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // If this is a two-address operand, skip it. e.g. LDR_PRE.
- if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- unsigned AM2Opc = (ImplicitRn == ARM::PC)
- ? 0 : MI.getOperand(OpIdx+1).getImm();
-
- // Set bit U(23) according to sign of immed value (positive or negative).
- Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
- ARMII::U_BitShift);
- if (!MO2.getReg()) { // is immediate
- if (ARM_AM::getAM2Offset(AM2Opc))
- // Set the value of offset_12 field
- Binary |= ARM_AM::getAM2Offset(AM2Opc);
- emitWordLE(Binary);
- return;
- }
-
- // Set bit I(25), because this is not an immediate encoding.
- Binary |= 1 << ARMII::I_BitShift;
- assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
- // Set bit[3:0] to the corresponding Rm register
- Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
-
- // If this instr uses a scaled register offset/index encoding, set the
- // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
- if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
- Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
- Binary |= ShImm << ARMII::ShiftShift; // shift_immed
- }
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Operand 0 of a pre- and post-indexed store is the address base
- // writeback. Skip it.
- bool Skipped = false;
- if (IsPrePost && Form == ARMII::StMiscFrm) {
- ++OpIdx;
- Skipped = true;
- }
-
- // Set first operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- // Skip LDRD and STRD's second operand.
- if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
- ++OpIdx;
-
- // Set second operand
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // If this is a two-address operand, skip it. e.g. LDRH_POST.
- if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- unsigned AM3Opc = (ImplicitRn == ARM::PC)
- ? 0 : MI.getOperand(OpIdx+1).getImm();
-
- // Set bit U(23) according to sign of immed value (positive or negative)
- Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
- ARMII::U_BitShift);
-
- // If this instr uses a register offset/index encoding, set bit[3:0]
- // to the corresponding Rm register.
- if (MO2.getReg()) {
- Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
- emitWordLE(Binary);
- return;
- }
-
- // This instr is in immediate offset/index encoding, set bit 22 to 1.
- Binary |= 1 << ARMII::AM3_I_BitShift;
- if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
- // Set operands
- Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
- Binary |= (ImmOffs & 0xF); // immedL
- }
-
- emitWordLE(Binary);
-}
-
-static unsigned getAddrModeUPBits(unsigned Mode) {
- unsigned Binary = 0;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- // IA - Increment after - bit U = 1 and bit P = 0
- // IB - Increment before - bit U = 1 and bit P = 1
- // DA - Decrement after - bit U = 0 and bit P = 0
- // DB - Decrement before - bit U = 0 and bit P = 1
- switch (Mode) {
- default: llvm_unreachable("Unknown addressing sub-mode!");
- case ARM_AM::da: break;
- case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
- case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
- case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
- }
-
- return Binary;
-}
-
-void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Skip operand 0 of an instruction with base register update.
- unsigned OpIdx = 0;
- if (IsUpdating)
- ++OpIdx;
-
- // Set base address operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
-
- // Set bit W(21)
- if (IsUpdating)
- Binary |= 0x1 << ARMII::W_BitShift;
-
- // Set registers
- for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- break;
- unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- RegNum < 16);
- Binary |= 0x1 << RegNum;
- }
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // 32x32->64bit operations have two destination registers. The number
- // of register definitions will tell us if that's what we're dealing with.
- unsigned OpIdx = 0;
- if (MCID.getNumDefs() == 2)
- Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
-
- // Encode Rm
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- // Encode Rs
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
-
- // Many multiply instructions (e.g. MLA) have a third source operand. Encode
- // it as Rn (for multiply, that's at the same offset as RdLo).
- if (MCID.getNumOperands() > OpIdx &&
- !MCID.OpInfo[OpIdx].isPredicate() &&
- !MCID.OpInfo[OpIdx].isOptionalDef())
- Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- const MachineOperand &MO1 = MI.getOperand(OpIdx++);
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- if (MO2.isReg()) {
- // Two register operand form.
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
-
- // Encode Rm.
- Binary |= getMachineOpValue(MI, MO2);
- ++OpIdx;
- } else {
- Binary |= getMachineOpValue(MI, MO1);
- }
-
- // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
- if (MI.getOperand(OpIdx).isImm() &&
- !MCID.OpInfo[OpIdx].isPredicate() &&
- !MCID.OpInfo[OpIdx].isOptionalDef())
- Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // PKH instructions are finished at this point
- if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
- emitWordLE(Binary);
- return;
- }
-
- unsigned OpIdx = 0;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- const MachineOperand &MO = MI.getOperand(OpIdx++);
- if (OpIdx == MCID.getNumOperands() ||
- MCID.OpInfo[OpIdx].isPredicate() ||
- MCID.OpInfo[OpIdx].isOptionalDef()) {
- // Encode Rm and it's done.
- Binary |= getMachineOpValue(MI, MO);
- emitWordLE(Binary);
- return;
- }
-
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
-
- // Encode Rm.
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- // Encode shift_imm.
- unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
- if (MCID.Opcode == ARM::PKHTB) {
- assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
- if (ShiftAmt == 32)
- ShiftAmt = 0;
- }
- assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
- Binary |= ShiftAmt << ARMII::ShiftShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode saturate bit position.
- unsigned Pos = MI.getOperand(1).getImm();
- if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
- Pos -= 1;
- assert((Pos < 16 || (Pos < 32 &&
- MCID.Opcode != ARM::SSAT16 &&
- MCID.Opcode != ARM::USAT16)) &&
- "saturate bit position out of range");
- Binary |= Pos << 16;
-
- // Encode Rm
- Binary |= getMachineOpValue(MI, 2);
-
- // Encode shift_imm.
- if (MCID.getNumOperands() == 4) {
- unsigned ShiftOp = MI.getOperand(3).getImm();
- ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
- if (Opc == ARM_AM::asr)
- Binary |= (1 << 6);
- unsigned ShiftAmt = MI.getOperand(3).getImm();
- if (ShiftAmt == 32 && Opc == ARM_AM::asr)
- ShiftAmt = 0;
- assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
- Binary |= ShiftAmt << ARMII::ShiftShift;
- }
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- if (MCID.Opcode == ARM::TPsoft) {
- llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
- }
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Set signed_immed_24 field
- Binary |= getMachineOpValue(MI, 0);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
- // Remember the base address of the inline jump table.
- uintptr_t JTBase = MCE.getCurrentPCValue();
- JTI->addJumpTableBaseAddr(JTIndex, JTBase);
- DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
- << '\n');
-
- // Now emit the jump table entries.
- const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
- if (IsPIC)
- // DestBB address - JT base.
- emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
- else
- // Absolute DestBB address.
- emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
- emitWordLE(0);
- }
-}
-
-void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Handle jump tables.
- if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
- // First emit a ldr pc, [] instruction.
- emitDataProcessingInstruction(MI, ARM::PC);
-
- // Then emit the inline jump table.
- unsigned JTIndex =
- (MCID.Opcode == ARM::BR_JTr)
- ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
- emitInlineJumpTable(JTIndex);
- return;
- } else if (MCID.Opcode == ARM::BR_JTm) {
- // First emit a ldr pc, [] instruction.
- emitLoadStoreInstruction(MI, ARM::PC);
-
- // Then emit the inline jump table.
- emitInlineJumpTable(MI.getOperand(3).getIndex());
- return;
- }
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
- // The return register is LR.
- Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
- else
- // otherwise, set the return register
- Binary |= getMachineOpValue(MI, 0);
-
- emitWordLE(Binary);
-}
-
-unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegD = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegD);
- RegD = II->getRegisterInfo().getEncodingValue(RegD);
- if (!isSPVFP)
- Binary |= RegD << ARMII::RegRdShift;
- else {
- Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
- Binary |= (RegD & 0x01) << ARMII::D_BitShift;
- }
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegN = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegN);
- RegN = II->getRegisterInfo().getEncodingValue(RegN);
- if (!isSPVFP)
- Binary |= RegN << ARMII::RegRnShift;
- else {
- Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
- Binary |= (RegN & 0x01) << ARMII::N_BitShift;
- }
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegM = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegM);
- RegM = II->getRegisterInfo().getEncodingValue(RegM);
- if (!isSPVFP)
- Binary |= RegM;
- else {
- Binary |= ((RegM & 0x1E) >> 1);
- Binary |= (RegM & 0x01) << ARMII::M_BitShift;
- }
- return Binary;
-}
-
-void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
- assert((Binary & (1 << ARMII::D_BitShift)) == 0 &&
- (Binary & (1 << ARMII::N_BitShift)) == 0 &&
- (Binary & (1 << ARMII::M_BitShift)) == 0 && "VFP encoding bug!");
-
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, OpIdx++);
-
- // If this is a two-address operand, skip it, e.g. FMACD.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- // Encode Dn / Sn.
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
- Binary |= encodeVFPRn(MI, OpIdx++);
-
- if (OpIdx == MCID.getNumOperands() ||
- MCID.OpInfo[OpIdx].isPredicate() ||
- MCID.OpInfo[OpIdx].isOptionalDef()) {
- // FCMPEZD etc. has only one operand.
- emitWordLE(Binary);
- return;
- }
-
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, OpIdx);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- switch (Form) {
- default: break;
- case ARMII::VFPConv1Frm:
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, 0);
- break;
- case ARMII::VFPConv4Frm:
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 0);
- break;
- case ARMII::VFPConv5Frm:
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 0);
- break;
- }
-
- switch (Form) {
- default: break;
- case ARMII::VFPConv1Frm:
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 1);
- break;
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 1);
- break;
- case ARMII::VFPConv4Frm:
- case ARMII::VFPConv5Frm:
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, 1);
- break;
- }
-
- if (Form == ARMII::VFPConv5Frm)
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 2);
- else if (Form == ARMII::VFPConv3Frm)
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 2);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, OpIdx++);
-
- // Encode address base.
- const MachineOperand &Base = MI.getOperand(OpIdx++);
- Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
-
- // If there is a non-zero immediate offset, encode it.
- if (Base.isReg()) {
- const MachineOperand &Offset = MI.getOperand(OpIdx);
- if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
- if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
- Binary |= 1 << ARMII::U_BitShift;
- Binary |= ImmOffs;
- emitWordLE(Binary);
- return;
- }
- }
-
- // If immediate offset is omitted, default to +0.
- Binary |= 1 << ARMII::U_BitShift;
-
- emitWordLE(Binary);
-}
-
-void
-ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Skip operand 0 of an instruction with base register update.
- unsigned OpIdx = 0;
- if (IsUpdating)
- ++OpIdx;
-
- // Set base address operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
-
- // Set bit W(21)
- if (IsUpdating)
- Binary |= 0x1 << ARMII::W_BitShift;
-
- // First register is encoded in Dd.
- Binary |= encodeVFPRd(MI, OpIdx+2);
-
- // Count the number of registers.
- unsigned NumRegs = 1;
- for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- break;
- ++NumRegs;
- }
- // Bit 8 will be set if <list> holds consecutive 64-bit registers (e.g., D0);
- // otherwise it will be 0, for 32-bit registers.
- if (Binary & 0x100)
- Binary |= NumRegs * 2;
- else
- Binary |= NumRegs;
-
- emitWordLE(Binary);
-}
-
-unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegD = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegD = II->getRegisterInfo().getEncodingValue(RegD);
- Binary |= (RegD & 0xf) << ARMII::RegRdShift;
- Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegN = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegN = II->getRegisterInfo().getEncodingValue(RegN);
- Binary |= (RegN & 0xf) << ARMII::RegRnShift;
- Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegM = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegM = II->getRegisterInfo().getEncodingValue(RegM);
- Binary |= (RegM & 0xf);
- Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
- return Binary;
-}
-
-/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
-/// data-processing instruction to the corresponding Thumb encoding.
-static unsigned convertNEONDataProcToThumb(unsigned Binary) {
- assert((Binary & 0xfe000000) == 0xf2000000 &&
- "not an ARM NEON data-processing instruction");
- unsigned UBit = (Binary >> 24) & 1;
- return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
-}
-
-void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
- const MCInstrDesc &MCID = MI.getDesc();
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
- RegTOpIdx = 0;
- RegNOpIdx = 1;
- LnOpIdx = 2;
- } else { // ARMII::NSetLnFrm
- RegTOpIdx = 2;
- RegNOpIdx = 0;
- LnOpIdx = 3;
- }
-
- // Set the conditional execution predicate
- Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
-
- unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = II->getRegisterInfo().getEncodingValue(RegT);
- Binary |= (RegT << ARMII::RegRdShift);
- Binary |= encodeNEONRn(MI, RegNOpIdx);
-
- unsigned LaneShift;
- if ((Binary & (1 << 22)) != 0)
- LaneShift = 0; // 8-bit elements
- else if ((Binary & (1 << 5)) != 0)
- LaneShift = 1; // 16-bit elements
- else
- LaneShift = 2; // 32-bit elements
-
- unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
- unsigned Opc1 = Lane >> 2;
- unsigned Opc2 = Lane & 3;
- assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
- Binary |= (Opc1 << 21);
- Binary |= (Opc2 << 5);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
-
- unsigned RegT = MI.getOperand(1).getReg();
- RegT = II->getRegisterInfo().getEncodingValue(RegT);
- Binary |= (RegT << ARMII::RegRdShift);
- Binary |= encodeNEONRn(MI, 0);
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd.
- Binary |= encodeNEONRd(MI, 0);
- // Immediate fields: Op, Cmode, I, Imm3, Imm4
- unsigned Imm = MI.getOperand(1).getImm();
- unsigned Op = (Imm >> 12) & 1;
- unsigned Cmode = (Imm >> 8) & 0xf;
- unsigned I = (Imm >> 7) & 1;
- unsigned Imm3 = (Imm >> 4) & 0x7;
- unsigned Imm4 = Imm & 0xf;
- Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd; source register in Dm.
- unsigned OpIdx = 0;
- Binary |= encodeNEONRd(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRm(MI, OpIdx);
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- // FIXME: This does not handle VDUPfdf or VDUPfqf.
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd; source registers in Dn and Dm.
- unsigned OpIdx = 0;
- Binary |= encodeNEONRd(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRn(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRm(MI, OpIdx);
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- // FIXME: This does not handle VMOVDneon or VMOVQ.
- emitWordLE(Binary);
-}
-
-#include "ARMGenCodeEmitter.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index ce264eeef90f..5d295317c556 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -275,6 +275,7 @@ namespace {
private:
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
@@ -382,7 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ TII = (const ARMBaseInstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
@@ -529,7 +532,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = *MF->getSubtarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -554,9 +557,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
@@ -566,7 +567,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
@@ -574,12 +575,15 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
return false;
MachineBasicBlock *NextBB = std::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
+ if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ return false;
- return false;
+ // Try to analyze the end of the block. A potential fallthrough may already
+ // have an unconditional branch for whatever reason.
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 4> Cond;
+ bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ return TooDifficult || FBB == nullptr;
}
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
@@ -1203,7 +1207,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
+ Growth < BestGrowth) {
// This is the least amount of required padding seen so far.
BestGrowth = Growth;
WaterIter = IP;
@@ -1265,7 +1270,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- BBInfo[UserMBB->getNumber()].Size += Delta;
+ computeBlockSize(UserMBB);
adjustBBOffsetsAfter(UserMBB);
return;
}
@@ -1309,7 +1314,12 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
- BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ // Ensure BaseInsertOffset is larger than the offset of the instruction
+ // following UserMI so that the loop which searches for the split point
+ // iterates at least once.
+ BaseInsertOffset =
+ std::max(UserBBI.postOffset() - UPad - 8,
+ UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1352,6 +1362,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (CC != ARMCC::AL)
MI = LastIT;
}
+
+ // We really must not split an IT block.
+ DEBUG(unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+
NewMBB = splitBlockBeforeInstr(MI);
}
@@ -1937,7 +1952,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
<< " lea: " << *LeaMI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
- MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(MI->getOperand(JTOpIdx+1).getImm());
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index c7a84155bd48..13bef54b3b7d 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
-#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/Casting.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 51d3dbb5bd8e..7ddf8793e126 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -22,8 +22,8 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
@@ -867,7 +867,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (RI.hasBasePointer(MF)) {
int32_t NumBytes = AFI->getFramePtrSpillOffset();
unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
@@ -887,6 +887,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
+ assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
+ "immediates larger than 256 with all lower "
+ "bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -980,7 +983,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
unsigned PICAddOpc =
IsARM
- ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR)
+ ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
: ARM::tPICADD;
// We need a new const-pool entry to load from.
@@ -1343,8 +1346,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def
index 1fef3b3bc5e2..34ce85d280e6 100644
--- a/lib/Target/ARM/ARMFPUName.def
+++ b/lib/Target/ARM/ARMFPUName.def
@@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3)
ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
ARM_FPU_NAME("vfpv4", VFPV4)
ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
+ARM_FPU_NAME("fpv5-d16", FPV5_D16)
ARM_FPU_NAME("fp-armv8", FP_ARMV8)
ARM_FPU_NAME("neon", NEON)
ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h
index 2a64cce4880d..86acffbc8f75 100644
--- a/lib/Target/ARM/ARMFPUName.h
+++ b/lib/Target/ARM/ARMFPUName.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMFPUNAME_H
-#define ARMFPUNAME_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
+#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
namespace llvm {
namespace ARM {
@@ -23,4 +23,4 @@ enum FPUKind {
} // namespace ARM
} // namespace llvm
-#endif // ARMFPUNAME_H
+#endif
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index e2d90cd9306c..29462f7a8eb8 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -92,11 +92,11 @@ class ARMFastISel final : public FastISel {
public:
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo),
- M(const_cast<Module&>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
+ : FastISel(funcInfo, libInfo),
+ M(const_cast<Module &>(*funcInfo.Fn->getParent())),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
isThumb2 = AFI->isThumbFunction();
@@ -105,39 +105,39 @@ class ARMFastISel final : public FastISel {
// Code from FastISel.cpp.
private:
- unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
- unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
- unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill);
- unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
- unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
// Backend specific FastISel code.
private:
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
+ bool fastLowerArguments() override;
private:
#include "ARMGenFastISel.inc"
@@ -189,7 +189,9 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
- const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
+ const TargetLowering *getTargetLowering() {
+ return TM.getSubtargetImpl()->getTargetLowering();
+ }
// Call handling routines.
private:
@@ -283,7 +285,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
-unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
@@ -305,7 +307,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
@@ -333,7 +335,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
@@ -365,7 +367,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
@@ -391,7 +393,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
@@ -421,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
@@ -511,7 +513,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
- return false;
+ return 0;
// If we can do this in a single instruction without a constant pool entry
// do so now.
@@ -534,7 +536,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
(ARM_AM::getSOImmVal(Imm) != -1);
if (UseImm) {
unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
- unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ImmReg)
.addImm(Imm));
@@ -542,11 +546,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
}
}
+ unsigned ResultReg = 0;
+ if (Subtarget->useMovt(*FuncInfo.MF))
+ ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ if (ResultReg)
+ return ResultReg;
+
// Load from constant pool. For now 32-bit only.
if (VT != MVT::i32)
- return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ return 0;
// MachineConstantPool wants an explicit alignment.
unsigned Align = DL.getPrefTypeAlignment(C->getType());
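
The new early-out above prefers a MOVW/MOVT pair (via fastEmit_i) over a constant-pool load whenever the subtarget allows movt. Any i32 can be materialized this way, as this sketch shows (plain C++, assuming the usual MOVW-sets-low/MOVT-sets-high semantics):

    #include <cassert>
    #include <cstdint>

    // Any 32-bit immediate splits into two halves that movw (low 16,
    // zeroing the rest) and movt (high 16, keeping the low half) can
    // materialize without touching memory.
    static void splitForMovwMovt(uint32_t Imm, uint16_t &Lo, uint16_t &Hi) {
      Lo = Imm & 0xFFFF; // movw rd, #Lo
      Hi = Imm >> 16;    // movt rd, #Hi
    }

    int main() {
      uint16_t Lo, Hi;
      splitForMovwMovt(0x12345678, Lo, Hi);
      assert(Lo == 0x5678 && Hi == 0x1234);
    }
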
@@ -555,21 +564,20 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
Align = DL.getTypeAllocSize(C->getType());
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
-
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::t2LDRpci), DestReg)
- .addConstantPoolIndex(Idx));
+ TII.get(ARM::t2LDRpci), ResultReg)
+ .addConstantPoolIndex(Idx));
else {
// The extra immediate is for addrmode2.
- DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
+ ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::LDRcp), DestReg)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ TII.get(ARM::LDRcp), ResultReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
}
-
- return DestReg;
+ return ResultReg;
}
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
@@ -578,9 +586,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
// FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
@@ -679,7 +686,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
return DestReg;
}
-unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -698,7 +705,7 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
-unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -885,9 +892,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -901,7 +907,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
// Since the offset is too large for the load/store instruction
// get the reg+offset into a register.
if (needsLowering) {
- Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
/*Op0IsKill*/false, Addr.Offset, MVT::i32);
Addr.Offset = 0;
}
@@ -1074,7 +1080,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
unsigned ResultReg;
if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1086,9 +1092,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1276,7 +1281,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1301,7 +1306,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1309,7 +1314,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DbgLoc);
+ fastEmitBranch(Target, DbgLoc);
return true;
}
@@ -1339,7 +1344,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1492,18 +1497,17 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
// Now set a register based on the comparison. Explicitly set the predicates
// here.
unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
- unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ unsigned ZeroReg = fastMaterializeConstant(Zero);
// ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
.addImm(ARMPred).addReg(ARM::CPSR);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
@@ -1522,7 +1526,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTDS), Result)
.addReg(Op));
- UpdateValueMap(I, Result);
+ updateValueMap(I, Result);
return true;
}
@@ -1541,7 +1545,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTSD), Result)
.addReg(Op));
- UpdateValueMap(I, Result);
+ updateValueMap(I, Result);
return true;
}
@@ -1585,7 +1589,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg).addReg(FP));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1617,7 +1621,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
if (IntReg == 0) return false;
- UpdateValueMap(I, IntReg);
+ updateValueMap(I, IntReg);
return true;
}
@@ -1693,7 +1697,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
.addImm(ARMCC::EQ)
.addReg(ARM::CPSR);
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1783,7 +1787,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1825,7 +1829,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1883,7 +1887,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned &NumBytes,
bool isVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
CCAssignFnForCall(CC, false, isVarArg));
@@ -1941,6 +1945,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
+ const Value *ArgVal = Args[VA.getValNo()];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
@@ -1967,7 +1972,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
/*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
@@ -2001,6 +2006,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
} else {
assert(VA.isMemLoc());
// Need to store on the stack.
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
Address Addr;
Addr.BaseType = Address::RegBase;
Addr.Base.Reg = ARM::SP;
@@ -2026,7 +2036,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
// Copy all of the result registers out of their specified physreg.
@@ -2045,7 +2055,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UsedRegs.push_back(RVLocs[1].getLocReg());
// Finally update the result.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
MVT CopyVT = RVLocs[0].getValVT();
@@ -2063,7 +2073,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UsedRegs.push_back(RVLocs[0].getLocReg());
// Finally update the result.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
}
@@ -2087,7 +2097,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
F.isVarArg()));
@@ -2192,7 +2202,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Can't handle non-double multi-reg retvals.
if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
if (RVLocs.size() >= 2 && RetVT != MVT::f64)
return false;
@@ -2303,7 +2313,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
RetVT != MVT::i16 && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
if (RVLocs.size() >= 2 && RetVT != MVT::f64)
return false;
@@ -2476,18 +2486,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- unsigned LdrOpc;
- const TargetRegisterClass *RC;
- if (isThumb2) {
- LdrOpc = ARM::t2LDRi12;
- RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
- } else {
- LdrOpc = ARM::LDRi12;
- RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
- }
+ unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
@@ -2505,7 +2510,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
.addReg(SrcReg).addImm(0));
SrcReg = DestReg;
}
- UpdateValueMap(&I, SrcReg);
+ updateValueMap(&I, SrcReg);
return true;
}
case Intrinsic::memcpy:
@@ -2583,7 +2588,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
// Because the high bits are undefined, a truncate doesn't generate
// any code.
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -2745,7 +2750,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2800,12 +2805,12 @@ bool ARMFastISel::SelectShift(const Instruction *I,
}
AddOptionalDefs(MIB);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// TODO: SoftFP support.
-bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
@@ -2983,7 +2988,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
return DestReg2;
}
-bool ARMFastISel::FastLowerArguments() {
+bool ARMFastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
@@ -3050,7 +3055,7 @@ bool ARMFastISel::FastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
return true;
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index e191a3c779f1..0c910ab6130f 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_ARM_FEATURES_H
-#define TARGET_ARM_FEATURES_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H
+#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H
#include "MCTargetDesc/ARMMCTargetDesc.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 3624377c9402..45c2c30db738 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// iOS requires FP not to be clobbered for backtracing purpose.
if (STI.isTargetIOS())
@@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
}
static int sizeOfSPAdjustment(const MachineInstr *MI) {
- assert(MI->getOpcode() == ARM::VSTMDDB_UPD);
+ int RegSize;
+ switch (MI->getOpcode()) {
+ case ARM::VSTMDDB_UPD:
+ RegSize = 8;
+ break;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ RegSize = 4;
+ break;
+ case ARM::t2STR_PRE:
+ case ARM::STR_PRE_IMM:
+ return 4;
+ default:
+ llvm_unreachable("Unknown push or pop like instruction");
+ }
+
int count = 0;
// ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
// pred) so the list starts at 4.
for (int i = MI->getNumOperands() - 1; i >= 4; --i)
- count += 8;
+ count += RegSize;
return count;
}
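
sizeOfSPAdjustment previously assumed VSTMDDB_UPD only; it now also counts GPR block stores at 4 bytes per register and treats pre-indexed single stores as a fixed 4-byte adjustment. A standalone sketch of the counting convention, assuming the operand layout the comment describes (four fixed operands, then the register list):

    #include <cassert>

    // Counting sketch: operands 0-3 are "sp, sp" plus the two predicate
    // operands, so every remaining operand is one pushed register.
    static int spAdjustForPush(int NumOperands, int BytesPerReg) {
      int Count = 0;
      for (int i = NumOperands - 1; i >= 4; --i)
        Count += BytesPerReg;
      return Count;
    }

    int main() {
      assert(spAdjustForPush(9, 4) == 20); // t2STMDB_UPD sp!, {r4-r7, lr}
      assert(spAdjustForPush(9, 8) == 40); // VSTMDDB_UPD sp!, {d8-d12}
    }
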
@@ -154,6 +169,110 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF,
return StackSizeInBytes >= 4096;
}
+namespace {
+struct StackAdjustingInsts {
+ struct InstInfo {
+ MachineBasicBlock::iterator I;
+ unsigned SPAdjust;
+ bool BeforeFPSet;
+ };
+
+ SmallVector<InstInfo, 4> Insts;
+
+ void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
+ bool BeforeFPSet = false) {
+ InstInfo Info = {I, SPAdjust, BeforeFPSet};
+ Insts.push_back(Info);
+ }
+
+ void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
+ auto Info = std::find_if(Insts.begin(), Insts.end(),
+ [&](InstInfo &Info) { return Info.I == I; });
+ assert(Info != Insts.end() && "invalid sp adjusting instruction");
+ Info->SPAdjust += ExtraBytes;
+ }
+
+ void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
+ DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
+ unsigned CFAOffset = 0;
+ for (auto &Info : Insts) {
+ if (HasFP && !Info.BeforeFPSet)
+ return;
+
+ CFAOffset -= Info.SPAdjust;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, std::next(Info.I), dl,
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+};
+}
+
+/// Emit an instruction sequence that will align the address in
+/// register Reg by zeroing out the lower bits. For versions of the
+/// architecture that support Neon, this must be done in a single
+/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
+/// single instruction. That function only gets called when optimizing
+/// spilling of D registers on a core with the Neon instruction set
+/// present.
+static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
+ const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, const unsigned Reg,
+ const unsigned Alignment,
+ const bool MustBeSingleInstruction) {
+ const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
+ const unsigned AlignMask = Alignment - 1;
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
+ if (!AFI->isThumbFunction()) {
+ // If the BFC instruction is available, use it to zero the lower bits:
+ // bfc Reg, #0, log2(Alignment)
+ // Otherwise use BIC, if the mask to zero the required number of bits
+ // can be encoded in the BIC immediate field:
+ // bic Reg, Reg, Alignment-1
+ // Otherwise, emit:
+ // lsr Reg, Reg, log2(Alignment)
+ // lsl Reg, Reg, log2(Alignment)
+ if (CanUseBFC) {
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ } else if (AlignMask <= 255) {
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)));
+ } else {
+ assert(!MustBeSingleInstruction &&
+ "Shouldn't call emitAligningInstructions demanding a single "
+ "instruction to be emitted for large stack alignment for a target "
+ "without BFC.");
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ }
+ } else {
+ // Since this is only reached for Thumb-2 targets, the BFC instruction
+ // should always be available.
+ assert(CanUseBFC);
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ }
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
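
emitAligningInstructions above chooses between BFC, BIC, and an LSR/LSL pair, but all three clear the low log2(Alignment) bits, i.e. round the register down to a multiple of Alignment. A standalone sketch of that equivalence (assumes a power-of-two Alignment and the GCC/Clang __builtin_ctz intrinsic):

    #include <cassert>
    #include <cstdint>

    // All three sequences emitted by emitAligningInstructions clear the
    // low log2(Alignment) bits of Reg.
    static uint32_t alignDown(uint32_t Reg, uint32_t Alignment) {
      uint32_t AlignMask = Alignment - 1;
      uint32_t ViaBic = Reg & ~AlignMask;          // bic Reg, Reg, #AlignMask
      unsigned Bits = __builtin_ctz(Alignment);    // log2(Alignment)
      uint32_t ViaShifts = (Reg >> Bits) << Bits;  // lsr; lsl
      assert(ViaBic == ViaShifts && "sequences must agree");
      return ViaBic;
    }

    int main() { assert(alignDown(0x1007, 16) == 0x1000); }
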
@@ -163,20 +282,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = TM.getFrameLowering()->getStackAlignment();
+ unsigned Align =
+ TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- int CFAOffset = 0;
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
@@ -189,15 +308,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
+ StackAdjustingInsts DefCFAOffsetCandidates;
+
// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
- CFAOffset -= ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
}
if (!AFI->hasStackFrame() &&
@@ -205,11 +322,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes - ArgRegsSaveSize != 0) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
- CFAOffset -= NumBytes - ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI),
+ NumBytes - ArgRegsSaveSize, true);
}
return;
}
@@ -252,21 +366,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Move past area 1.
- MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
- DPRCSPush;
- if (GPRCS1Size > 0)
+ MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+ if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+ }
// Determine starting offsets of spill areas.
bool HasFP = hasFP(MF);
- unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size
- + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+ unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
+ unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
+ unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
+ unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
- + GPRCS1Size + ArgRegsSaveSize;
+ FramePtrOffsetInPush =
+ MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -275,16 +391,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 2.
- if (GPRCS2Size > 0)
+ if (GPRCS2Size > 0) {
GPRCS2Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ }
+
+ // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
+ // .cfi_offset operations will reflect that.
+ if (DPRGapSize) {
+ assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
+ else {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
+ }
+ }
// Move past area 3.
if (DPRCSSize > 0) {
- DPRCSPush = MBBI;
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
- while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
+ DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
LastPush = MBBI++;
+ }
}
// Move past the aligned DPRCS2 area.
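
The new DPRGapSize term accounts for the bytes pushed before the D-register spill area: if the GPR spill areas plus the vararg save area leave SP only 4-byte aligned, a 4-byte gap restores the 8-byte alignment VPUSH wants, and the epilogue pops it back off. A sketch of the arithmetic, assuming DPRCSSize > 0 (with no DPR spills the code uses a 4-byte DPRAlign, so the gap is always zero):

    #include <algorithm>
    #include <cassert>

    // Bytes already pushed, taken modulo the DPR alignment, give the
    // padding needed before the VPUSH area.
    static unsigned dprGap(unsigned GPRCS1, unsigned GPRCS2,
                           unsigned ArgRegsSave, unsigned StackAlign) {
      unsigned DPRAlign = std::min(8u, StackAlign);
      return (GPRCS1 + GPRCS2 + ArgRegsSave) % DPRAlign;
    }

    int main() {
      assert(dprGap(20, 0, 0, 8) == 4); // five GPRs pushed -> 4-byte gap
      assert(dprGap(24, 0, 0, 8) == 0); // six GPRs pushed  -> already aligned
    }
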
@@ -343,18 +475,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
NumBytes = 0;
}
- unsigned adjustedGPRCS1Size = GPRCS1Size;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) {
- if (LastPush == GPRCS1Push) {
- FramePtrOffsetInPush += NumBytes;
- adjustedGPRCS1Size += NumBytes;
- NumBytes = 0;
- }
- } else
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
+ else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
+ }
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
@@ -368,13 +497,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
- if (adjustedGPRCS1Size > 0) {
- CFAOffset -= adjustedGPRCS1Size;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- MachineBasicBlock::iterator Pos = ++GPRCS1Push;
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP) {
+ MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
+ dl, TII, FramePtr, ARM::SP,
+ PushSize + FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ -(ArgRegsSaveSize - FramePtrOffsetInPush)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ // Now that the prologue's actual instructions are finalised, we can insert
+ // the necessary DWARF call frame instructions to describe the situation.
+ // Start by recording where each register ended up:
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ int CFIIndex;
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -399,47 +557,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
}
}
- // Set FP to point to the stack slot that contains the previous FP.
- // For iOS, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not iOS, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it
- // is in area one and the adjustment needs to take place just after
- // that push.
- if (HasFP) {
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII,
- FramePtr, ARM::SP, FramePtrOffsetInPush,
- MachineInstr::FrameSetup);
- if (FramePtrOffsetInPush) {
- CFAOffset += FramePtrOffsetInPush;
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
- BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- } else {
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
- nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- }
-
if (GPRCS2Size > 0) {
- MachineBasicBlock::iterator Pos = ++GPRCS2Push;
- if (!HasFP) {
- CFAOffset -= GPRCS2Size;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -455,7 +581,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
break;
}
@@ -465,17 +592,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (DPRCSSize > 0) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
- do {
- MachineBasicBlock::iterator Push = DPRCSPush++;
- if (!HasFP) {
- CFAOffset -= sizeOfSPAdjustment(Push);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD);
-
+ MachineBasicBlock::iterator Pos = std::next(LastPush);
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -485,21 +602,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
- BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
}
- if (NumBytes) {
- if (!HasFP) {
- CFAOffset -= NumBytes;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- }
+ // Now we can emit descriptions of where the canonical frame address was
+ // throughout the process. If we have a frame pointer, it takes over the job
+ // half-way through, so only the first few .cfi_def_cfa_offset instructions
+ // actually get emitted.
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@@ -507,6 +621,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedGapSize(DPRGapSize);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
// If we need dynamic stack realignment, do it here. Be paranoid and make
@@ -515,28 +630,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// realigned.
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
- assert (!AFI->isThumb1OnlyFunction());
+ assert(!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
- // Emit bic sp, sp, MaxAlign
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::BICri), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addImm(MaxAlign-1)));
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
+ false);
} else {
- // We cannot use sp as source/dest register here, thus we're emitting the
- // following sequence:
+ // We cannot use sp as source/dest register here, thus we're using r4 to
+ // perform the calculations. We're emitting the following sequence:
// mov r4, sp
- // bic r4, r4, MaxAlign
+ // -- use emitAligningInstructions to produce the best sequence to
+ // -- zero out the lower bits in r4
// mov sp, r4
// FIXME: It will be better just to find spare register here.
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill));
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::t2BICri), ARM::R4)
- .addReg(ARM::R4, RegState::Kill)
- .addImm(MaxAlign-1)));
+ .addReg(ARM::SP, RegState::Kill));
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
+ false);
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill));
+ .addReg(ARM::R4, RegState::Kill));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -574,7 +685,7 @@ void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
const ARMBaseInstrInfo &TII =
- *MF.getTarget().getSubtarget<ARMSubtarget>().getInstrInfo();
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
return;
@@ -622,14 +733,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -659,6 +773,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= (ArgRegsSaveSize +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedGapSize() +
AFI->getDPRCalleeSavedAreaSize());
// Reset SP based on frame pointer only if the stack frame extends beyond
@@ -707,6 +822,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
MBBI++;
}
+ if (AFI->getDPRCalleeSavedGapSize()) {
+ assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
+ "unexpected DPR alignment gap");
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
+ }
+
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
@@ -732,8 +853,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
int FPOffset = Offset - AFI->getFramePtrSpillOffset();
@@ -818,7 +939,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -891,7 +1012,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
@@ -981,7 +1102,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo &MFI = *MF.getFrameInfo();
// Mark the D-register spill slots as properly aligned. Since MFI computes
@@ -1021,15 +1142,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// The immediate is <= 64, so it doesn't need any special encoding.
unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::SP)
- .addImm(8 * NumAlignedDPRCS2Regs)));
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
- // bic r4, r4, #align-1
- Opc = isThumb ? ARM::t2BICri : ARM::BICri;
unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::R4, RegState::Kill)
- .addImm(MaxAlign - 1)));
+ // We must set parameter MustBeSingleInstruction to true, since
+ // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
+ // stack alignment. Luckily, this can always be done since all ARM
+ // architecture versions that support Neon also support the BFC
+ // instruction.
+ emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
// mov sp, r4
// The stack pointer must be adjusted before spilling anything, otherwise
@@ -1140,7 +1262,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Find the frame index assigned to d8.
int D8SpillFI = 0;
@@ -1355,12 +1477,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Don't bother if the default stack alignment is sufficiently high.
- if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ if (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= 8)
return;
// Aligned spills require stack realignment.
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
if (!RegInfo->canRealignStack(MF))
return;
@@ -1399,10 +1524,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
unsigned NumGPRSpills = 0;
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1565,7 +1690,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// of GPRs, spill one extra callee save GPR so we won't have to pad between
// the integer and double callee save areas.
unsigned TargetAlign = getStackAlignment();
- if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
@@ -1643,7 +1768,7 @@ void ARMFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -1761,7 +1886,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 74629c32b398..b7be43642ad0 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM_FRAMEINFO_H
-#define ARM_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0885c4e5721a..0e4f81c8789e 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -46,8 +46,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
const MCInstrDesc &LastMCID = LastMI->getDesc();
const TargetMachine &TM =
MI->getParent()->getParent()->getTarget();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index a8198e26703e..ccf09db69937 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMHAZARDRECOGNIZER_H
-#define ARMHAZARDRECOGNIZER_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -46,4 +46,4 @@ public:
} // end namespace llvm
-#endif // ARMHAZARDRECOGNIZER_H
+#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 38547cfae2e0..9621743fe307 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -425,7 +425,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
return true;
if (Use->isMachineOpcode()) {
const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
- CurDAG->getTarget().getInstrInfo());
+ CurDAG->getSubtarget().getInstrInfo());
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
@@ -526,8 +526,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -542,16 +541,15 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
+ int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
- if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
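
The switch from getZExtValue to getSExtValue lets SelectAddrModeImm12 accept negative displacements directly: the ARM imm12 load/store encoding holds a 12-bit magnitude plus an add/subtract (U) bit, covering +/-4095. A sketch of the accepted range, assuming the encoder folds the sign into the U bit later:

    #include <cassert>
    #include <cstdint>

    // ARM addrmode imm12: any displacement in (-4096, 4096) is encodable.
    static bool isLegalAddrModeImm12(int64_t Offset) {
      return Offset > -0x1000 && Offset < 0x1000;
    }

    int main() {
      assert(isLegalAddrModeImm12(-4095));  // now matched via getSExtValue
      assert(!isLegalAddrModeImm12(4096));  // out of range either way
    }
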
@@ -697,8 +695,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -718,8 +715,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -896,8 +892,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
@@ -911,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -957,8 +951,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -975,8 +968,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1199,8 +1191,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1217,8 +1208,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1266,8 +1256,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1296,8 +1285,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1326,8 +1314,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1392,10 +1379,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
OffReg = OffReg.getOperand(0);
else {
ShAmt = 0;
- ShOpcVal = ARM_AM::no_shift;
}
- } else {
- ShOpcVal = ARM_AM::no_shift;
}
}
@@ -1425,7 +1409,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
@@ -1800,6 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
break;
@@ -1936,6 +1921,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VST1");
break;
@@ -2361,6 +2347,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
+ unsigned LSB = 0;
+ if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
+ !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
+ return nullptr;
+
+ if (LSB + Width > 32)
+ return nullptr;
+
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width - 1, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+
return nullptr;
}
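
A rough standalone model of the bitfield extract that this new SIGN_EXTEND_INREG path selects (SBFX-style semantics; the helper name and demo values are illustrative, not part of the patch, and the shift behaviour assumes the usual two's-complement, arithmetic-shift targets):

#include <cassert>
#include <cstdint>

// Extract Width bits starting at bit LSB and sign-extend the result,
// mirroring what "sbfx Rd, Rn, #LSB, #Width" computes. The same
// LSB + Width <= 32 guard as in the combine keeps both shifts defined.
static int32_t sbfx_model(uint32_t X, unsigned LSB, unsigned Width) {
  assert(Width >= 1 && LSB + Width <= 32);
  return static_cast<int32_t>(X << (32 - LSB - Width)) >> (32 - Width);
}

int main() {
  // The width-4 field at bit 8 of 0x00000F00 is 0b1111: sign-extends to -1.
  assert(sbfx_model(0x00000F00u, 8, 4) == -1);
  return 0;
}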
@@ -2457,10 +2462,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
if (UseCP) {
- SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(
- Type::getInt32Ty(*CurDAG->getContext()), Val),
- getTargetLowering()->getPointerTy());
+ SDValue CPIdx = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI->getPointerTy());
SDNode *ResNode;
if (Subtarget->isThumb()) {
@@ -2490,12 +2494,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
+ return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
@@ -2509,6 +2511,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
break;
+ case ISD::SIGN_EXTEND_INREG:
case ISD::SRA:
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
return I;
@@ -3393,7 +3396,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
- GU = T1.getNode();
}
else {
// For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f5baf2e4859c..f92f257cd7eb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -70,9 +71,9 @@ namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C, ParmContext PC)
- : CCState(CC, isVarArg, MF, TM, locs, C) {
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ ParmContext PC)
+ : CCState(CC, isVarArg, MF, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
"ARMCCState users must specify whether their context is call"
"or prologue generation.");
@@ -155,19 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new TargetLoweringObjectFileMachO();
- if (TT.isOSWindows())
- return new TargetLoweringObjectFileCOFF();
- return new ARMElfTargetObjectFile();
-}
-
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getRegisterInfo();
- Itins = TM.getInstrItineraryData();
+ RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
+ Itins = TM.getSubtargetImpl()->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -312,6 +305,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer to floating-point conversions.
@@ -387,6 +381,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
+ // The half <-> float conversion functions are always soft-float, but are
+ // needed for some targets which use a hard-float calling convention by
+ // default.
+ if (Subtarget->isAAPCS_ABI()) {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ }
+
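
For reference, the RTABI helpers these RTLIB entries resolve to look roughly like the declarations below. The __aeabi_d2h name comes from the table added earlier in this patch; the other two signatures are assumptions based on the same AEABI helper family:

// Half values cross the soft-float (core-register) boundary as
// unsigned short, whichever float ABI the rest of the target uses.
extern "C" unsigned short __aeabi_f2h(float);  // RTLIB::FPROUND_F32_F16
extern "C" unsigned short __aeabi_d2h(double); // RTLIB::FPROUND_F64_F16
extern "C" float __aeabi_h2f(unsigned short);  // RTLIB::FPEXT_F16_F32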
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -394,26 +401,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
- if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
- setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
@@ -561,15 +565,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16,
MVT::v2i32};
for (unsigned i = 0; i < 6; ++i) {
- setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ }
}
}
@@ -577,12 +584,47 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::ADDC);
+ if (Subtarget->isFPOnlySP()) {
+ // When targetting a floating-point unit with only single-precision
+ // operations, f64 is legal for the few double-precision instructions which
+ // are present However, no double-precision operations other than moves,
+ // loads and stores are provided by the hardware.
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ }
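
A minimal illustration of the resulting split, assuming an SP-only FPU such as a Cortex-M4's fpv4-sp-d16: f64 values can still live in D registers, but the arithmetic should become an RTABI call.

// f64 loads/stores/moves stay in hardware (vldr/vstr/vmov on D regs),
// while FADD f64 is Expand and is expected to lower to "bl __aeabi_dadd".
double add_doubles(double x, double y) {
  return x + y;
}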
computeRegisterProperties();
// ARM does not have floating-point extending loads.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -590,7 +632,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
@@ -720,8 +763,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
- // the default expansion.
- if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // the default expansion. If we are targeting a single-threaded system,
+ // then mark them all as Expand so we can lower them later into their
+ // non-atomic form.
+ if (TM.Options.ThreadModel == ThreadModel::Single)
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
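
A sketch of what the new ThreadModel::Single case above buys (-mthread-model single is clang's spelling of this option): with only one thread, the release fence below should collapse to nothing instead of a dmb.

#include <atomic>

// Built with -mthread-model single, ATOMIC_FENCE is Expand and no
// barrier is needed; under the default posix model this emits a dmb.
void publish(int *slot, int value) {
  *slot = value;
  std::atomic_thread_fence(std::memory_order_release);
}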
@@ -729,7 +776,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops()) {
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
}
} else {
@@ -830,8 +877,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
- // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
- if (!Subtarget->hasV8Ops()) {
+ // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
+ if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
@@ -847,7 +894,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ if (Subtarget->getTargetTriple().isiOS()) {
// For iOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -855,6 +902,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ // FP-ARMv8 implements a lot of rounding-like FP operations.
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ if (!Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ }
+ }
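
As a rough example of what marking these nodes Legal enables (assuming an FP-ARMv8 target such as -mcpu=cortex-a57), the calls below can select directly to vrint instructions instead of libcalls:

#include <cmath>

float round_down(float x) { return std::floor(x); } // FFLOOR -> vrintm.f32
float chop(float x)       { return std::trunc(x); } // FTRUNC -> vrintz.f32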
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
@@ -1130,7 +1194,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
@@ -1267,8 +1332,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallResult(Ins,
CCAssignFnForNode(CallConv, /* Return*/ true,
isVarArg));
@@ -1428,8 +1493,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
@@ -1657,14 +1722,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
- DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(), false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1690,7 +1758,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isDirect = true;
bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
@@ -1751,7 +1819,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
@@ -1951,17 +2020,32 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Subtarget->isThumb1Only())
return false;
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on ARM when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ const Triple TT(getTargetMachine().getTargetTriple());
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ return false;
+ }
+
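
The case being excluded looks like this in source form (function names here are hypothetical): a BL to an undefined weak symbol can be turned into a NOP by the static linker, but a tail-call branch carries no such guarantee.

// maybe_hook may remain undefined at static link time. A normal call is
// safely replaced with a NOP per the AAELF rules quoted above; the
// behaviour of a tail-call branch is implementation-defined, so the
// tail-call transformation must not fire here.
extern "C" void maybe_hook(void) __attribute__((weak));

extern "C" void run(void) {
  maybe_hook(); // in tail position, but deliberately not tail-called
}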
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext(), Call);
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext(), Call);
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
@@ -1995,8 +2079,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
@@ -2006,7 +2090,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -2049,7 +2134,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
isVarArg));
}
@@ -2097,8 +2182,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
@@ -2109,6 +2194,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
bool isLittleEndian = Subtarget->isLittle();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ AFI->setReturnRegsCount(RVLocs.size());
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
@@ -2227,9 +2316,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
- else
+ else {
+ // We are at the top of this chain.
+ // If the copy has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
// First CopyToReg
TCChain = UseChain;
+ }
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
@@ -2238,6 +2333,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
TCChain = Copy->getOperand(0);
} else {
return false;
@@ -2637,7 +2736,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
- unsigned Domain = ARM_MB::ISH;
+ ARM_MB::MemBOpt Domain = ARM_MB::ISH;
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
@@ -2750,7 +2849,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
}
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
@@ -2927,8 +3029,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Prologue);
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
@@ -2977,7 +3079,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
}
CCInfo.rewindByValRegsInfo();
lastInsIndex = -1;
- if (isVarArg) {
+ if (isVarArg && MFI->hasVAStart()) {
unsigned ExtraArgRegsSize;
unsigned ExtraArgRegsSaveSize;
computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
@@ -3033,9 +3135,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = AFI->isThumb1OnlyFunction() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -3119,7 +3220,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
}
// varargs
- if (isVarArg)
+ if (isVarArg && MFI->hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
@@ -3141,6 +3242,18 @@ static bool isFloatingPointZero(SDValue Op) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
+ } else if (Op->getOpcode() == ISD::BITCAST &&
+ Op->getValueType(0) == MVT::f64) {
+ // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+ // created by LowerConstantFP().
+ SDValue BitcastOp = Op->getOperand(0);
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
+ SDValue MoveOp = BitcastOp->getOperand(0);
+ if (MoveOp->getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
+ return true;
+ }
+ }
}
return false;
}
@@ -3209,6 +3322,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDLoc dl) const {
+ assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -3324,9 +3438,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
- ARMcc, CCR, OverflowCmp);
-
+ return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ OverflowCmp, DAG);
}
// Convert:
@@ -3360,7 +3473,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
- return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
@@ -3430,6 +3543,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
+SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+ SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+ SDValue Cmp, SelectionDAG &DAG) const {
+ if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
+ TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
+
+ SDValue TrueLow = TrueVal.getValue(0);
+ SDValue TrueHigh = TrueVal.getValue(1);
+ SDValue FalseLow = FalseVal.getValue(0);
+ SDValue FalseHigh = FalseVal.getValue(1);
+
+ SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
+ ARMcc, CCR, Cmp);
+ SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
+ ARMcc, CCR, duplicateCmp(Cmp, DAG));
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+ } else {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
+ }
+}
+
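
A standalone scalar model of the new f64 path (select_f64 is an illustrative name, not an LLVM API): the double is split into a GPR pair as VMOVRRD would, each half gets its own i32 conditional move, and VMOVDRR rebuilds the result.

#include <cstdint>
#include <cstring>

static double select_f64(bool cond, double t, double f) {
  uint64_t tb, fb;
  std::memcpy(&tb, &t, sizeof tb); // VMOVRRD: D reg -> register pair
  std::memcpy(&fb, &f, sizeof fb);
  uint32_t lo = cond ? uint32_t(tb) : uint32_t(fb);             // CMOV #1
  uint32_t hi = cond ? uint32_t(tb >> 32) : uint32_t(fb >> 32); // CMOV #2, duplicated compare
  uint64_t rb = (uint64_t(hi) << 32) | lo;                      // VMOVDRR
  double r;
  std::memcpy(&r, &rb, sizeof r);
  return r;
}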
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3439,6 +3578,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
@@ -3463,8 +3614,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
- Cmp);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -3479,12 +3629,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// select c, a, b
// We only do this in unsafe-fp-math, because signed zeros and NaNs are
// handled differently than the original code sequence.
- if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
- RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ if (getTargetMachine().Options.UnsafeFPMath) {
+ if (LHS == TrueVal && RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ } else if (LHS == FalseVal && RHS == TrueVal) {
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
}
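
Written out as source, these are the two shapes the combine now matches; both require unsafe-fp-math because vmaxnm/vminnm treat NaNs and signed zeros differently from the plain select sequence.

// LHS == TrueVal, RHS == FalseVal: the case handled before this change.
float fmax_direct(float a, float b)   { return a > b ? a : b; } // -> vmaxnm
// LHS == FalseVal, RHS == TrueVal: the commuted case added here.
float fmax_commuted(float a, float b) { return a < b ? b : a; } // -> vmaxnm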
bool swpCmpOps = false;
@@ -3503,14 +3659,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMcc, CCR, Cmp);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
- Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMcc2, CCR, Cmp2);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
@@ -3643,6 +3797,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
@@ -3735,11 +3901,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
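
On an SP-only FPU even a simple f64 -> i32 conversion now goes out to the run-time library (the helper name below follows the AEABI convention):

// With isFPOnlySP(), there is no double-precision vcvt, so FP_TO_SINT
// is expected to become "bl __aeabi_d2iz" via the makeLibCall path above.
int to_int(double d) {
  return static_cast<int>(d);
}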
SDLoc dl(Op);
unsigned Opc;
@@ -3789,11 +3967,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
SDLoc dl(Op);
unsigned Opc;
@@ -4302,7 +4492,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
- if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
@@ -4566,6 +4756,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ // Use the default (constant pool) lowering for double constants when we have
+ // an SP-only FPU.
+ if (IsDouble && Subtarget->isFPOnlySP())
+ return SDValue();
+
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
@@ -5744,7 +5939,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -6099,7 +6294,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
@@ -6269,6 +6464,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
}
}
@@ -6305,7 +6502,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6322,9 +6520,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC = isThumb ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
@@ -6388,9 +6585,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
- .addFrameIndex(FI)
- .addImm(36)); // &jbuf[1] :: pc
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36); // &jbuf[1] :: pc
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
@@ -6420,7 +6617,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *ARMTargetLowering::
EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6428,9 +6626,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC = Subtarget->isThumb() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
+ : &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -6749,16 +6946,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (SeenMBBs.insert(CurMBB))
+ if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
- for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
- I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
- MachineBasicBlock *BB = *I;
+ for (MachineBasicBlock *BB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
@@ -6937,7 +7132,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
@@ -6979,14 +7175,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
- : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
- VecTRC = UnitSize == 16
- ? (const TargetRegisterClass *)&ARM::DPairRegClass
- : UnitSize == 8
- ? (const TargetRegisterClass *)&ARM::DPRRegClass
- : nullptr;
+ VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
+ : UnitSize == 8 ? &ARM::DPRRegClass
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7171,7 +7364,7 @@ MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetWindows() &&
@@ -7236,7 +7429,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
@@ -7433,9 +7627,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned NewRsbDstReg =
+ MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
@@ -7489,12 +7682,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- if (!MI->hasPostISelHook()) {
- assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
- "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
- return;
- }
-
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7506,8 +7693,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
@@ -8395,7 +8582,10 @@ static SDValue PerformBFICombine(SDNode *N,
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned LSB = countTrailingZeros(~InvMask);
unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
- unsigned Mask = (1 << Width)-1;
+ assert(Width <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ unsigned Mask = (1u << Width) - 1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
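
The reason for the 1u and the new assert, as a standalone check: with 32-bit int, 1 << 31 overflows a signed int and a shift by 32 is undefined regardless of signedness.

#include <cassert>
#include <cstdint>

// Safe variant of the mask computation; Width == 32 would shift by the
// full bit-width and remain UB, which is exactly what the assert forbids.
static uint32_t low_bits_mask(unsigned Width) {
  assert(Width < 32 && "shift amount must be < bit-width");
  return (1u << Width) - 1; // unsigned, so Width == 31 is well-defined
}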
@@ -8408,10 +8598,11 @@ static SDValue PerformBFICombine(SDNode *N,
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -8442,8 +8633,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
- DCI.RemoveFromWorklist(LD);
- DAG.DeleteNode(LD);
return Result;
}
@@ -8468,147 +8657,6 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// PerformSTORECombine - Target-specific dag combine xforms for
-/// ISD::STORE.
-static SDValue PerformSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- StoreSDNode *St = cast<StoreSDNode>(N);
- if (St->isVolatile())
- return SDValue();
-
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
- // pack all of the elements in one place. Next, store to memory in fewer
- // chunks.
- SDValue StVal = St->getValue();
- EVT VT = StVal.getValueType();
- if (St->isTruncatingStore() && VT.isVector()) {
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT StVT = St->getMemoryVT();
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
-
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
-
- unsigned SizeRatio = FromEltSz / ToEltSz;
- assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
- NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDLoc DL(St);
- SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
- StoreType = Tp;
- }
- // Didn't find a legal store type.
- if (!TLI.isTypeLegal(StoreType))
- return SDValue();
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue BasePtr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
- for (unsigned I = 0; I < E; I++) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(I));
- SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- Increment);
- Chains.push_back(Ch);
- }
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (!ISD::isNormalStore(St))
- return SDValue();
-
- // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
- // ARM stores of arguments in the same cache line.
- if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse()) {
- SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
-
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL,
- StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
- }
-
- if (StVal.getValueType() != MVT::i64 ||
- StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
-
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(StVal);
- SDValue IntVec = StVal.getOperand(0);
- EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
- IntVec.getValueType().getVectorNumElements());
- SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
- SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- Vec, StVal.getOperand(1));
- dl = SDLoc(N);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
- // Make the DAGCombiner fold the bitcasts.
- DCI.AddToWorklist(Vec.getNode());
- DCI.AddToWorklist(ExtElt.getNode());
- DCI.AddToWorklist(V.getNode());
- return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getTBAAInfo());
-}
-
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
@@ -8626,7 +8674,8 @@ static bool hasNormalLoadOperand(SDNode *N) {
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI){
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
@@ -8828,17 +8877,18 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask.data());
}
-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
-/// NEON load/store intrinsics to merge base address updates.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
- unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ bool isStore = N->getOpcode() == ISD::STORE;
+ unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
@@ -8899,6 +8949,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; isLaneOp = false; break;
+ case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; isLaneOp = false; break;
}
}
@@ -8906,8 +8960,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
EVT VecTy;
if (isLoad)
VecTy = N->getValueType(0);
- else
+ else if (isIntrinsic)
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ else
+ VecTy = N->getOperand(1).getValueType();
+
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
@@ -8924,25 +8981,70 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
}
+ EVT AlignedVecTy = VecTy;
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrinsics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ unsigned Alignment = LSN->getAlignment();
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ }
+
// Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
EVT Tys[6];
unsigned NumResultVecs = (isLoad ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = VecTy;
+ Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
- for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
- Ops.push_back(N->getOperand(i));
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32));
+ } else {
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i)
+ Ops.push_back(N->getOperand(i));
}
- MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+
+ // If this is a non-standard-aligned store, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size()-2];
+ StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
+ }
+
+ MemSDNode *MemInt = cast<MemSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops, MemInt->getMemoryVT(),
+ Ops, AlignedVecTy,
MemInt->getMemOperand());
// Update the uses.
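
The kind of loop this extension targets, sketched with NEON intrinsics (assumes a NEON-enabled target and n a multiple of 4): the pointer bump can now fold into a post-indexed vld1 rather than staying a separate add.

#include <arm_neon.h>

// The vld1q_s32 plus "p += 4" pair is a candidate for VLD1_UPD,
// i.e. "vld1.32 {d16,d17}, [r0]!".
int32x4_t sum_i32x4(const int32_t *p, int n) {
  int32x4_t acc = vdupq_n_s32(0);
  for (int i = 0; i < n; i += 4) {
    acc = vaddq_s32(acc, vld1q_s32(p));
    p += 4;
  }
  return acc;
}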
@@ -8950,6 +9052,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
+
+ // If this is a non-standard-aligned load, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
+ }
+
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -8959,6 +9069,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
return SDValue();
}
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -9011,7 +9129,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
@@ -9072,6 +9190,164 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // If this is a legal vector load, try to combine it into a VLD1_UPD.
+ if (ISD::isNormalLoad(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+    // The From/To element sizes and the element count must be powers of two.
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+    // register. We now need to save it to memory.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
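
The shuffle mask built above is easier to see with numbers: for a little-endian v4i32 -> v4i8 truncating store, SizeRatio is 4 and the mask picks byte lanes 0, 4, 8 and 12 of the v16i8, leaving the rest undef. A stand-alone sketch (our helper, not patch code) of the same mask computation:

    #include <cstdio>
    #include <vector>

    std::vector<int> truncStoreMask(unsigned NumElems, unsigned SizeRatio,
                                    bool BigEndian) {
      std::vector<int> Mask(NumElems * SizeRatio, -1); // -1 == undef lane
      for (unsigned i = 0; i < NumElems; ++i)
        Mask[i] = BigEndian ? (i + 1) * SizeRatio - 1 : i * SizeRatio;
      return Mask;
    }

    int main() {
      for (int M : truncStoreMask(4, 4, /*BigEndian=*/false))
        std::printf("%d ", M); // prints: 0 4 8 12 -1 -1 ... -1
      return 0;
    }
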
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+                              StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
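In scalar terms, the VMOVDRR split above stores the two core-register halves directly, so the value never round-trips through a NEON D register. A hedged illustration (ours, not the patch's):

    #include <cstdint>

    void storeVMOVDRRPair(uint32_t Lo, uint32_t Hi, uint32_t *P,
                          bool BigEndian) {
      P[0] = BigEndian ? Hi : Lo; // first store at the base pointer
      P[1] = BigEndian ? Lo : Hi; // second store at base + 4; the patch gives
                                  // it alignment min(4, original / 2)
    }
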
+ if (StVal.getValueType() == MVT::i64 &&
+ StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(StVal);
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = SDLoc(N);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ }
+
+ // If this is a legal vector store, try to combine it into a VST1_UPD.
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
@@ -9128,16 +9404,18 @@ static SDValue PerformVCVTCombine(SDNode *N,
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
+ NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
- // be lossy.
+    // be lossy. We also can't handle more than 4 lanes, since these instructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
@@ -9641,10 +9919,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
- case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
- case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
@@ -9660,10 +9938,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
@@ -9683,7 +9962,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
default: break;
}
break;
@@ -9696,8 +9975,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
- bool *Fast) const {
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
@@ -9751,11 +10032,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+ Fast))) {
return MVT::f64;
}
}
@@ -10358,6 +10640,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
+ if (Subtarget->isThumb1Only())
+ return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
@@ -10562,7 +10846,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
- "Invalid opcode for Div/Rem lowering");
+ "Invalid opcode for Div/Rem lowering");
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -10570,10 +10854,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
RTLIB::Libcall LC;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
- case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
- case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
- case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
SDValue InChain = DAG.getEntryNode();
@@ -10593,7 +10877,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy());
- Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -10631,6 +10915,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
+SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(0).getValueType() == MVT::f64 &&
+ Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_ROUND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
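On an FP-only-SP subtarget (single-precision hardware only), FP_EXTEND and FP_ROUND on f64 cannot be selected to VCVT, so both lowerings above fall back to runtime calls. A hedged illustration, assuming the usual AEABI helper names that RTLIB resolves to on such targets:

    // Not from the patch: the lowerings above become calls like these.
    extern "C" double __aeabi_f2d(float);  // FP_EXTEND: f32 -> f64
    extern "C" float __aeabi_d2f(double);  // FP_ROUND:  f64 -> f32

    double extendToDouble(float F) { return __aeabi_f2d(F); }
    float roundToSingle(double D) { return __aeabi_d2f(D); }
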
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10658,7 +10967,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
- if (VT == MVT::f64)
+ if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
@@ -10785,31 +11094,154 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
- // Loads and stores less than 64-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
- // anything for those.
- bool IsMClass = Subtarget->isMClass();
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return Size == 64 && !IsMClass;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
+
+Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
+ ARM_MB::MemBOpt Domain) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+ // First, if the target has no DMB, see what fallback we can use.
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
+ Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
+ Builder.getInt32(0), Builder.getInt32(7),
+ Builder.getInt32(10), Builder.getInt32(5)};
+ return Builder.CreateCall(MCR, args);
+ } else {
+ // Instead of using barriers, atomic accesses on these subtargets use
+ // libcalls.
+ llvm_unreachable("makeDMB on a target so old that it has no barriers");
+ }
+ } else {
+ Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
+ Constant *CDomain = Builder.getInt32(Domain);
+ return Builder.CreateCall(DMB, CDomain);
+ }
+}
+
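The six constants passed to Intrinsic::arm_mcr above spell out the classic pre-v7 CP15 barrier; in GNU inline-assembly terms the fallback is equivalent to this (illustration only, not patch code):

    // coproc p15, opc1 0, CRn c7, CRm c10, opc2 5 == CP15 Data Memory Barrier
    static inline void cp15DataMemoryBarrier() {
      int Zero = 0;
      __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(Zero)
                           : "memory");
    }
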
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case Monotonic:
+ case Acquire:
+ return nullptr; // Nothing to do
+ case SequentiallyConsistent:
+ if (!IsStore)
+ return nullptr; // Nothing to do
+ /*FALLTHROUGH*/
+ case Release:
+ case AcquireRelease:
+    // FIXME: add a comment with a link to documentation justifying this.
+    if (Subtarget->isSwift())
+      return makeDMB(Builder, ARM_MB::ISHST);
+    else
+      return makeDMB(Builder, ARM_MB::ISH);
+ }
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
+}
+
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+    llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case Monotonic:
+ case Release:
+ return nullptr; // Nothing to do
+ case Acquire:
+ case AcquireRelease:
+ case SequentiallyConsistent:
+ return makeDMB(Builder, ARM_MB::ISH);
}
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
+}
+
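Restating the two switches above as predicates (our names) against the cited C++11 mapping: a barrier goes before releasing operations and after acquiring ones, and seq_cst additionally needs a leading barrier only for stores:

    enum class Ord { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

    bool needsLeadingFence(Ord O, bool IsStore) {
      if (O == Ord::Release || O == Ord::AcquireRelease)
        return true;
      return O == Ord::SeqCst && IsStore; // seq_cst loads take no leading fence
    }

    bool needsTrailingFence(Ord O) {
      return O == Ord::Acquire || O == Ord::AcquireRelease || O == Ord::SeqCst;
    }
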
+// Loads and stores less than 64 bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return (Size == 64) && !Subtarget->isMClass();
+}
+
+// Loads and stores less than 64 bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual,
+// sections A8.8.72-74 LDRD)
+bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+ return (Size == 64) && !Subtarget->isMClass();
+}
+
+// For the real atomic operations, we have ldrex/strex up to 32 bits,
+// and up to 64 bits on the non-M profiles
+bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ return Size <= (Subtarget->isMClass() ? 32U : 64U);
+}
+
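Answering "expand in IR" here means the IR-level atomic expansion pass rewrites the RMW as a load-linked/store-conditional retry loop via emitLoadLinked/emitStoreConditional below. At the machine level that is the familiar pattern in this hedged sketch (ARM mode, 32-bit case):

    static inline int atomicAdd32(int *P, int V) {
      int Old, New, Failed;
      __asm__ __volatile__("1: ldrex %0, [%3]\n"
                           "   add   %1, %0, %4\n"
                           "   strex %2, %1, [%3]\n"
                           "   cmp   %2, #0\n"
                           "   bne   1b\n"
                           : "=&r"(Old), "=&r"(New), "=&r"(Failed)
                           : "r"(P), "r"(V)
                           : "cc", "memory");
      return Old; // the value observed before the add
    }
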
+// This has so far only been implemented for MachO.
+bool ARMTargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->isTargetMachO();
+}
+
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ // If we do not have NEON, vector types are not natively supported.
+ if (!Subtarget->hasNEON())
+ return false;
+
+ // Floating point values and vector values map to the same register file.
+  // Therefore, although we could do a store extract of a vector type, it is
+  // better to leave it as a float, as we have more freedom in the addressing
+  // modes for those.
+ if (VectorTy->isFPOrFPVectorTy())
+ return false;
+
+ // If the index is unknown at compile time, this is very expensive to lower
+ // and it is not possible to combine the store with the extract.
+ if (!isa<ConstantInt>(Idx))
+ return false;
- // For the real atomic operations, we have ldrex/strex up to 32 bits,
- // and up to 64 bits on the non-M profiles
- unsigned AtomicLimit = IsMClass ? 32 : 64;
- return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
+ assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ // We can do a store + vector extract on any vector that fits perfectly in a D
+ // or Q register.
+ if (BitWidth == 64 || BitWidth == 128) {
+ Cost = 0;
+ return true;
+ }
+ return false;
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsAcquire = isAtLeastAcquire(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
@@ -10845,8 +11277,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsRelease = isAtLeastRelease(Ord);
// Since the intrinsics must have legal type, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
@@ -10944,6 +11375,6 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
bool result = isHomogeneousAggregate(Ty, Base, Members);
- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
return result;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 1ace0f330f1a..89b0c31ac52e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMISELLOWERING_H
-#define ARMISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -232,7 +232,7 @@ namespace llvm {
class ARMTargetLowering : public TargetLowering {
public:
- explicit ARMTargetLowering(TargetMachine &TM);
+ explicit ARMTargetLowering(const TargetMachine &TM);
unsigned getJumpTableEncoding() const override;
@@ -266,11 +266,12 @@ namespace llvm {
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const override;
EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
@@ -391,12 +392,26 @@ namespace llvm {
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+ bool hasLoadLinkedStoreConditional() const override;
+ Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
+ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+ bool useLoadStackGuardNode() const override;
+
+ bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const override;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
@@ -473,6 +488,10 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -562,6 +581,9 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
+ SDValue ARMcc, SDValue CCR, SDValue Cmp,
+ SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 59e9260b81ea..7d27cf3fcdcb 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -203,6 +203,16 @@ def msr_mask : Operand<i32> {
let ParserMatchClass = MSRMaskOperand;
}
+def BankedRegOperand : AsmOperandClass {
+ let Name = "BankedReg";
+ let ParserMethod = "parseBankedRegOperand";
+}
+def banked_reg : Operand<i32> {
+ let PrintMethod = "printBankedRegOperand";
+ let DecoderMethod = "DecodeBankedReg";
+ let ParserMatchClass = BankedRegOperand;
+}
+
// Shift Right Immediate - A shift right immediate is encoded differently from
// other shift immediates. The imm6 field is encoded like so:
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index f235ac225848..17d1ffaa9ff0 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -90,6 +90,49 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ if (!Subtarget.useMovt(MF)) {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM);
+ return;
+ }
+
+ if (RM != Reloc::PIC_) {
+ expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (!Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM);
+ return;
+ }
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineInstrBuilder MIB;
+
+ MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
+
namespace {
/// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for ARM ELF.
@@ -113,8 +156,9 @@ namespace {
ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
*Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
- unsigned Align = TM->getDataLayout()
- ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Align =
+ TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment(
+ Type::getInt32PtrTy(*Context));
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
MachineBasicBlock &FirstMBB = MF.front();
@@ -124,7 +168,7 @@ namespace {
MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *TM->getInstrInfo();
+ const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index b09958aaa950..90f34ea08401 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMINSTRUCTIONINFO_H
-#define ARMINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
@@ -37,6 +37,10 @@ public:
/// always be able to get register info as well (through this method).
///
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f0e145a394b2..126c5529c152 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -241,6 +241,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+ AssemblerPredicate<"FeatureVirtualization",
+ "virtualization-extensions">;
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
AssemblerPredicate<"FeatureTrustZone",
"TrustZone">;
@@ -260,8 +263,6 @@ def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
"!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb", "arm-mode">;
-def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
-def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -330,24 +331,6 @@ def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
-def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- unsigned Value = -(unsigned)N->getZExtValue();
- return Value && ARM_AM::getSOImmVal(Value) != -1;
- }], imm_neg_XFORM> {
- let ParserMatchClass = so_imm_neg_asmoperand;
-}
-
-// Note: this pattern doesn't require an encoder method and such, as it's
-// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use so_imm.
-def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], imm_not_XFORM> {
- let ParserMatchClass = so_imm_not_asmoperand;
-}
-
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
@@ -527,7 +510,7 @@ def shift_imm : Operand<i32> {
let ParserMatchClass = ShifterImmAsmOperand;
}
-// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm.
def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
def so_reg_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectRegShifterOperand",
@@ -572,27 +555,43 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let MIOperandInfo = (ops GPR, i32imm);
}
-
-// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits.
-def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
-def so_imm : Operand<i32>, ImmLeaf<i32, [{
+// mod_imm: match a 32-bit immediate operand, which can be encoded into
+// a 12-bit immediate: an 8-bit integer and a 4-bit rotator (see the ARM ARM,
+// "Modified Immediate Constants"). Within the MC layer we keep this
+// immediate in its encoded form.
+def ModImmAsmOperand: AsmOperandClass {
+ let Name = "ModImm";
+ let ParserMethod = "parseModImm";
+}
+def mod_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
- let EncoderMethod = "getSOImmOpValue";
- let ParserMatchClass = SOImmAsmOperand;
- let DecoderMethod = "DecodeSOImmOperand";
+ let EncoderMethod = "getModImmOpValue";
+ let PrintMethod = "printModImmOperand";
+ let ParserMatchClass = ModImmAsmOperand;
}
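
For readers unfamiliar with modified immediates: a value is encodable iff some even right-rotation of an 8-bit constant produces it; e.g. 0x0000AB00 is 0xAB rotated right by 24, giving the 12-bit encoding 0xCAB. A stand-alone sketch of the test (our code; cf. ARM_AM::getSOImmVal):

    #include <cstdint>

    static uint32_t rol32(uint32_t V, unsigned R) {
      R &= 31;
      return R ? (V << R) | (V >> (32 - R)) : V;
    }

    // Returns the 12-bit encoding (rot << 8 | imm8), or -1 if not encodable.
    int getModImmVal(uint32_t V) {
      for (unsigned R = 0; R < 32; R += 2) { // R is the ISA's right-rotate
        uint32_t Imm8 = rol32(V, R);         // undo the rotation
        if (Imm8 <= 0xFF)
          return int((R / 2) << 8 | Imm8);
      }
      return -1; // needs MOVW/MVN or a constant pool instead
    }
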
-// Break so_imm's up into two pieces. This handles immediates with up to 16
-// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
-// get the first/second pieces.
-def so_imm2part : PatLeaf<(imm), [{
- return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder
+// method and such, as they are only used on aliases (Pat<> and InstAlias<>).
+// The actual parsing, encoding, and decoding are handled by the destination
+// instructions, which use mod_imm.
-/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
-///
+def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; }
+def mod_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], imm_not_XFORM> {
+ let ParserMatchClass = ModImmNotAsmOperand;
+}
+
+def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; }
+def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ModImmNegAsmOperand;
+}
+
+/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal() is true.
def arm_i32imm : PatLeaf<(imm), [{
if (Subtarget->useMovt(*MF))
return true;
@@ -633,6 +632,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
let ParserMatchClass = Imm32AsmOperand;
}
+def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
+
/// imm1_7 predicate - Immediate in the range [1,7].
def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
@@ -1199,7 +1200,7 @@ include "ARMInstrFormats.td"
// Multiclass helpers...
//
-/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a
/// binop that produces a value.
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
@@ -1208,9 +1209,9 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, (opnode GPR:$Rn, mod_imm:$imm))]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -1281,9 +1282,9 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ [(set GPR:$Rd, (opnode mod_imm:$imm, GPR:$Rn))]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -1351,9 +1352,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis, PatFrag opnode,
bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>,
Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
@@ -1384,9 +1385,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis, PatFrag opnode,
bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>,
Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
@@ -1405,16 +1406,16 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
}
}
-/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]>,
+ [(opnode GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -1542,9 +1543,9 @@ let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm, CPSR))]>,
Requires<[IsARM]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
@@ -1612,9 +1613,9 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn, CPSR))]>,
Requires<[IsARM]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
@@ -1808,7 +1809,7 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
-let neverHasSideEffects = 1, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
@@ -1961,7 +1962,7 @@ def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
}
def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
- []>, Requires<[IsARM, HasV7]> {
+ [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> {
bits<4> opt;
let Inst{27-4} = 0b001100100000111100001111;
let Inst{3-0} = opt;
@@ -2052,7 +2053,7 @@ def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
// The 'adr' mnemonic encodes differently if the label is before or after
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
@@ -2382,6 +2383,33 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
let Inst{24-23} = 0b11;
}
+// Hypervisor Call is a system instruction
+let isCall = 1 in {
+def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary,
+ "hvc", "\t$imm", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<16> imm;
+
+  // Even though HVC isn't predicable, its encoding includes a condition field.
+  // The instruction is UNDEFINED if the condition field is 0xf; otherwise, it
+  // is UNPREDICTABLE if the condition is not AL (0xe).
+ let Inst{31-28} = 0b1110;
+ let Unpredictable{31-28} = 0b1111;
+ let Inst{27-24} = 0b0001;
+ let Inst{23-20} = 0b0100;
+ let Inst{19-8} = imm{15-4};
+ let Inst{7-4} = 0b0111;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
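As a cross-check of the bit layout above, the full 32-bit word can be assembled by hand; hvc #0 comes out as 0xE1400070. An illustrative helper (ours, not patch code):

    #include <cstdint>

    // cond=AL(1110) | 0001 0100 | imm16[15:4] | 0111 | imm16[3:0]
    uint32_t encodeHVC(uint16_t Imm16) {
      return 0xE1400070u | uint32_t(Imm16 >> 4) << 8 | (Imm16 & 0xFu);
    }
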
+// Return from exception in Hypervisor mode.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
+def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ let Inst{23-0} = 0b011000000000000001101110;
+}
+
//===----------------------------------------------------------------------===//
// Load / Store Instructions.
//
@@ -2399,7 +2427,7 @@ defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0,
isReMaterializable = 1, isCodeGenOnly = 1 in
def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
@@ -2426,7 +2454,7 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
[(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr),
LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>,
@@ -2503,7 +2531,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
}
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
  // FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
  // IIC_iLoad_siu, depending on whether the offset register is shifted.
defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
@@ -2539,7 +2567,7 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
}
}
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
@@ -2572,10 +2600,10 @@ def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
let DecoderMethod = "DecodeAddrMode3Instruction";
}
} // hasExtraDefRegAllocReq = 1
-} // mayLoad = 1, neverHasSideEffects = 1
+} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, LdFrm, IIC_iLoad_ru,
@@ -2694,7 +2722,7 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
[(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>,
Requires<[IsARM, HasV5TE]> {
@@ -2767,7 +2795,7 @@ multiclass AI2_stridx<bit isByte, string opc,
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
  // FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
  // IIC_iStore_siu, depending on whether the offset register is shifted.
defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
@@ -2830,7 +2858,8 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
bits<14> addr;
let Inst{23} = addr{8}; // U bit
let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
@@ -2843,7 +2872,8 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ "strh", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
addr_offset_none:$addr,
am3offset:$offset))]> {
@@ -2857,7 +2887,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let DecoderMethod = "DecodeAddrMode3Instruction";
}
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
IndexModePre, StMiscFrm, IIC_iStore_d_ru,
@@ -2887,7 +2917,7 @@ def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
let DecoderMethod = "DecodeAddrMode3Instruction";
}
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// STRT, STRBT, and STRHT
@@ -2931,7 +2961,7 @@ def STRBT_POST
: ARMAsmPseudo<"strbt${q} $Rt, $addr",
(ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>;
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, StFrm, IIC_iStore_ru,
@@ -3096,17 +3126,18 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
- IIC_iLoad_mu>;
+ IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
- IIC_iStore_mu>;
+ IIC_iStore_mu>,
+ ComplexDeprecationPredicate<"ARMStore">;
-} // neverHasSideEffects
+} // hasSideEffects
// FIXME: remove when we have a way to marking a MI with these properties.
// FIXME: Should pc be an implicit operand like PICADD, etc?
@@ -3132,7 +3163,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
// Move Instructions.
//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
"mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
@@ -3146,7 +3177,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
}
// A version for the smaller set of tail call registers.
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
@@ -3190,8 +3221,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
- "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP,
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP,
Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
@@ -3401,10 +3432,10 @@ defm RSC : AI1_rsc_irs<0b0111, "rsc",
// assume opposite meanings of the carry flag (i.e., carry == !borrow).
// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
// details.
-def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
- (SUBri GPR:$src, so_imm_neg:$imm)>;
-def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
- (SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, mod_imm_neg:$imm),
+ (SUBri GPR:$src, mod_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm),
+ (SUBSri GPR:$src, mod_imm_neg:$imm)>;
def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
(SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
@@ -3416,10 +3447,11 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
-def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
- (SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, mod_imm_not:$imm)>;
def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
- (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -3697,9 +3729,9 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
let Inst{3-0} = shift{3-0};
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
IIC_iMVNi, "mvn", "\t$Rd, $imm",
- [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
+ [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -3708,8 +3740,8 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
let Inst{11-0} = imm;
}
-def : ARMPat<(and GPR:$src, so_imm_not:$imm),
- (BICri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
+ (BICri GPR:$src, mod_imm_not:$imm)>;
//===----------------------------------------------------------------------===//
// Multiply Instructions.
@@ -3803,7 +3835,7 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
}
// Extra precision multiplies with low / high results
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1 in {
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
@@ -3870,7 +3902,7 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
Requires<[IsARM, NoV6]>;
}
-} // neverHasSideEffects
+} // hasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
@@ -3934,14 +3966,12 @@ multiclass AI_smul<string opc, PatFrag opnode> {
def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (sra (opnode GPR:$Rn,
- (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ []>,
Requires<[IsARM, HasV5TE]>;
def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (sra (opnode GPR:$Rn,
- (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ []>,
Requires<[IsARM, HasV5TE]>;
}
@@ -3983,17 +4013,13 @@ multiclass AI_smla<string opc, PatFrag opnode> {
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
- (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
+ []>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
- (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
+ []>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -4113,7 +4139,7 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
// Misc. Arithmetic Instructions.
//
-def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "clz", "\t$Rd, $Rm",
[(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
Sched<[WriteALU]>;
@@ -4240,8 +4266,8 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
// ARMcmpZ can re-use the above instruction definitions.
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
- (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm),
+ (CMPri GPR:$src, mod_imm:$imm)>;
def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
(CMPrr GPR:$src, GPR:$rhs)>;
def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
@@ -4251,9 +4277,9 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
// CMN register-integer
let isCompare = 1, Defs = [CPSR] in {
-def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
"cmn", "\t$Rn, $imm",
- [(ARMcmn GPR:$Rn, so_imm:$imm)]>,
+ [(ARMcmn GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -4326,11 +4352,11 @@ def CMNzrsr : AI1<0b1011, (outs),
}
-def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
- (CMNri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
- (CMNri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
@@ -4357,7 +4383,7 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
@@ -4394,9 +4420,9 @@ def MOVCCi16
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, cmovpred:$p),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm,
cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
@@ -4412,13 +4438,13 @@ def MOVCCi32imm
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, cmovpred:$p),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm,
cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
@@ -4631,7 +4657,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
+ return isAtLeastAcquire(Ordering);
}]>;
def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -4641,7 +4667,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Release || Ordering == SequentiallyConsistent;
+ return isAtLeastRelease(Ordering);
}]>;
def atomic_store_release_8 : releasing_store<atomic_store_8>;
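isAtLeastAcquire/isAtLeastRelease replace the open-coded equality tests and express "this ordering or stronger" directly. Roughly (a sketch assuming the AtomicOrdering enum of this vintage):

    // Acquire semantics are required for acquire or anything stronger;
    // release semantics likewise. Enum values here are assumptions.
    enum AtomicOrdering {
      NotAtomic, Unordered, Monotonic, Acquire = 4,
      Release, AcquireRelease, SequentiallyConsistent
    };

    bool isAtLeastAcquire(AtomicOrdering O) {
      return O == Acquire || O == AcquireRelease ||
             O == SequentiallyConsistent;
    }

    bool isAtLeastRelease(AtomicOrdering O) {
      return O == Release || O == AcquireRelease ||
             O == SequentiallyConsistent;
    }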
@@ -5062,12 +5088,31 @@ def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
let Unpredictable{11-0} = 0b110100001111;
}
+// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = Rd;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-0} = 0b00000000;
+}
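The 6-bit $banked operand already carries the split SYSm-style encoding; the `let Inst{...}` lines above just scatter its bits across the word. A small C++ sketch of that packing (the cond value and the placement of the ABI opcode are assumptions beyond what the def shows):

    #include <cstdint>

    // Pack the MRSbanked fields into an A32 word: bit 22 = R (banked{5}),
    // bits 19-16 = banked{3-0}, bit 8 = banked{4}, bits 11-9 = 0b001.
    uint32_t encodeMRSbanked(unsigned banked /*6 bits*/, unsigned Rd /*4 bits*/) {
      uint32_t inst = 0xEu << 28;          // cond = AL (assumed)
      inst |= 0x1u << 24;                  // ABI opcode 0b0001 (assumed placement)
      inst |= ((banked >> 5) & 1u) << 22;  // R bit
      inst |= (banked & 0xFu) << 16;       // banked{3-0}
      inst |= (Rd & 0xFu) << 12;
      inst |= 0x1u << 9;                   // Inst{11-9} = 0b001
      inst |= ((banked >> 4) & 1u) << 8;   // banked{4}
      return inst;                         // Inst{7-0} = 0b00000000
    }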
+
// Move from ARM core register to Special Register
//
-// No need to have both system and application versions, the encodings are the
-// same and the assembly parser has no way to distinguish between them. The mask
-// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
-// the mask with the fields to be accessed in the special register.
+// No need to have both system and application versions of MSR (immediate) or
+// MSR (register): the encodings are the same and the assembly parser has no way
+// to distinguish between them. The mask operand contains the special register
+// (R bit) in bit 4, and bits 3-0 contain the mask with the fields to be
+// accessed in the special register.
def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
"msr", "\t$mask, $Rn", []> {
bits<5> mask;
@@ -5082,17 +5127,36 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
let Inst{3-0} = Rn;
}
-def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
- "msr", "\t$mask, $a", []> {
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary,
+ "msr", "\t$mask, $imm", []> {
bits<5> mask;
- bits<12> a;
+ bits<12> imm;
let Inst{23} = 0;
let Inst{22} = mask{4}; // R bit
let Inst{21-20} = 0b10;
let Inst{19-16} = mask{3-0};
let Inst{15-12} = 0b1111;
- let Inst{11-0} = a;
+ let Inst{11-0} = imm;
+}
+
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = Rn;
}
// Dynamic stack allocation yields a _chkstk for Windows targets. These calls
@@ -5164,7 +5228,7 @@ let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsIOS]>;
+ Requires<[IsARM]>;
}
// eh.sjlj.dispatchsetup pseudo-instruction.
@@ -5188,7 +5252,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
// Large immediate handling.
-// 32-bit immediate using two piece so_imms or movw + movt.
+// 32-bit immediate using two-piece mod_imms or movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
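Either way the pseudo expands to exactly two data-processing instructions: two mod_imm pieces combined with an ORR, or the constant's halves via movw/movt. The movw/movt split is simply (a sketch):

    #include <cstdint>
    #include <utility>

    // movw writes the low half and zeroes the top 16 bits;
    // movt then patches in the high half.
    std::pair<uint16_t, uint16_t> splitMovwMovt(uint32_t imm) {
      return {(uint16_t)(imm & 0xFFFF),  // movw rd, #lo16
              (uint16_t)(imm >> 16)};    // movt rd, #hi16
    }
    // splitMovwMovt(0x12345678) -> movw rd, #0x5678; movt rd, #0x1234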
@@ -5217,6 +5281,7 @@ def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
NoItinerary,
[(set GPR:$dst,
@@ -5280,11 +5345,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
(SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
@@ -5307,13 +5367,6 @@ def : ARMV5MOPat<(add GPR:$acc,
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
- (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
- (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
// Pre-v7 uses MCR for synchronization barriers.
@@ -5491,36 +5544,36 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">;
// USAX == USUBADDX
def : MnemonicAlias<"usubaddx", "usax">;
-// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
- (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+ (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
- (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+ (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
- (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
- (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-// Likewise, "add Rd, so_imm_neg" -> sub
+// Likewise, "add Rd, mod_imm_neg" -> sub
def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+ (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
def : ARMInstAlias<"add${s}${p} $Rd, $imm",
- (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
-// Same for CMP <--> CMN via so_imm_neg
+ (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via mod_imm_neg
def : ARMInstAlias<"cmp${p} $Rd, $imm",
- (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
def : ARMInstAlias<"cmn${p} $Rd, $imm",
- (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
@@ -5593,3 +5646,8 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// is discarded.
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
ComplexDeprecationPredicate<"IT">;
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
+ NoItinerary,
+ [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index c02bb3b55f5b..2a7b4b57fd08 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -34,6 +34,14 @@ def nImmSplatI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI32AsmOperand;
}
+def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
+def nImmSplatNotI16 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI16AsmOperand;
+}
+def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
+def nImmSplatNotI32 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI32AsmOperand;
+}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
@@ -657,7 +665,7 @@ class VLDQQQQWBPseudo<InstrItinClass itin>
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb, $src = $dst">;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
@@ -1015,7 +1023,7 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -1098,7 +1106,7 @@ def : Pat<(vector_insert (v4f32 QPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1351,7 +1359,7 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
@@ -1397,7 +1405,7 @@ def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1601,9 +1609,9 @@ def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -2017,7 +2025,7 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -2121,7 +2129,7 @@ def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2343,7 +2351,7 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// Use vld1/vst1 for unaligned f64 load / store
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
@@ -4376,7 +4384,7 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", null_frag>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
@@ -5429,7 +5437,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]>,
- Requires<[HasNEON, HasFastVGETLNi32]> {
+ Requires<[HasVFP2, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5497,8 +5505,12 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
(ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
- GPR:$R, imm:$lane))]> {
+ GPR:$R, imm:$lane))]>,
+ Requires<[HasVFP2]> {
let Inst{21} = lane{0};
+  // This instruction is equivalent to
+ // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
+ let isInsertSubreg = 1;
}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
@@ -6635,6 +6647,16 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+// ... immediates
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+
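vand with an immediate has no encoding of its own; the aliases accept an immediate whose complement is a legal splat and emit VBIC instead, since a & imm == BIC(a, ~imm). A sketch of what the NEONi16splatNot operand class has to check (assuming the two byte-positioned i16 splat forms):

    #include <cstdint>

    // "vand.i16 d0, #imm" is representable iff ~imm is a legal i16
    // splat immediate; it is then encoded as "vbic.i16 d0, #~imm".
    bool vandAsVbicI16(uint16_t imm, uint16_t &bicImm) {
      uint16_t inv = (uint16_t)~imm;
      if ((inv & 0xFF00) == 0 || (inv & 0x00FF) == 0) { // 0x00XY or 0xXY00
        bicImm = inv;
        return true;
      }
      return false;
    }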
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e17f73af03ec..cc953c637cb4 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -311,7 +311,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
}
def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
- []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> {
+ []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> {
bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
@@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
+// Thumb1 frame lowering is rather fragile; we hope to be able to use
+// tADDrSPi, but we may need to insert a sequence that clobbers CPSR.
+def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset),
+ NoItinerary, []>,
+ Requires<[IsThumb, IsThumb1Only]> {
+ let Defs = [CPSR];
+}
+
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,
@@ -466,7 +474,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> {
+ Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -706,7 +714,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
//
// These require base address to be written back or one of the loaded regs.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -746,7 +754,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
let Inst{7-0} = regs;
}
-} // neverHasSideEffects
+} // hasSideEffects
def : InstAlias<"ldm${p} $Rn!, $regs",
(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
@@ -880,7 +888,7 @@ def tADDrr : // A8.6.6 T1
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
@@ -1040,7 +1048,7 @@ def : tInstAlias <"movs $Rdn, $imm",
// A7-73: MOV(2) - mov setting flag.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
2, IIC_iMOVr,
"mov", "\t$Rd, $Rm", "", []>,
@@ -1062,7 +1070,7 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
let Inst{5-3} = Rm;
let Inst{2-0} = Rd;
}
-} // neverHasSideEffects
+} // hasSideEffects
// Multiply register
let isCommutable = 1 in
@@ -1240,7 +1248,7 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
@@ -1289,7 +1297,7 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsIOS]>;
+ Requires<[IsThumb]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1355,7 +1363,7 @@ def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T]>;
+ Requires<[IsThumb, HasV5T, IsNotMClass]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 85e93516807b..5e41ea1c294d 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1241,7 +1241,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let DecoderMethod = "DecodeT2Adr";
}
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
@@ -1262,22 +1262,22 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(zextloadi16 node:$Src)>>;
+ GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(zextloadi8 node:$Src)>>;
+ GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(sextloadi16 node:$Src)>>;
+ GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(sextloadi8 node:$Src)>>;
+ GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
@@ -1326,7 +1326,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
// Indexed loads
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
@@ -1378,7 +1378,7 @@ def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
-} // mayLoad = 1, neverHasSideEffects = 1
+} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
@@ -1443,14 +1443,14 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
@@ -1468,7 +1468,7 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
-} // mayStore = 1, neverHasSideEffects = 1
+} // mayStore = 1, hasSideEffects = 0
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, addr_offset_none:$Rn,
@@ -1763,7 +1763,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
@@ -1848,14 +1848,14 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
@@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
+// A simple right-shift can also be used in most cases (the exception is an
+// SXTH operation with a rotate of 24: there the non-contiguous bits are
+// relevant).
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
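These patterns work because for rotates of 8 or 16 the bits rotated into the top are discarded by the extension, so ror and a plain lsr agree on the byte or halfword being extended; only the halfword case with a rotate of 24 straddles bits 31-24 and 7-0, which no shift can express (the unsigned uxtab/uxtah patterns added below rely on the same fact). A quick C++ illustration:

    #include <cstdint>

    uint32_t ror(uint32_t v, unsigned r) {
      return r ? (v >> r) | (v << (32 - r)) : v;
    }

    // SXTAB extends byte 0 of (Rm ror rot); for rot in {8,16,24} that
    // byte equals (Rm >> rot) & 0xFF, so an srl-based pattern matches.
    int32_t sxtab(int32_t rn, uint32_t rm, unsigned rot) {
      return rn + (int8_t)(ror(rm, rot) & 0xFF);
    }

    // SXTAH extends the low halfword; with rot == 24 that halfword is
    // built from bits 31-24 and 7-0 of Rm -- non-contiguous, so only
    // rotates of 8 and 16 have an srl equivalent.
    int32_t sxtah(int32_t rn, uint32_t rm, unsigned rot) {
      return rn + (int16_t)(ror(rm, rot) & 0xFFFF);
    }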
// Zero extenders
let AddedComplexity = 16 in {
@@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
}
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
@@ -2554,7 +2572,7 @@ def t2MLS: T2FourReg<
}
// Extra precision multiplies with low / high results
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1 in {
def t2SMULL : T2MulLong<0b000, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
@@ -2585,7 +2603,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
-} // neverHasSideEffects
+} // hasSideEffects
// Rounding variants of the below included for disassembly only
@@ -2708,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2721,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2791,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WB : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2804,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WT : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -3136,7 +3150,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
@@ -3199,7 +3213,7 @@ def t2MOVCCi32imm
RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -3291,7 +3305,8 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
- [], {?, ?, ?, ?}> {
+ [], {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3367,7 +3382,8 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
- {?, ?, ?, ?}> {
+ {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3614,7 +3630,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
- Sched<[WriteBr]> {
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -3656,7 +3672,8 @@ let isBranch = 1, isTerminator = 1 in {
// operands, create 3 versions of the same instruction. Once there's a clean
// framework to represent optional operands, change this behavior.
class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
- !strconcat("cps", asm_op), []> {
+ !strconcat("cps", asm_op), []>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -3702,7 +3719,8 @@ def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
let Predicates = [IsThumb2, HasV8];
}
-def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
+ [(int_arm_dbg imm0_15:$opt)]> {
bits<4> opt;
let Inst{31-20} = 0b111100111010;
let Inst{19-16} = 0b1111;
@@ -3739,7 +3757,8 @@ def t2DCPS3 : T2DCPS<0b11, "dcps3">;
class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
bits<5> mode;
let Inst{31-25} = 0b1110100;
let Inst{24-23} = Op;
@@ -3770,7 +3789,8 @@ def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
let Inst{31-20} = op31_20{11-0};
bits<4> Rn;
@@ -3797,13 +3817,34 @@ let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
"subs", "\tpc, lr, $imm",
[(ARMintretflag imm0_255:$imm)]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2,IsNotMClass]> {
let Inst{31-8} = 0b111100111101111010001111;
bits<8> imm;
let Inst{7-0} = imm;
}
+// Hypervisor Call is a system instruction.
+let isCall = 1 in {
+def t2HVC : T2XI <(outs), (ins imm0_65535:$imm16), IIC_Br, "hvc.w\t$imm16", []>,
+ Requires<[IsThumb2, HasVirtualization]>, Sched<[WriteBr]> {
+ bits<16> imm16;
+ let Inst{31-20} = 0b111101111110;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-0} = imm16{11-0};
+}
+}
+
+// Alias for HVC without the ".w" optional width specifier
+def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
+
+// ERET - Return from exception in Hypervisor mode.
+// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that
+// includes virtualization extensions.
+def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>,
+ Requires<[IsThumb2, HasVirtualization]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -3941,10 +3982,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>;
//===----------------------------------------------------------------------===//
@@ -3960,7 +4001,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
- let Inst{7-0} = 0b0000;
+ let Inst{7-0} = 0b00000000;
}
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
@@ -3970,22 +4011,41 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
- let Inst{7-0} = 0b0000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{31-21} = 0b11110011111;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = Rd;
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
}
+
// M class MRS.
//
// This MRS has a mask field in bits 7-0 and can take more values than
// the A/R class (a full msr_mask).
-def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
- "mrs", "\t$Rd, $mask", []>,
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary,
+ "mrs", "\t$Rd, $SYSm", []>,
Requires<[IsThumb,IsMClass]> {
bits<4> Rd;
- bits<8> mask;
+ bits<8> SYSm;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
- let Inst{19-16} = 0b1111;
- let Inst{7-0} = mask;
+ let Inst{7-0} = SYSm;
+
+ let Unpredictable{20-16} = 0b11111;
+ let Unpredictable{13} = 0b1;
}
@@ -4010,6 +4070,25 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
let Inst{7-0} = 0;
}
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = banked{3-0};
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
+}
+
+
// M class MSR.
//
// Move from ARM core register to Special Register
@@ -4022,7 +4101,13 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
let Inst{20} = 0b0;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b1000;
- let Inst{11-0} = SYSm;
+ let Inst{11-10} = SYSm{11-10};
+ let Inst{9-8} = 0b00;
+ let Inst{7-0} = SYSm{7-0};
+
+ let Unpredictable{20} = 0b1;
+ let Unpredictable{13} = 0b1;
+ let Unpredictable{9-8} = 0b11;
}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 55a6efcb4c04..e0a931499164 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -194,7 +194,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
@@ -202,7 +202,7 @@ defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;
-} // neverHasSideEffects
+} // hasSideEffects
def : MnemonicAlias<"vldm", "vldmia">;
def : MnemonicAlias<"vstm", "vstmia">;
@@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Special case encoding: bits 11-8 is 0b1011.
@@ -625,27 +627,30 @@ def : Pat<(f16_to_fp GPR:$a),
def : Pat<(f64 (f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
-multiclass vcvt_inst<string opc, bits<2> rm> {
+multiclass vcvt_inst<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -659,7 +664,8 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -672,10 +678,10 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
}
}
-defm VCVTA : vcvt_inst<"a", 0b00>;
+defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
defm VCVTN : vcvt_inst<"n", 0b01>;
-defm VCVTP : vcvt_inst<"p", 0b10>;
-defm VCVTM : vcvt_inst<"m", 0b11>;
+defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
+defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
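With a rounding node attached, a libm-style rounding call feeding a float-to-int conversion can fold into a single convert-with-rounding instruction: vcvta rounds to nearest-away (frnd), vcvtp toward +infinity (fceil), vcvtm toward -infinity (ffloor); vcvtn keeps no pattern here. For instance (a sketch; assumes an FPARMv8 target):

    // Each cast can lower to one instruction instead of a libm call
    // plus a separate vcvt:
    int toNearestAway(float x)  { return (int)__builtin_roundf(x); } // vcvta.s32.f32
    int towardPlusInf(float x)  { return (int)__builtin_ceilf(x);  } // vcvtp.s32.f32
    int towardMinusInf(float x) { return (int)__builtin_floorf(x); } // vcvtm.s32.f32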
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -691,18 +697,20 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
let D = VFPNeonA8Domain;
}
-multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
+multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
- []>, Requires<[HasFPARMv8]> {
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{7} = op2;
let Inst{16} = op;
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
let Inst{16} = op;
}
@@ -715,22 +723,25 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTZ : vrint_inst_zrx<"z", 0, 1>;
-defm VRINTR : vrint_inst_zrx<"r", 0, 0>;
-defm VRINTX : vrint_inst_zrx<"x", 1, 0>;
+defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>;
+defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>;
+defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
-multiclass vrint_inst_anpm<string opc, bits<2> rm> {
+multiclass vrint_inst_anpm<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{17-16} = rm;
}
}
@@ -743,10 +754,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> {
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTA : vrint_inst_anpm<"a", 0b00>;
+defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>;
defm VRINTN : vrint_inst_anpm<"n", 0b01>;
-defm VRINTP : vrint_inst_anpm<"p", 0b10>;
-defm VRINTM : vrint_inst_anpm<"m", 0b11>;
+defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
+defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
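The vrint family is the in-register counterpart: the result stays floating point, so these map straight onto the nodes named above — vrintz truncates, vrintr uses the current FPSCR mode without signalling (fnearbyint), vrintx signals inexact (frint), and the a/p/m variants mirror round/ceil/floor. Roughly (assumes FPARMv8, and HasDPVFP for the f64 form):

    float rz(float x)  { return __builtin_truncf(x); }     // vrintz.f32
    float rr(float x)  { return __builtin_nearbyintf(x); } // vrintr.f32
    float rx(float x)  { return __builtin_rintf(x); }      // vrintx.f32
    double rm(double x){ return __builtin_floor(x); }      // vrintm.f64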
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -758,7 +769,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
@@ -766,7 +777,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
@@ -816,7 +827,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
let D = VFPNeonDomain;
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
@@ -837,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Rt = EXTRACT_SUBREG $Dm, ssub_0
+ // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1
+ let isExtractSubreg = 1;
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
@@ -860,7 +876,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
let D = VFPNeonDomain;
let DecoderMethod = "DecodeVMOVRRS";
}
-} // neverHasSideEffects
+} // hasSideEffects
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
@@ -885,9 +901,13 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1
+ let isRegSequence = 1;
}
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
@@ -1523,7 +1543,7 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
// FP Conditional moves.
//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
IIC_fpUNA64,
[(set (f64 DPR:$Dd),
@@ -1535,7 +1555,7 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
[(set (f32 SPR:$Sd),
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Move from VFP System Register to ARM core register.
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
deleted file mode 100644
index 6d1114d51aab..000000000000
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the ARM target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARMJITInfo.h"
-#include "ARMConstantPoolValue.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMRelocations.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
-}
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. (We need
-// to preserve more context and manipulate the stack directly). Instead,
-// write our own wrapper, which does things our way, so we have complete
-// control over register saving and restoring.
-extern "C" {
-#if defined(__arm__)
- void ARMCompilationCallback();
- asm(
- ".text\n"
- ".align 2\n"
- ".globl " ASMPREFIX "ARMCompilationCallback\n"
- ASMPREFIX "ARMCompilationCallback:\n"
- // Save caller saved registers since they may contain stuff
- // for the real target function right now. We have to act as if this
- // whole compilation callback doesn't exist as far as the caller is
- // concerned, so we can't just preserve the callee saved regs.
- "stmdb sp!, {r0, r1, r2, r3, lr}\n"
-#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
-#endif
- // The LR contains the address of the stub function on entry.
- // pass it as the argument to the C part of the callback
- "mov r0, lr\n"
- "sub sp, sp, #4\n"
- // Call the C portion of the callback
- "bl " ASMPREFIX "ARMCompilationCallbackC\n"
- "add sp, sp, #4\n"
- // Restoring the LR to the return address of the function that invoked
- // the stub and de-allocating the stack space for it requires us to
- // swap the two saved LR values on the stack, as they're backwards
- // for what we need since the pop instruction has a pre-determined
- // order for the registers.
- // +--------+
- // 0 | LR | Original return address
- // +--------+
- // 1 | LR | Stub address (start of stub)
- // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
- // 6-20 | D0..D7 | Saved VFP registers
- // +--------+
- //
-#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- // Restore VFP caller-saved registers.
- "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
-#endif
- //
- // We need to exchange the values in slots 0 and 1 so we can
- // return to the address in slot 1 with the address in slot 0
- // restored to the LR.
- "ldr r0, [sp,#20]\n"
- "ldr r1, [sp,#16]\n"
- "str r1, [sp,#20]\n"
- "str r0, [sp,#16]\n"
- // Return to the (newly modified) stub to invoke the real function.
- // The above twiddling of the saved return addresses allows us to
- // deallocate everything, including the LR the stub saved, with two
- // updating load instructions.
- "ldmia sp!, {r0, r1, r2, r3, lr}\n"
- "ldr pc, [sp], #4\n"
- );
-#else // Not an ARM host
- void ARMCompilationCallback() {
- llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
- }
-#endif
-}
-
-/// ARMCompilationCallbackC - This is the target-specific function invoked
-/// by the function stub when we did not know the real target of a call.
-/// This function must locate the start of the stub or call site and pass
-/// it into the JIT compiler function.
-extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
-
- // Rewrite the call target... so that we don't end up here every time we
- // execute the call. We're replacing the first two instructions of the
- // stub with:
- // ldr pc, [pc,#-4]
- // <addr>
- if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
- *(intptr_t *)(StubAddr+4) = NewVal;
- if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
-}
-
-TargetJITInfo::LazyResolverFn
-ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
- JITCompilerFunction = F;
- return ARMCompilationCallback;
-}
-
-void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
- JITCodeEmitter &JCE) {
- uint8_t Buffer[4];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
- void *PtrAddr = JCE.allocIndirectGV(
- GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
- addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
- return PtrAddr;
-}
-
-TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
- // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
- // 4-byte address. See emitFunctionStub for details.
- StubLayout Result = {16, 4};
- return Result;
-}
-
-void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- void *Addr;
- // If this is just a call to an external function, emit a branch instead of a
- // call. The code is the same except for one bit of the last instruction.
- if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
- // Branch to the corresponding function addr.
- if (IsPIC) {
- // The stub is 16-byte size and 4-aligned.
- intptr_t LazyPtr = getIndirectSymAddr(Fn);
- if (!LazyPtr) {
- // In PIC mode, the function stub is loading a lazy-ptr.
- LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE);
- DEBUG(if (F)
- errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
- << "] for GV '" << F->getName() << "'\n";
- else
- errs() << "JIT: Stub emitted at [" << LazyPtr
- << "] for external function at '" << Fn << "'\n");
- }
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
- JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
- JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
- JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- } else {
- // The stub is 8-byte size and 4-aligned.
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
- JCE.emitWordLE((intptr_t)Fn); // addr of function
- sys::Memory::InvalidateInstructionCache(Addr, 8);
- if (!sys::Memory::setRangeExecutable(Addr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- }
- } else {
- // The compilation callback will overwrite the first two words of this
- // stub with indirect branch instructions targeting the compiled code.
- // This stub sets the return address to restart the stub, so that
- // the new branch will be invoked when we come back.
- //
- // Branch and link to the compilation callback.
- // The stub is 16-byte size and 4-byte aligned.
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- // Save LR so the callback can determine which stub called it.
- // The compilation callback is responsible for popping this prior
- // to returning.
- JCE.emitWordLE(0xe92d4000); // push {lr}
- // Set the return address to go back to the start of this stub.
- JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
- // Invoke the compilation callback.
- JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
- // The address of the compilation callback.
- JCE.emitWordLE((intptr_t)ARMCompilationCallback);
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- }
-
- return Addr;
-}
-
-intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
- ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
- switch (RT) {
- default:
- return (intptr_t)(MR->getResultPointer());
- case ARM::reloc_arm_pic_jt:
- // Destination address - jump table base.
- return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
- case ARM::reloc_arm_jt_base:
- // Jump table base address.
- return getJumpTableBaseAddr(MR->getJumpTableIndex());
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- // Constant pool entry address.
- return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
- case ARM::reloc_arm_machine_cp_entry: {
- ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
- assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
- "Can't handle this machine constant pool entry yet!");
- intptr_t Addr = (intptr_t)(MR->getResultPointer());
- Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
- return Addr;
- }
- }
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = resolveRelocDestAddr(MR);
- switch ((ARM::RelocationType)MR->getRelocationType()) {
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- case ARM::reloc_arm_relative: {
- // It is necessary to calculate the correct PC relative value. We
- // subtract the base addr from the target addr to form a byte offset.
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- // If the result is positive, set bit U(23) to 1.
- if (ResultPtr >= 0)
- *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
- else {
- // Otherwise, obtain the absolute value and set bit U(23) to 0.
- *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
- ResultPtr = - ResultPtr;
- }
- // Set the immed value calculated.
- // VFP immediate offset is multiplied by 4.
- if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
- ResultPtr = ResultPtr >> 2;
- *((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC (which is register 15 on all architectures).
- // FIXME: This avoids the need for register info in the JIT class.
- *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
- break;
- }
- case ARM::reloc_arm_pic_jt:
- case ARM::reloc_arm_machine_cp_entry:
- case ARM::reloc_arm_absolute: {
- // These addresses have already been resolved.
- *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
- break;
- }
- case ARM::reloc_arm_branch: {
- // It is necessary to calculate the correct value of signed_immed_24
- // field. We subtract the base addr from the target addr to form a
- // byte offset, which must be inside the range -33554432 and +33554428.
- // Then, we set the signed_immed_24 field of the instruction to bits
- // [25:2] of the byte offset. More details ARM-ARM p. A4-11.
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
- assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
- *((intptr_t*)RelocPos) |= ResultPtr;
- break;
- }
- case ARM::reloc_arm_jt_base: {
- // JT base - (instruction addr + 8)
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- *((intptr_t*)RelocPos) |= ResultPtr;
- break;
- }
- case ARM::reloc_arm_movw: {
- ResultPtr = ResultPtr & 0xFFFF;
- *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
- *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
- break;
- }
- case ARM::reloc_arm_movt: {
- ResultPtr = (ResultPtr >> 16) & 0xFFFF;
- *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
- *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
- break;
- }
- }
- }
-}
-
-void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) {
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
- JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
- IsPIC = isPIC;
-}
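
Before moving on to the header, it may help to see the arithmetic the deleted relocate() applied for branch fixups in one place. A minimal, self-contained sketch, assuming a 32-bit ARM instruction word and the usual +8 PC read-ahead; names and signature are illustrative, not the old TargetJITInfo API:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch of the deleted reloc_arm_branch fixup: bits [25:2] of the
    // byte displacement become the 24-bit signed immediate of a B/BL encoding.
    static uint32_t patchARMBranch(uint32_t Insn, intptr_t InstAddr,
                                   intptr_t TargetAddr) {
      // Reading PC on ARM yields the instruction address plus 8.
      intptr_t ByteOffset = TargetAddr - InstAddr - 8;
      assert(ByteOffset >= -33554432 && ByteOffset <= 33554428 &&
             "branch displacement out of range");
      return Insn | ((static_cast<uint32_t>(ByteOffset) & 0x03FFFFFCu) >> 2);
    }
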
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
deleted file mode 100644
index 27e2a2013404..000000000000
--- a/lib/Target/ARM/ARMJITInfo.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the ARMJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMJITINFO_H
-#define ARMJITINFO_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
- class ARMTargetMachine;
-
- class ARMJITInfo : public TargetJITInfo {
- // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
- // CONSTPOOL_ENTRY addresses.
- SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
-
- // JumpTableId2AddrMap - A map from inline jumptable ids to the
- // corresponding inline jump table bases.
- SmallVector<intptr_t, 16> JumpTableId2AddrMap;
-
- // PCLabelMap - A map from PC labels to addresses.
- DenseMap<unsigned, intptr_t> PCLabelMap;
-
- // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
- // addresses to their indirect symbol addresses.
- DenseMap<void*, intptr_t> Sym2IndirectSymMap;
-
- // IsPIC - True if the relocation model is PIC. This is used to determine
- // how to codegen function stubs.
- bool IsPIC;
-
- public:
- explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
- /// to emit an indirect symbol which contains the address of the specified
- /// ptr.
- void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on ARM.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
-
- /// hasCustomConstantPool - Allows a target to specify that constant
- /// pool address resolution is handled by the target.
- bool hasCustomConstantPool() const override { return true; }
-
- /// hasCustomJumpTables - Allows a target to specify that jumptables
- /// are emitted by the target.
- bool hasCustomJumpTables() const override { return true; }
-
- /// allocateSeparateGVMemory - If true, globals should be placed in
- /// separately allocated heap memory rather than in the same
- /// code memory allocated by JITCodeEmitter.
- bool allocateSeparateGVMemory() const override {
-#ifdef __APPLE__
- return true;
-#else
- return false;
-#endif
- }
-
- /// Initialize - Initialize internal stage for the function being JITted.
- /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
- /// jump table ids to jump table bases map; remember if codegen relocation
- /// model is PIC.
- void Initialize(const MachineFunction &MF, bool isPIC);
-
- /// getConstantPoolEntryAddr - The ARM target puts all constant
- /// pool entries into constant islands. This returns the address of the
- /// constant pool entry of the specified index.
- intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
- assert(CPI < ConstPoolId2AddrMap.size());
- return ConstPoolId2AddrMap[CPI];
- }
-
- /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
- /// where its associated value is stored. When relocations are processed,
- /// this value will be used to resolve references to the constant.
- void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
- assert(CPI < ConstPoolId2AddrMap.size());
- ConstPoolId2AddrMap[CPI] = Addr;
- }
-
- /// getJumpTableBaseAddr - The ARM target inlines all jump tables within
- /// text section of the function. This returns the address of the base of
- /// the jump table of the specified index.
- intptr_t getJumpTableBaseAddr(unsigned JTI) const {
- assert(JTI < JumpTableId2AddrMap.size());
- return JumpTableId2AddrMap[JTI];
- }
-
- /// addJumpTableBaseAddr - Map a jump table index to the address where
- /// the corresponding inline jump table is emitted. When relocations are
- /// processed, this value will be used to resolve references to the
- /// jump table.
- void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
- assert(JTI < JumpTableId2AddrMap.size());
- JumpTableId2AddrMap[JTI] = Addr;
- }
-
- /// getPCLabelAddr - Retrieve the address of the PC label of the
- /// specified id.
- intptr_t getPCLabelAddr(unsigned Id) const {
- DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
- assert(I != PCLabelMap.end());
- return I->second;
- }
-
- /// addPCLabelAddr - Remember the address of the specified PC label.
- void addPCLabelAddr(unsigned Id, intptr_t Addr) {
- PCLabelMap.insert(std::make_pair(Id, Addr));
- }
-
- /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
- /// specified symbol located at address. Returns 0 if the indirect symbol
- /// has not been emitted.
- intptr_t getIndirectSymAddr(void *Addr) const {
- DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
- if (I != Sym2IndirectSymMap.end())
- return I->second;
- return 0;
- }
-
- /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
- /// its indirect symbol address.
- void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
- Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
- }
-
- private:
- /// resolveRelocDestAddr - Resolve the resulting address of the relocation
- /// if it's not already solved. Constantpool entries must be resolved by
- /// ARM target.
- intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
- };
-}
-
-#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a03bcdbddd79..c429ac185211 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -144,6 +144,46 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
+static bool definesCPSR(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ // Thumb1 immediate offsets are scaled by 4.
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ return OffField * 4;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
+ : ARM_AM::getAM5Op(OffField);
+
+ if (Op == ARM_AM::sub)
+ return -Offset;
+
+ return Offset;
+}
+
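
The addressing-mode helpers used above live in MCTargetDesc/ARMAddressingModes.h. As a hedged illustration of what the AM5 branch computes (the bit layout is assumed from the encoder's conventions, not quoted from that header; the real getAM5Offset/getAM5Op are authoritative):

    #include <cstdint>

    // Illustrative AM5 decode: an 8-bit word count plus an add/sub flag,
    // assumed here to live in bit 8.
    static int decodeAM5ByteOffset(unsigned OffField) {
      unsigned Words = OffField & 0xFF;   // offset, in 4-byte words
      bool IsSub = (OffField >> 8) & 1;   // assumed position of the AddrOpc bit
      int Bytes = static_cast<int>(Words) * 4;
      return IsSub ? -Bytes : Bytes;
    }
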
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
@@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
-
- // Start updating any instructions with immediate offsets. Insert a sub before
+ // Start updating any instructions with immediate offsets. Insert a SUB before
// the first non-updateable instruction (if any).
for (; MBBI != MBB.end(); ++MBBI) {
+ bool InsertSub = false;
+ unsigned Opc = MBBI->getOpcode();
+
if (MBBI->readsRegister(Base)) {
- unsigned Opc = MBBI->getOpcode();
int Offset;
- bool InsertSub = false;
+ bool IsLoad =
+ Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
+ bool IsStore =
+ Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
- if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
- Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
- Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
+ if (IsLoad || IsStore) {
// Loads and stores with immediate offsets can be updated, but only if
// the new offset isn't negative.
// The MachineOperand containing the offset immediate is the last one
// before predicates.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
- // The offsets are scaled by 1, 2 or 4 depending on the Opcode
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
- if (Offset >= 0)
+
+ // If storing the base register, it needs to be reset first.
+ unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+
+ if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
else
InsertSub = true;
- } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
- // SUB/ADD using this register. Merge it with the update.
- // If the merged offset is too large, insert a new sub instead.
+ } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
+ !definesCPSR(MBBI)) {
+ // SUBS/ADDS using this register, with a dead def of the CPSR.
+ // Merge it with the update; if the merged offset is too large,
+ // insert a new sub instead.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
Offset = (Opc == ARM::tSUBi8) ?
MO.getImm() + WordOffset * 4 :
MO.getImm() - WordOffset * 4 ;
- if (TL->isLegalAddImmediate(Offset)) {
+ if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
+ // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
+ // Offset == 0.
MO.setImm(Offset);
// The base register has now been reset, so exit early.
return;
@@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
}
- if (InsertSub) {
- // An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
- return;
- }
+ } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ // Since SUBS sets the condition flags, we can't place the base reset
+ // after an instruction that has a live CPSR def.
+ // The base register might also contain an argument for a function call.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
+ return;
}
if (MBBI->killsRegister(Base))
@@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
return;
}
- // The end of the block was reached. This means register liveness escapes the
- // block, and it's necessary to insert a sub before the last instruction.
- if (MBB.succ_size() > 0)
- // But only insert the SUB if there is actually a successor block.
- // FIXME: Check more carefully if register is live at this point, e.g. by
- // also examining the successor block's register liveness information.
- AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ // End of block was reached.
+ if (MBB.succ_size() > 0) {
+ // FIXME: Because of a bug, live registers are sometimes missing from
+ // the successor blocks' live-in sets. This means we can't trust that
+ // information and *always* have to reset at the end of a block.
+ // See PR21029.
+ if (MBBI != MBB.end()) --MBBI;
+ AddDefaultT1CC(
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
.addImm(Pred).addReg(PredReg);
+ }
}
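
In effect, when UpdateBaseRegUses cannot rewrite an instruction (or falls off the end of the block), it restores the pre-writeback base with SUBS Base, Base, #WordOffset*4. A hedged model of the immediate rewrite it applies to the instructions it can update:

    // After an LDMIA/STMIA with writeback, the base has advanced by
    // WordOffset*4 bytes, so a later [base, #imm] use must shrink its
    // immediate by that many bytes, expressed in the immediate's own units.
    static int rebaseImmediate(int OldImm, int WordOffset, int UnitsPerWord) {
      // UnitsPerWord mirrors what getImmScale is assumed to return:
      // 1 for word, 2 for halfword, 4 for byte loads/stores.
      return OldImm - WordOffset * UnitsPerWord;
    }
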
/// MergeOps - Create and insert a LDM or STM with Base as base register and
@@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NumRegs <= 1)
return false;
+ // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
+ // Compute liveness information for that register to make the decision.
+ bool SafeToClobberCPSR = !isThumb1 ||
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ MachineBasicBlock::LQR_Dead);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback.
+ // It's also not possible to merge an STR of the base register in Thumb1.
+ if (isThumb1)
+ for (unsigned I = 0; I < NumRegs; ++I)
+ if (Base == Regs[I].first) {
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ break;
+ } else if (Opcode == ARM::tSTRi) {
+ return false;
+ }
+ }
+
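
The base-in-reglist special cases follow the Thumb1 encoding rules (hedged paraphrase, not a quote from the ARM ARM): an LDM whose register list contains the base must not write back, since the loaded value takes precedence, while an STM that stores the base can produce an unpredictable value, so the merge is simply refused for stores. Illustrative outcomes, with arbitrary registers:

    // ldr r1,[r0]; ldr r2,[r0,#4]  ->  ldm r0!, {r1, r2}   (writeback)
    // ldr r0,[r0]; (base in list)  ->  ldm r0,  {r0, ...}  (no writeback)
    // str r0,[r0]; (base in list)  ->  left alone, merge refused
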
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
@@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NumRegs <= 2)
return false;
+ // On Thumb1, it's not worth materializing a new base register without
+ // clobbering the CPSR (i.e. not using ADDS/SUBS).
+ if (!SafeToClobberCPSR)
+ return false;
+
unsigned NewBase;
if (isi32Load(Opcode)) {
// If it is a load, then just use one of the destination register to
@@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Offset < 8) ? ARM::tADDi3 :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
if (Offset < 0) {
+ Offset = - Offset;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
+ (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
- Offset = - Offset;
}
if (!TL->isLegalAddImmediate(Offset))
@@ -473,22 +561,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false; // Probably not worth it then.
if (isThumb1) {
- if (Base != NewBase) {
+ // Thumb1: depending on immediate size, use either
+ // ADDS NewBase, Base, #imm3
+ // or
+ // MOV NewBase, Base
+ // ADDS NewBase, #imm8.
+ if (Base != NewBase && Offset >= 8) {
// Need to insert a MOV to the new base first.
- // FIXME: If the immediate fits in 3 bits, use ADD instead.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
+ // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ Base = NewBase;
+ BaseKill = false;
}
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
- .addReg(NewBase, getKillRegState(true)).addImm(Offset)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
-
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
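
A compact restatement of the Thumb1 opcode choice above (hedged; paraphrased from the instruction definitions): tADDi3/tSUBi3 take a 3-bit immediate and a separate source register, while tADDi8/tSUBi8 take an 8-bit immediate but only operate in place, hence the MOV prefix for larger offsets:

    // Sketch: does building NewBase = Base +/- Offset need a MOV first?
    static bool needsMovPrefix(int AbsOffset, bool BaseIsNewBase) {
      // #0..#7 fits the three-operand tADDi3/tSUBi3; anything up to #255
      // needs the in-place tADDi8/tSUBi8, and therefore MOV NewBase, Base
      // first whenever NewBase and Base differ.
      return !BaseIsNewBase && AbsOffset >= 8;
    }
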
@@ -501,16 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
-
- // Exception: If the base register is in the input reglist, Thumb1 LDM is
- // non-writeback. Check for this.
- if (Opcode == ARM::tLDMIA && isThumb1)
- for (unsigned I = 0; I < NumRegs; ++I)
- if (Base == Regs[I].first) {
- Writeback = false;
- break;
- }
+ // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
+ // - There is no writeback (LDM of base register),
+ // - the base register is killed by the merged instruction,
+ // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
+ // to reset the base register.
+ // Otherwise, don't merge.
+ // It's safe to return here since the code to materialize a new base register
+ // above is also conditional on SafeToClobberCPSR.
+ if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
+ return false;
MachineInstrBuilder MIB;
@@ -525,11 +619,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
- // The base isn't dead after a merged instruction with writeback. Update
- // future uses of the base with the added offset (if possible), or reset
- // the base register as necessary.
+ // The base isn't dead after a merged instruction with writeback.
+ // Insert a sub instruction after the newly formed instruction to reset it.
if (!BaseKill)
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
@@ -700,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
memOps[i].MBBI = Merges.back();
memOps[i].Position = insertPos;
}
+
+ // Update memOps offsets, since they may have been modified by MergeOps.
+ for (auto &MemOp : memOps) {
+ MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
+ }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
@@ -721,7 +820,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
-
+ bool BaseKill = false;
// vldm / vstm limit are 32 for S variants, 16 for D variants.
switch (Opcode) {
@@ -760,36 +859,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
- Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ // We need to compute BaseKill here because the MemOps may have been
+ // reordered.
+ BaseKill = Loc->killsRegister(Base);
+
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
+ BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
MemOps, Merges);
return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position)
+ if (MemOps[i].Position > MemOps[insertAfter].Position) {
insertAfter = i;
+ Loc = MemOps[i].MBBI;
+ }
}
- bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ BaseKill = Loc->killsRegister(Base);
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
-static bool definesCPSR(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
- // If the instruction has live CPSR def, then it's not safe to fold it
- // into load / store.
- return true;
- }
-
- return false;
-}
-
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
@@ -1327,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
RS->forward(std::prev(Loc));
}
-static int getMemoryOpOffset(const MachineInstr *MI) {
- int Opcode = MI->getOpcode();
- bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
- unsigned NumOperands = MI->getDesc().getNumOperands();
- unsigned OffField = MI->getOperand(NumOperands-3).getImm();
-
- if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
- Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
- Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
- return OffField;
-
- // Thumb1 immediate offsets are scaled by 4
- if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
- return OffField * 4;
-
- int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
- : ARM_AM::getAM5Offset(OffField) * 4;
- if (isAM3) {
- if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else {
- if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- }
- return Offset;
-}
-
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset, bool isDef,
@@ -1725,21 +1788,15 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
- TL = TM.getTargetLowering();
+ TL = TM.getSubtargetImpl()->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = TM.getSubtargetImpl()->getInstrInfo();
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
- // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
- if (isThumb1) {
- delete RS;
- return false;
- }
-
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
@@ -1793,10 +1850,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
- TII = Fn.getTarget().getInstrInfo();
- TRI = Fn.getTarget().getRegisterInfo();
- STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ TD = Fn.getSubtarget().getDataLayout();
+ TII = Fn.getSubtarget().getInstrInfo();
+ TRI = Fn.getSubtarget().getRegisterInfo();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
MRI = &Fn.getRegInfo();
MF = &Fn;
@@ -1811,7 +1868,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 023f5f8e37a0..fd4f5ff3f202 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -119,11 +119,45 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
+ // In the MC layer, we keep modified immediates in their encoded form
+ bool EncodeImms = false;
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::MOVi:
+ case ARM::MVNi:
+ case ARM::CMPri:
+ case ARM::CMNri:
+ case ARM::TSTri:
+ case ARM::TEQri:
+ case ARM::MSRi:
+ case ARM::ADCri:
+ case ARM::ADDri:
+ case ARM::ADDSri:
+ case ARM::SBCri:
+ case ARM::SUBri:
+ case ARM::SUBSri:
+ case ARM::ANDri:
+ case ARM::ORRri:
+ case ARM::EORri:
+ case ARM::BICri:
+ case ARM::RSBri:
+ case ARM::RSBSri:
+ case ARM::RSCri:
+ EncodeImms = true;
+ break;
+ }
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (AP.lowerOperand(MO, MCOp))
+ if (AP.lowerOperand(MO, MCOp)) {
+ if (MCOp.isImm() && EncodeImms) {
+ int32_t Enc = ARM_AM::getSOImmVal(MCOp.getImm());
+ if (Enc != -1)
+ MCOp.setImm(Enc);
+ }
OutMI.addOperand(MCOp);
+ }
}
}
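
ARM_AM::getSOImmVal is the modified-immediate encoder: an ARM data-processing immediate is an 8-bit value rotated right by an even amount. A hedged reimplementation of what it is assumed to compute; the real helper in ARMAddressingModes.h is authoritative, including the exact packing of the rotate field:

    #include <cstdint>

    // Return the assumed 12-bit so_imm encoding ((rot/2) << 8 | imm8) for V,
    // or -1 if V is not an 8-bit value rotated right by an even amount.
    static int32_t encodeSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotating V left by Rot undoes a rotate-right of imm8 by Rot.
        uint32_t Imm8 = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
        if (Imm8 <= 0xFF)
          return static_cast<int32_t>(((Rot / 2) << 8) | Imm8);
      }
      return -1;
    }

For example, 0x3FC00 (0xFF rotated right by 22) would encode as 0xBFF under this scheme, while 0x101 has no encoding and yields -1, leaving the operand unchanged.
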
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d3fabc3ebb04..ddfdb5240c2b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMACHINEFUNCTIONINFO_H
-#define ARMMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
namespace llvm {
@@ -48,6 +48,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
///
unsigned ArgRegsSaveSize;
+ /// ReturnRegsCount - Number of registers used up in the return.
+ unsigned ReturnRegsCount;
+
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
@@ -83,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// areas.
unsigned GPRCS1Size;
unsigned GPRCS2Size;
+ unsigned DPRCSAlignGapSize;
unsigned DPRCSSize;
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
@@ -127,10 +131,11 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
+ RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -151,6 +156,9 @@ public:
}
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
+ unsigned getReturnRegsCount() const { return ReturnRegsCount; }
+ void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; }
+
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
@@ -176,10 +184,12 @@ public:
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
@@ -238,4 +248,4 @@ public:
};
} // End llvm namespace
-#endif // ARMMACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 2a4925572ae8..1c50f9e9acfa 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -9,8 +9,8 @@
//===------------------------------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index efa22fbed9f7..3ff0bee7e5bf 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 242 entries have cost 1
// 1447 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 3e6af3f04698..b6231735c2c0 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMREGISTERINFO_H
-#define ARMREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
deleted file mode 100644
index 21877fd9af37..000000000000
--- a/lib/Target/ARM/ARMRelocations.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ARM target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMRELOCATIONS_H
-#define ARMRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace ARM {
- enum RelocationType {
- // reloc_arm_absolute - Absolute relocation, just add the relocated value
- // to the value already in memory.
- reloc_arm_absolute,
-
- // reloc_arm_relative - PC relative relocation, add the relocated value to
- // the value already in memory, after we adjust it for where the PC is.
- reloc_arm_relative,
-
- // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
- // addresses are kept locally in a map.
- reloc_arm_cp_entry,
-
- // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
- // should be divided by 4.
- reloc_arm_vfp_cp_entry,
-
- // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
- // entry.
- reloc_arm_machine_cp_entry,
-
- // reloc_arm_jt_base - PC relative relocation for jump tables whose
- // addresses are kept locally in a map.
- reloc_arm_jt_base,
-
- // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
- reloc_arm_pic_jt,
-
- // reloc_arm_branch - Branch address relocation.
- reloc_arm_branch,
-
- // reloc_arm_movt - MOVT immediate relocation.
- reloc_arm_movt,
-
- // reloc_arm_movw - MOVW immediate relocation.
- reloc_arm_movw
- };
- }
-}
-
-#endif
-
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 3dcc0df349d2..fa30ac31a30f 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -157,7 +157,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
return SDValue();
const ARMTargetLowering &TLI =
- *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 13769dc8efe0..94b98e668470 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMSELECTIONDAGINFO_H
-#define ARMSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c1b4562f4118..311afe909cf1 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -15,13 +15,14 @@
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -29,7 +30,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -50,11 +50,13 @@ static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
+namespace {
enum AlignMode {
DefaultAlign,
StrictAlign,
NoStrictAlign
};
+}
static cl::opt<AlignMode>
Align(cl::desc("Load/store alignment support"),
@@ -101,11 +103,6 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
- // On thumb, i1, i8 and i16 have natural alignment requirements, but we try to
- // align to 32.
- if (ST.isThumb())
- Ret += "-i1:8:32-i8:8:32-i16:16:32";
-
// ABIs other than APCS have 64 bit integers with natural alignment.
if (!ST.isAPCS_ABI())
Ret += "-i64:64";
@@ -122,10 +119,9 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
else
Ret += "-v128:64:128";
- // On thumb and APCS, only try to align aggregates to 32 bits (the default is
- // 64 bits).
- if (ST.isThumb() || ST.isAPCS_ABI())
- Ret += "-a:0:32";
+ // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+ // particular hardware support on 32-bit ARM).
+ Ret += "-a:0:32";
// Integer registers are 32 bits.
Ret += "-n32";
@@ -147,18 +143,18 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM,
- bool IsLittle, const TargetOptions &Options)
+ const std::string &FS, const ARMBaseTargetMachine &TM,
+ bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
- TSInfo(DL), JITInfo(),
+ TSInfo(DL),
InstrInfo(isThumb1Only()
? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
: !isThumb()
@@ -218,25 +214,9 @@ void ARMSubtarget::initializeEnvironment() {
UnsafeFPMath = false;
}
-void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
-void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
- if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
else
@@ -246,8 +226,8 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
- CPUString);
+ std::string ArchFS =
+ ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS.str();
@@ -266,31 +246,9 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if (TargetABI == ARM_ABI_UNKNOWN) {
- switch (TargetTriple.getEnvironment()) {
- case Triple::Android:
- case Triple::EABI:
- case Triple::EABIHF:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- TargetABI = ARM_ABI_AAPCS;
- break;
- default:
- if ((isTargetIOS() && isMClass()) ||
- (TargetTriple.isOSBinFormatMachO() &&
- TargetTriple.getOS() == Triple::UnknownOS))
- TargetABI = ARM_ABI_AAPCS;
- else
- TargetABI = ARM_ABI_APCS;
- break;
- }
- }
-
// FIXME: this is invalid for WindowsCE
- if (isTargetWindows()) {
- TargetABI = ARM_ABI_AAPCS;
+ if (isTargetWindows())
NoARM = true;
- }
if (isAAPCS_ABI())
stackAlignment = 8;
@@ -300,46 +258,39 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
UseMovt = hasV6T2Ops() && ArmUseMOVT;
if (isTargetMachO()) {
- IsR9Reserved = ReserveR9 | !HasV6Ops;
+ IsR9Reserved = ReserveR9 || !HasV6Ops;
SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
} else {
IsR9Reserved = ReserveR9;
SupportsTailCall = !isThumb1Only();
}
- switch (Align) {
- case DefaultAlign:
- // Assume pre-ARMv6 doesn't support unaligned accesses.
- //
- // ARMv6 may or may not support unaligned accesses depending on the
- // SCTLR.U bit, which is architecture-specific. We assume ARMv6
- // Darwin and NetBSD targets support unaligned accesses, and others don't.
- //
- // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
- // which raises an alignment fault on unaligned accesses. Linux
- // defaults this bit to 0 and handles it as a system-wide (not
- // per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
- //
- // The above behavior is consistent with GCC.
- AllowsUnalignedMem =
- (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
- isTargetNetBSD())) ||
- (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
- // The one exception is cortex-m0, which despite being v6, does not
- // support unaligned accesses. Rather than make the above boolean
- // expression even more obtuse, just override the value here.
- if (isThumb1Only() && isMClass())
- AllowsUnalignedMem = false;
- break;
- case StrictAlign:
- AllowsUnalignedMem = false;
- break;
- case NoStrictAlign:
- AllowsUnalignedMem = true;
- break;
+ if (Align == DefaultAlign) {
+ // Assume pre-ARMv6 doesn't support unaligned accesses.
+ //
+ // ARMv6 may or may not support unaligned accesses depending on the
+ // SCTLR.U bit, which is architecture-specific. We assume ARMv6
+ // Darwin and NetBSD targets support unaligned accesses, and others don't.
+ //
+ // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
+ // which raises an alignment fault on unaligned accesses. Linux
+ // defaults this bit to 0 and handles it as a system-wide (not
+ // per-process) setting. It is therefore safe to assume that ARMv7+
+ // Linux targets support unaligned accesses. The same goes for NaCl.
+ //
+ // The above behavior is consistent with GCC.
+ AllowsUnalignedMem =
+ (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
+ isTargetNetBSD())) ||
+ (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
+ } else {
+ AllowsUnalignedMem = !(Align == StrictAlign);
}
+ // No v6M core supports unaligned memory access (v6M ARM ARM A3.2)
+ if (isV6M())
+ AllowsUnalignedMem = false;
+
switch (IT) {
case DefaultIT:
RestrictIT = hasV8Ops() ? true : false;
@@ -359,6 +310,15 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
UseNEONForSinglePrecisionFP = true;
}
+bool ARMSubtarget::isAPCS_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
+}
+bool ARMSubtarget::isAAPCS_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+}
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
@@ -366,11 +326,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
if (RelocM == Reloc::Static)
return false;
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
if (!isTargetMachO()) {
// Extra load is needed for all externally visible.
@@ -413,12 +369,11 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- return SchedModel->MispredictPenalty;
+ return SchedModel.MispredictPenalty;
}
bool ARMSubtarget::hasSinCos() const {
- return getTargetTriple().getOS() == Triple::IOS &&
- !getTargetTriple().isOSVersionLT(7, 0);
+ return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
}
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
@@ -426,7 +381,7 @@ bool ARMSubtarget::enablePostMachineScheduler() const {
return (!isThumb() || hasThumb2());
}
-bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+bool ARMSubtarget::enableAtomicExpand() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f8283b08d485..dbacd4d6aada 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -11,21 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMSUBTARGET_H
-#define ARMSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
+#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "ARMJITInfo.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
@@ -39,12 +37,13 @@ namespace llvm {
class GlobalValue;
class StringRef;
class TargetOptions;
+class ARMBaseTargetMachine;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexR5, Swift, CortexA53, CortexA57, Krait
+ CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait,
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -188,7 +187,7 @@ protected:
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
- /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
bool AllowsUnalignedMem;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
@@ -219,7 +218,7 @@ protected:
Triple TargetTriple;
/// SchedModel - Processor specific instruction costs.
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
@@ -227,19 +226,14 @@ protected:
/// Options passed via command line that could influence the target
const TargetOptions &Options;
- public:
- enum {
- ARM_ABI_UNKNOWN,
- ARM_ABI_APCS,
- ARM_ABI_AAPCS // ARM EABI
- } TargetABI;
+ const ARMBaseTargetMachine &TM;
+public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM, bool IsLittle,
- const TargetOptions &Options);
+ const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -250,27 +244,30 @@ protected:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the ARM target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
-
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- const DataLayout *getDataLayout() const { return &DL; }
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- ARMJITInfo *getJITInfo() { return &JITInfo; }
- const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
- const ARMTargetLowering *getTargetLowering() const { return &TLInfo; }
- const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); }
- const ARMBaseRegisterInfo *getRegisterInfo() const {
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const ARMBaseInstrInfo *getInstrInfo() const override {
+ return InstrInfo.get();
+ }
+ const ARMTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const ARMFrameLowering *getFrameLowering() const override {
+ return FrameLowering.get();
+ }
+ const ARMBaseRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
private:
const DataLayout DL;
ARMSelectionDAGInfo TSInfo;
- ARMJITInfo JITInfo;
// Either Thumb1InstrInfo or Thumb2InstrInfo.
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
@@ -278,7 +275,7 @@ private:
std::unique_ptr<ARMFrameLowering> FrameLowering;
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
void computeIssueWidth();
@@ -347,7 +344,7 @@ public:
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
- bool isTargetNetBSD() const { return TargetTriple.getOS() == Triple::NetBSD; }
+ bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
@@ -388,14 +385,8 @@ public:
return TargetTriple.getEnvironment() == Triple::Android;
}
- bool isAPCS_ABI() const {
- assert(TargetABI != ARM_ABI_UNKNOWN);
- return TargetABI == ARM_ABI_APCS;
- }
- bool isAAPCS_ABI() const {
- assert(TargetABI != ARM_ABI_UNKNOWN);
- return TargetABI == ARM_ABI_AAPCS;
- }
+ bool isAPCS_ABI() const;
+ bool isAAPCS_ABI() const;
bool isThumb() const { return InThumbMode; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
@@ -405,6 +396,10 @@ public:
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
+ bool isV6M() const {
+ return isThumb1Only() && isMClass();
+ }
+
bool isR9Reserved() const { return IsR9Reserved; }
bool useMovt(const MachineFunction &MF) const;
@@ -428,12 +423,14 @@ public:
/// True for some subtargets at > -O0.
bool enablePostMachineScheduler() const override;
- // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
- bool enableAtomicExpandLoadLinked() const override;
+ // enableAtomicExpand - True if we need to expand our atomics.
+ bool enableAtomicExpand() const override;
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index d85194b75ecb..7a8181b7528f 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMTargetMachine.h"
#include "ARMFrameLowering.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -42,6 +44,65 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
}
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
+ return make_unique<TargetLoweringObjectFileMachO>();
+ if (TT.isOSWindows())
+ return make_unique<TargetLoweringObjectFileCOFF>();
+ return make_unique<ARMElfTargetObjectFile>();
+}
+
+static ARMBaseTargetMachine::ARMABI
+computeTargetABI(const Triple &TT, StringRef CPU,
+ const TargetOptions &Options) {
+ if (Options.MCOptions.getABIName().startswith("aapcs"))
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ else if (Options.MCOptions.getABIName().startswith("apcs"))
+ return ARMBaseTargetMachine::ARM_ABI_APCS;
+
+ assert(Options.MCOptions.getABIName().empty() &&
+ "Unknown target-abi option!");
+
+ ARMBaseTargetMachine::ARMABI TargetABI =
+ ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
+
+ // FIXME: This is duplicated code from the front end and should be unified.
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == llvm::Triple::EABI ||
+ (TT.getOS() == llvm::Triple::UnknownOS &&
+ TT.getObjectFormat() == llvm::Triple::MachO) ||
+ CPU.startswith("cortex-m")) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ }
+ } else if (TT.isOSWindows()) {
+ // FIXME: this is invalid for WindowsCE
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else {
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case llvm::Triple::Android:
+ case llvm::Triple::GNUEABI:
+ case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::EABI:
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ break;
+ case llvm::Triple::GNU:
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ break;
+ default:
+ if (TT.getOS() == llvm::Triple::NetBSD)
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ else
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ break;
+ }
+ }
+
+ return TargetABI;
+}
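
Hedged examples of what the selection above is expected to produce, hand-derived from the code rather than taken from test output:

    // computeTargetABI(Triple("armv7--linux-gnueabihf"), "", Opts)
    //     -> ARM_ABI_AAPCS   (GNUEABIHF environment)
    // computeTargetABI(Triple("armv6--netbsd"), "", Opts)
    //     -> ARM_ABI_APCS    (default case, NetBSD special-cased)
    // computeTargetABI(Triple("thumbv7-apple-macho"), "cortex-m4", Opts)
    //     -> ARM_ABI_AAPCS   (MachO with a cortex-m CPU)
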
/// TargetMachine ctor - Create an ARM architecture model.
///
@@ -51,7 +112,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, isLittle, Options) {
+ TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
// Default to triple-appropriate float ABI
if (Options.FloatABIType == FloatABI::Default)
@@ -59,6 +122,46 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
}
+ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+
+const ARMSubtarget *
+ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ }
+ return I.get();
+}
+
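
The map above memoizes one ARMSubtarget per distinct (CPU, feature string, soft-float) key. A hedged IR-level illustration of when two functions end up sharing a cached subtarget:

    // ; Functions with identical "target-cpu"/"target-features" attributes
    // ; and the same use-soft-float setting resolve to the same subtarget:
    // define void @f() #0 { ... }
    // define void @g() #0 { ... }   ; same #0 attributes -> same ARMSubtarget
    // attributes #0 = { "target-cpu"="cortex-a9" "target-features"="+neon" }
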
void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our ARM pass. This
// allows the ARM pass to delegate to the target independent layer when
@@ -147,9 +250,9 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -158,7 +261,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void ARMPassConfig::addIRPasses() {
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ if (TM->Options.ThreadModel == ThreadModel::Single)
+ addPass(createLowerAtomicPass());
+ else
+ addPass(createAtomicExpandPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
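
The thread-model split above means single-threaded targets can drop atomicity entirely. A hedged illustration, hand-written rather than compiler output:

    // ; ThreadModel::Single - LowerAtomic turns this...
    // ;   %old = atomicrmw add i32* %p, i32 1 seq_cst
    // ; ...into a plain load/add/store sequence. Otherwise AtomicExpand
    // ; rewrites it to a load-linked/store-conditional (ldrex/strex) loop
    // ; on subtargets with data barriers.
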
@@ -188,7 +294,7 @@ bool ARMPassConfig::addInstSelector() {
return false;
}
-bool ARMPassConfig::addPreRegAlloc() {
+void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
@@ -199,13 +305,11 @@ bool ARMPassConfig::addPreRegAlloc() {
getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
addPass(createA15SDOptimizerPass());
}
- return true;
}
-bool ARMPassConfig::addPreSched2() {
+void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
addPass(createARMLoadStoreOptimizationPass());
- printAndVerify("After ARM load / store optimizer");
if (getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
@@ -226,11 +330,9 @@ bool ARMPassConfig::addPreSched2() {
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
-
- return true;
}
-bool ARMPassConfig::addPreEmitPass() {
+void ARMPassConfig::addPreEmitPass() {
if (getARMSubtarget().isThumb2()) {
if (!getARMSubtarget().prefers32BitThumb())
addPass(createThumb2SizeReductionPass());
@@ -241,13 +343,4 @@ bool ARMPassConfig::addPreEmitPass() {
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
-
- return true;
-}
-
-bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for ARM.
- PM.add(createARMJITCodeEmitterPass(*this, JCE));
- return false;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index b72b1df4af83..18cf5fa0fa06 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMTARGETMACHINE_H
-#define ARMTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
@@ -22,8 +22,19 @@
namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
+public:
+ enum ARMABI {
+ ARM_ABI_UNKNOWN,
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
protected:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
+ bool isLittle;
+ mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
+
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -31,30 +42,10 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool isLittle);
+ ~ARMBaseTargetMachine() override;
const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const ARMBaseRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const ARMTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const ARMBaseInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const ARMFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
/// \brief Register ARM analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
@@ -62,7 +53,9 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// ARMTargetMachine - ARM target machine.
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c926421848e5..98e8763c4705 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
-#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index a2ace629baa1..ec834e8da599 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -49,7 +49,7 @@ public:
ARMTTI(const ARMBaseTargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeARMTTIPass(*PassRegistry::getPassRegistry());
}
@@ -104,7 +104,7 @@ public:
return 32;
}
- unsigned getMaximumUnrollFactor() const override {
+ unsigned getMaxInterleaveFactor() const override {
// These are out of order CPUs:
if (ST->isCortexA15() || ST->isSwift())
return 2;
@@ -126,10 +126,11 @@ public:
unsigned getAddressComputationCost(Type *Val,
bool IsComplex) const override;
- unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info = OK_AnyValue,
- OperandValueKind Op2Info = OK_AnyValue) const override;
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
+ OperandValueKind Op2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
@@ -389,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if ((Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement) &&
+ ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
}
@@ -497,9 +505,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned ARMTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -555,8 +564,8 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
- unsigned Cost =
- TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+ unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index b62706c45fbf..6a00b28f37a7 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -129,12 +129,13 @@ public:
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
UnwindContext UC;
ARMTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<ARMTargetStreamer &>(TS);
}
@@ -163,7 +164,10 @@ class ARMAsmParser : public MCTargetAsmParser {
// according to count of instructions in block.
// ~0U if no active IT block.
} ITState;
- bool inITBlock() { return ITState.CurPosition != ~0U;}
+ bool inITBlock() { return ITState.CurPosition != ~0U; }
+ bool lastInITBlock() {
+ return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
+ }
void forwardITPosition() {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
@@ -173,22 +177,23 @@ class ARMAsmParser : public MCTargetAsmParser {
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
-
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) {
- return Parser.Note(L, Msg, Ranges);
+ return getParser().Note(L, Msg, Ranges);
}
bool Warning(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) {
- return Parser.Warning(L, Msg, Ranges);
+ return getParser().Warning(L, Msg, Ranges);
}
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) {
- return Parser.Error(L, Msg, Ranges);
+ return getParser().Error(L, Msg, Ranges);
}
+ bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop = false);
+ bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands,
+ unsigned ListNo);
+
int tryParseRegister();
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
@@ -265,9 +270,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasARM() const {
return !(STI.getFeatureBits() & ARM::FeatureNoARM);
}
+ bool hasThumb2DSP() const {
+ return STI.getFeatureBits() & ARM::FeatureDSPThumb2;
+ }
+ bool hasD16() const {
+ return STI.getFeatureBits() & ARM::FeatureD16;
+ }
void SwitchMode() {
- unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
bool isMClass() const {
@@ -290,6 +301,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+ OperandMatchResultTy parseBankedRegOperand(OperandVector &);
OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
int High);
OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
@@ -301,6 +313,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseSetEndImm(OperandVector &);
OperandMatchResultTy parseShifterImm(OperandVector &);
OperandMatchResultTy parseRotImm(OperandVector &);
+ OperandMatchResultTy parseModImm(OperandVector &);
OperandMatchResultTy parseBitfield(OperandVector &);
OperandMatchResultTy parsePostIdxReg(OperandVector &);
OperandMatchResultTy parseAM3Offset(OperandVector &);
@@ -314,7 +327,7 @@ class ARMAsmParser : public MCTargetAsmParser {
void cvtThumbBranches(MCInst &Inst, const OperandVector &);
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
- bool processInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops,
+                         MCStreamer &Out);
bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
@@ -329,10 +342,9 @@ public:
};
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) {
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
@@ -359,7 +371,7 @@ public:
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -383,6 +395,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_Memory,
k_PostIndexRegister,
k_MSRMask,
+ k_BankedReg,
k_ProcIFlags,
k_VectorIndex,
k_Register,
@@ -396,6 +409,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_ShiftedImmediate,
k_ShifterImmediate,
k_RotateImmediate,
+ k_ModifiedImmediate,
k_BitfieldDescriptor,
k_Token
} Kind;
@@ -435,6 +449,10 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned Val;
};
+ struct BankedRegOp {
+ unsigned Val;
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
@@ -503,6 +521,11 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned Imm;
};
+ struct ModImmOp {
+ unsigned Bits;
+ unsigned Rot;
+ };
+
struct BitfieldOp {
unsigned LSB;
unsigned Width;
@@ -517,6 +540,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct ITMaskOp ITMask;
struct IFlagsOp IFlags;
struct MMaskOp MMask;
+ struct BankedRegOp BankedReg;
struct TokOp Tok;
struct RegOp Reg;
struct VectorListOp VectorList;
@@ -528,6 +552,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct RegShiftedRegOp RegShiftedReg;
struct RegShiftedImmOp RegShiftedImm;
struct RotImmOp RotImm;
+ struct ModImmOp ModImm;
struct BitfieldOp Bitfield;
};
@@ -585,6 +610,9 @@ public:
case k_MSRMask:
MMask = o.MMask;
break;
+ case k_BankedReg:
+ BankedReg = o.BankedReg;
+ break;
case k_ProcIFlags:
IFlags = o.IFlags;
break;
@@ -600,6 +628,9 @@ public:
case k_RotateImmediate:
RotImm = o.RotImm;
break;
+ case k_ModifiedImmediate:
+ ModImm = o.ModImm;
+ break;
case k_BitfieldDescriptor:
Bitfield = o.Bitfield;
break;
@@ -679,6 +710,11 @@ public:
return MMask.Val;
}
+ unsigned getBankedReg() const {
+ assert(Kind == k_BankedReg && "Invalid access!");
+ return BankedReg.Val;
+ }
+
bool isCoprocNum() const { return Kind == k_CoprocNum; }
bool isCoprocReg() const { return Kind == k_CoprocReg; }
bool isCoprocOption() const { return Kind == k_CoprocOption; }
@@ -1003,33 +1039,17 @@ public:
}
bool isAdrLabel() const {
// If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup. If it is a constant, but it can't fit
- // into shift immediate encoding, we reject it.
- if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
- else return (isARMSOImm() || isARMSOImmNeg());
- }
- bool isARMSOImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ARM_AM::getSOImmVal(Value) != -1;
- }
- bool isARMSOImmNot() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ARM_AM::getSOImmVal(~Value) != -1;
- }
- bool isARMSOImmNeg() const {
+ // reference needing a fixup.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ // If it is a constant, it must fit into a modified immediate encoding.
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- // Only use this when not representable as a plain so_imm.
- return ARM_AM::getSOImmVal(Value) == -1 &&
- ARM_AM::getSOImmVal(-Value) != -1;
+ return (ARM_AM::getSOImmVal(Value) != -1 ||
+ ARM_AM::getSOImmVal(-Value) != -1);
}
bool isT2SOImm() const {
if (!isImm()) return false;
@@ -1074,6 +1094,22 @@ public:
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isModImm() const { return Kind == k_ModifiedImmediate; }
+ bool isModImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isModImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
@@ -1384,6 +1420,7 @@ public:
}
bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isBankedReg() const { return Kind == k_BankedReg; }
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
@@ -1601,9 +1638,18 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
- int64_t Value = CE->getValue();
- // i16 value in the range [0,255] or [0x0100, 0xff00]
- return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(Value);
+ }
+
+ bool isNEONi16splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(~Value & 0xffff);
}
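A hedged sketch of the splat rule these helpers now delegate to ARM_AM::isNEONi16splat; the predicate below is a paraphrase of that helper, not the real implementation.

    #include <cassert>
    #include <cstdint>

    // Paraphrase of the i16 splat test: only one byte of the 16-bit value
    // may be non-zero (VMOV.i16 can only materialize 0x00XX or 0xXX00).
    static bool i16splat(uint16_t V) {
      return (V & 0xff00) == 0 || (V & 0x00ff) == 0;
    }

    int main() {
      assert(i16splat(0x00ab) && i16splat(0xab00));
      assert(!i16splat(0x0102));
      // The *Not variant above checks the complement, for the VMVN form:
      assert(i16splat(uint16_t(~0xff7f))); // ~0xff7f == 0x0080
    }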
bool isNEONi32splat() const {
@@ -1614,12 +1660,18 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
- int64_t Value = CE->getValue();
- // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
- return (Value >= 0 && Value < 256) ||
- (Value >= 0x0100 && Value <= 0xff00) ||
- (Value >= 0x010000 && Value <= 0xff0000) ||
- (Value >= 0x01000000 && Value <= 0xff000000);
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(Value);
+ }
+
+ bool isNEONi32splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(~Value);
}
bool isNEONByteReplicate(unsigned NumBytes) const {
@@ -1655,6 +1707,7 @@ public:
int64_t Value = CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong, a copy-and-paste from the previous case.
return (Value >= 0 && Value < 256) ||
(Value >= 0x0100 && Value <= 0xff00) ||
(Value >= 0x010000 && Value <= 0xff0000) ||
@@ -1670,6 +1723,7 @@ public:
int64_t Value = ~CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong, a copy-and-paste from the previous case.
return (Value >= 0 && Value < 256) ||
(Value >= 0x0100 && Value <= 0xff00) ||
(Value >= 0x010000 && Value <= 0xff0000) ||
@@ -1791,6 +1845,30 @@ public:
Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
}
+ void addModImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // Support for fixups (MCFixup)
+ if (isImm())
+ return addImmOperands(Inst, N);
+
+ Inst.addOperand(MCOperand::CreateImm(ModImm.Bits | (ModImm.Rot << 7)));
+ }
+
+ void addModImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue());
+ Inst.addOperand(MCOperand::CreateImm(Enc));
+ }
+
+ void addModImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue());
+ Inst.addOperand(MCOperand::CreateImm(Enc));
+ }
+
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Munge the lsb/width into a bitfield mask.
@@ -1947,22 +2025,6 @@ public:
Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue()));
}
- void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // The operand is actually a so_imm, but we have its bitwise
- // negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
- }
-
- void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // The operand is actually a so_imm, but we have its
- // negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
- }
-
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -2334,6 +2396,11 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
}
+ void addBankedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg())));
+ }
+
void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
@@ -2378,10 +2445,16 @@ public:
// The immediate encodes the type of constant as well as the value.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
- if (Value >= 256)
- Value = (Value >> 8) | 0xa00;
- else
- Value |= 0x800;
+ Value = ARM_AM::encodeNEONi16splat(Value);
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff);
Inst.addOperand(MCOperand::CreateImm(Value));
}
@@ -2390,12 +2463,16 @@ public:
// The immediate encodes the type of constant as well as the value.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
- if (Value >= 256 && Value <= 0xff00)
- Value = (Value >> 8) | 0x200;
- else if (Value > 0xffff && Value <= 0xff0000)
- Value = (Value >> 16) | 0x400;
- else if (Value > 0xffffff)
- Value = (Value >> 24) | 0x600;
+ Value = ARM_AM::encodeNEONi32splat(Value);
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi32splat(~Value);
Inst.addOperand(MCOperand::CreateImm(Value));
}
@@ -2580,6 +2657,16 @@ public:
return Op;
}
+ static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ModifiedImmediate);
+ Op->ModImm.Bits = Bits;
+ Op->ModImm.Rot = Rot;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static std::unique_ptr<ARMOperand>
CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
@@ -2736,6 +2823,14 @@ public:
Op->EndLoc = S;
return Op;
}
+
+ static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_BankedReg);
+ Op->BankedReg.Val = Reg;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
};
} // end anonymous namespace.
@@ -2769,6 +2864,9 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_MSRMask:
OS << "<mask: " << getMSRMask() << ">";
break;
+ case k_BankedReg:
+ OS << "<banked reg: " << getBankedReg() << ">";
+ break;
case k_Immediate:
getImm()->print(OS);
break;
@@ -2822,6 +2920,10 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
break;
+ case k_ModifiedImmediate:
+ OS << "<mod_imm #" << ModImm.Bits << ", #"
+ << ModImm.Rot << ">";
+ break;
case k_BitfieldDescriptor:
OS << "<bitfield " << "lsb: " << Bitfield.LSB
<< ", width: " << Bitfield.Width << ">";
@@ -2871,8 +2973,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- StartLoc = Parser.getTok().getLoc();
- EndLoc = Parser.getTok().getEndLoc();
+ const AsmToken &Tok = getParser().getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
RegNo = tryParseRegister();
return (RegNo == (unsigned)-1);
@@ -2883,6 +2986,7 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo,
/// returned. Otherwise return -1.
///
int ARMAsmParser::tryParseRegister() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
@@ -2924,6 +3028,10 @@ int ARMAsmParser::tryParseRegister() {
return Entry->getValue();
}
+ // Some FPUs only have 16 D registers, so D16-D31 are invalid
+ if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+ return -1;
+
Parser.Lex(); // Eat identifier token.
return RegNum;
@@ -2935,6 +3043,7 @@ int ARMAsmParser::tryParseRegister() {
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3037,6 +3146,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
@@ -3118,9 +3228,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
switch (Name[1]) {
default: return -1;
- // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
- case '0': return CoprocOp == 'p'? -1: 10;
- case '1': return CoprocOp == 'p'? -1: 11;
+ // CP10 and CP11 are VFP/NEON and so vector instructions should be used.
+ // However, older cores (v5/v6) did access them via coprocessor instructions.
+ case '0': return 10;
+ case '1': return 11;
case '2': return 12;
case '3': return 13;
case '4': return 14;
@@ -3132,6 +3243,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
/// parseITCondCode - Try to parse a condition code for an IT instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseITCondCode(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3169,6 +3281,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) {
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3177,6 +3290,9 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
if (Num == -1)
return MatchOperand_NoMatch;
+ // ARMv7 and v8 don't allow cp10/cp11: they are reserved for VFP/NEON-specific instructions
+ if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11))
+ return MatchOperand_NoMatch;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
@@ -3188,6 +3304,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3206,6 +3323,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
/// coproc_option : '{' imm0_255 '}'
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// If this isn't a '{', this isn't a coprocessor immediate operand.
@@ -3283,6 +3401,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
assert(Parser.getTok().is(AsmToken::LCurly) &&
"Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
@@ -3414,6 +3533,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
@@ -3465,6 +3585,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
// parse a vector register list
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseVectorList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
VectorLaneTy LaneKind;
unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
@@ -3716,6 +3837,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3787,6 +3909,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3838,6 +3961,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3872,6 +3996,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3892,9 +4017,6 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// should really only be allowed when writing a special register. Note
// they get dropped in the MRS instruction reading a special register as
// the SYSm field is only 8 bits.
- //
- // FIXME: the _g and _nzcvqg versions are only allowed if the processor
- // includes the DSP extension but that is not checked.
.Case("apsr", 0x800)
.Case("apsr_nzcvq", 0x800)
.Case("apsr_g", 0x400)
@@ -3926,6 +4048,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
+ if (!hasThumb2DSP() && (FlagsVal & 0x400))
+ // The _g and _nzcvqg versions are only valid if the DSP extension is
+ // available.
+ return MatchOperand_NoMatch;
+
if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
@@ -3998,9 +4125,67 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
+/// use in the MRS/MSR instructions added to support virtualization.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef RegName = Tok.getString();
+
+ // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM
+ // and bit 5 is R.
+ unsigned Encoding = StringSwitch<unsigned>(RegName.lower())
+ .Case("r8_usr", 0x00)
+ .Case("r9_usr", 0x01)
+ .Case("r10_usr", 0x02)
+ .Case("r11_usr", 0x03)
+ .Case("r12_usr", 0x04)
+ .Case("sp_usr", 0x05)
+ .Case("lr_usr", 0x06)
+ .Case("r8_fiq", 0x08)
+ .Case("r9_fiq", 0x09)
+ .Case("r10_fiq", 0x0a)
+ .Case("r11_fiq", 0x0b)
+ .Case("r12_fiq", 0x0c)
+ .Case("sp_fiq", 0x0d)
+ .Case("lr_fiq", 0x0e)
+ .Case("lr_irq", 0x10)
+ .Case("sp_irq", 0x11)
+ .Case("lr_svc", 0x12)
+ .Case("sp_svc", 0x13)
+ .Case("lr_abt", 0x14)
+ .Case("sp_abt", 0x15)
+ .Case("lr_und", 0x16)
+ .Case("sp_und", 0x17)
+ .Case("lr_mon", 0x1c)
+ .Case("sp_mon", 0x1d)
+ .Case("elr_hyp", 0x1e)
+ .Case("sp_hyp", 0x1f)
+ .Case("spsr_fiq", 0x2e)
+ .Case("spsr_irq", 0x30)
+ .Case("spsr_svc", 0x32)
+ .Case("spsr_abt", 0x34)
+ .Case("spsr_und", 0x36)
+ .Case("spsr_mon", 0x3c)
+ .Case("spsr_hyp", 0x3e)
+ .Default(~0U);
+
+ if (Encoding == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S));
+ return MatchOperand_Success;
+}
+
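A small sanity sketch of the encoding scheme the table above follows. The bit split is the one stated in the comment (ARM ARM B9.2.3); the code merely demonstrates it on one entry.

    #include <cassert>

    int main() {
      // Bit 5 is R (1 = SPSR access), bits 4-0 are SysM.
      unsigned SpsrFiq = 0x2e; // "spsr_fiq" from the table above
      assert((SpsrFiq >> 5) == 1);      // R set: reads/writes the SPSR
      assert((SpsrFiq & 0x1f) == 0x0e); // same SysM slot as "lr_fiq" (0x0e)
    }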
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
int High) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
@@ -4048,6 +4233,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4077,6 +4263,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
/// n == 32 encoded as n == 0.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseShifterImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4147,6 +4334,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) {
/// ror #n 'n' in {0, 8, 16, 24}
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseRotImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier))
@@ -4192,7 +4380,130 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) {
}
ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseModImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ int64_t Imm1, Imm2;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ // 1) A mod_imm operand can appear in the place of a register name:
+ // add r0, #mod_imm
+ // add r0, r0, #mod_imm
+ // to correctly handle the latter, we bail out as soon as we see an
+ // identifier.
+ //
+ // 2) Similarly, we do not want to parse into complex operands:
+ // mov r0, #mod_imm
+ // mov r0, :lower16:(_foo)
+ if (Parser.getTok().is(AsmToken::Identifier) ||
+ Parser.getTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // The hash (or dollar) is optional, as per the ARM ARM
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ // Avoid parsing into complex operands (#:)
+ if (Lexer.peekTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // Eat the hash (dollar)
+ Parser.Lex();
+ }
+
+ SMLoc Sx1, Ex1;
+ Sx1 = Parser.getTok().getLoc();
+ const MCExpr *Imm1Exp;
+ if (getParser().parseExpression(Imm1Exp, Ex1)) {
+ Error(Sx1, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp);
+
+ if (CE) {
+ // Immediate must fit within 32-bits
+ Imm1 = CE->getValue();
+ if (Imm1 < INT32_MIN || Imm1 > UINT32_MAX) {
+ Error(Sx1, "immediate operand must be representable with 32 bits");
+ return MatchOperand_ParseFail;
+ }
+
+ int Enc = ARM_AM::getSOImmVal(Imm1);
+ if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF),
+ (Enc & 0xF00) >> 7,
+ Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // We have parsed an immediate which is not for us; fall back to a plain
+ // immediate. This can happen for instruction aliases. For an example,
+ // ARMInstrInfo.td defines the alias [mov <-> mvn] which can transform
+ // a mov (mvn) with a mod_imm_neg/mod_imm_not operand into the opposite
+ // instruction with a mod_imm operand. The alias is defined such that the
+ // parser method is shared, that's why we have to do this here.
+ if (Parser.getTok().is(AsmToken::EndOfStatement)) {
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+ } else {
+ // Operands like #(l1 - l2) can only be evaluated at a later stage (via an
+ // MCFixup). Fall back to a plain immediate.
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // From this point onward, we expect the input to be a (#bits, #rot) pair
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Imm1 & ~0xFF) {
+ Error(Sx1, "immediate operand must a number in the range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+
+ // Eat the comma
+ Parser.Lex();
+
+ // Repeat for #rot
+ SMLoc Sx2, Ex2;
+ Sx2 = Parser.getTok().getLoc();
+
+ // Eat the optional hash (dollar)
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar))
+ Parser.Lex();
+
+ const MCExpr *Imm2Exp;
+ if (getParser().parseExpression(Imm2Exp, Ex2)) {
+ Error(Sx2, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ CE = dyn_cast<MCConstantExpr>(Imm2Exp);
+
+ if (CE) {
+ Imm2 = CE->getValue();
+ if (!(Imm2 & ~0x1E)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2));
+ return MatchOperand_Success;
+ }
+ Error(Sx2, "immediate operand must an even number in the range [0, 30]");
+ return MatchOperand_ParseFail;
+ } else {
+ Error(Sx2, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+}
+
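To make the (#bits, #rot) rule above concrete, a self-contained sketch of the value a modified immediate denotes. rotr32 mirrors the ARM_AM::rotr32 helper used elsewhere in this patch; the example pairs are illustrative.

    #include <cassert>
    #include <cstdint>

    // A mod_imm is an 8-bit value rotated right by an even amount in [0, 30].
    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
    }

    int main() {
      assert(rotr32(255, 30) == 0x3FCu);   // "mov r0, #255, #30" -> 0x3FC
      assert(rotr32(1, 2) == 0x40000000u); // "mov r0, #1, #2"
      assert(rotr32(0xFF, 0) == 0xFFu);    // plain 8-bit values need no rotation
    }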
+ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseBitfield(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
if (Parser.getTok().isNot(AsmToken::Hash) &&
@@ -4269,6 +4580,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// This method must return MatchOperand_NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
+ MCAsmParser &Parser = getParser();
AsmToken Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
bool haveEaten = false;
@@ -4321,6 +4633,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// This method must return MatchOperand_NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
+ MCAsmParser &Parser = getParser();
AsmToken Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
@@ -4458,6 +4771,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
bool ARMAsmParser::parseMemory(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -4649,6 +4963,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) {
/// return true if it parses a shift otherwise it returns false.
bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
unsigned &Amount) {
+ MCAsmParser &Parser = getParser();
SMLoc Loc = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -4709,6 +5024,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
/// parseFPImm - A floating point immediate expression operand.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseFPImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
// Anything that can accept a floating point constant as an operand
// needs to go through here, as the regular parseExpression is
// integer only.
@@ -4789,6 +5105,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
/// Parse a arm instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ MCAsmParser &Parser = getParser();
SMLoc S, E;
// Check if the current operand has a custom associated parser, if so, try to
@@ -4921,6 +5238,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
// :lower16: and :upper16:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ MCAsmParser &Parser = getParser();
RefKind = ARMMCExpr::VK_ARM_None;
// consume an optional '#' (GNU compatibility)
@@ -4936,15 +5254,52 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
return true;
}
+ enum {
+ COFF = (1 << MCObjectFileInfo::IsCOFF),
+ ELF = (1 << MCObjectFileInfo::IsELF),
+ MACHO = (1 << MCObjectFileInfo::IsMachO)
+ };
+ static const struct PrefixEntry {
+ const char *Spelling;
+ ARMMCExpr::VariantKind VariantKind;
+ uint8_t SupportedFormats;
+ } PrefixEntries[] = {
+ { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO },
+ { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO },
+ };
+
StringRef IDVal = Parser.getTok().getIdentifier();
- if (IDVal == "lower16") {
- RefKind = ARMMCExpr::VK_ARM_LO16;
- } else if (IDVal == "upper16") {
- RefKind = ARMMCExpr::VK_ARM_HI16;
- } else {
+
+ const auto &Prefix =
+ std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries),
+ [&IDVal](const PrefixEntry &PE) {
+ return PE.Spelling == IDVal;
+ });
+ if (Prefix == std::end(PrefixEntries)) {
Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
return true;
}
+
+ uint8_t CurrentFormat;
+ switch (getContext().getObjectFileInfo()->getObjectFileType()) {
+ case MCObjectFileInfo::IsMachO:
+ CurrentFormat = MACHO;
+ break;
+ case MCObjectFileInfo::IsELF:
+ CurrentFormat = ELF;
+ break;
+ case MCObjectFileInfo::IsCOFF:
+ CurrentFormat = COFF;
+ break;
+ }
+
+ if (~Prefix->SupportedFormats & CurrentFormat) {
+ Error(Parser.getTok().getLoc(),
+ "cannot represent relocation in the current file format");
+ return true;
+ }
+
+ RefKind = Prefix->VariantKind;
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon)) {
@@ -4952,6 +5307,7 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
return true;
}
Parser.Lex(); // Eat the last ':'
+
return false;
}
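For context on what these prefixes denote, a hedged numeric sketch: the address is made up, and :lower16:/:upper16: select the halves a movw/movt pair loads to materialize a 32-bit symbol address.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Addr = 0x12345678;  // illustrative symbol address
      uint16_t Lo = Addr & 0xffff; // :lower16:(sym), used by movw
      uint16_t Hi = Addr >> 16;    // :upper16:(sym), used by movt
      assert(Lo == 0x5678 && Hi == 0x1234);
    }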
@@ -4984,7 +5340,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" ||
Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
- Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel"))
+ Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel"))
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5089,7 +5446,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+ Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
@@ -5127,7 +5484,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// conditionally adding the cc_out in the first place because we need
// to check the type of the parsed immediate operand.
if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
- !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() &&
+ !static_cast<ARMOperand &>(*Operands[4]).isModImm() &&
static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
@@ -5271,7 +5628,7 @@ static bool isDataTypeToken(StringRef Tok) {
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
unsigned VariantID);
static bool RequiresVFPRegListValidation(StringRef Inst,
@@ -5296,6 +5653,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst,
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
// FIXME: Can this be done via tablegen in some fashion?
bool RequireVFPRegisterListCheck;
bool AcceptSinglePrecisionOnly;
@@ -5309,7 +5667,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// The generic tblgen'erated code does this later, at the start of
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
- unsigned AvailableFeatures = getAvailableFeatures();
+ uint64_t AvailableFeatures = getAvailableFeatures();
unsigned AssemblerDialect = getParser().getAssemblerDialect();
applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
@@ -5415,6 +5773,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::Create(ProcessorIMod, getContext()),
NameLoc, NameLoc));
+ } else if (Mnemonic == "cps" && isMClass()) {
+ return Error(NameLoc, "instruction 'cps' requires effect for M-class");
}
// Add the remaining tokens in the mnemonic.
@@ -5546,6 +5906,48 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
+ // If the first two operands of a three-operand instruction are the same,
+ // transform it into the two-operand form of the same instruction,
+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (isThumbOne() && Operands.size() == 6 &&
+ (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) {
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]);
+ ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]);
+
+ // If both registers are the same then remove one of them from
+ // the operand list.
+ if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) {
+ // If the third operand (Op5) is a register and the instruction is
+ // adds/sub, do not transform; the backend already handles this
+ // instruction correctly.
+ if (!Op5.isReg() ||
+     !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) {
+ Operands.erase(Operands.begin() + 3);
+ if (Mnemonic == "add" && !CarrySetting) {
+ // The special case for 'add' (not 'adds') must also remove
+ // the CCOut operand.
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+ }
+ }
+
+ // If the instruction is 'add' and the first two register operands
+ // are both SP, remove one of the SP operands from the instruction.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" &&
+     !CarrySetting) {
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP &&
+     Op3.getReg() == ARM::SP) {
+ Operands.erase(Operands.begin() + 2);
+ }
+ }
+
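An illustrative toy of the operand rewrites above, using string lists instead of the real MCParsedAsmOperand vector (operand positions simplified for the example):

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // 'adds r0, r0, #1' -> 'adds r0, #1': drop the duplicated register
      // so the two-operand Thumb1 encoding can match.
      std::vector<std::string> Ops = {"adds", "r0", "r0", "#1"};
      if (Ops[1] == Ops[2])
        Ops.erase(Ops.begin() + 2);
      assert((Ops == std::vector<std::string>{"adds", "r0", "#1"}));
    }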
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
@@ -5623,6 +6025,50 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
+bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
+ const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsLR = listContainsReg(Inst, ListNo, ARM::LR);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (!IsARPop && ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC && ListContainsLR)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC and LR may not be in the register list simultaneously");
+ else if (inITBlock() && !lastInITBlock() && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "instruction must be outside of IT block or the last "
+ "instruction in an IT block");
+ return false;
+}
+
+bool ARMAsmParser::validatetSTMRegList(MCInst Inst,
+ const OperandVector &Operands,
+ unsigned ListNo) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (ListContainsSP && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP and PC may not be in the register list");
+ else if (ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC may not be in the register list");
+ return false;
+}
+
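A compact restatement of the STM rule above as a runnable sketch; register numbering is the usual ARM one, and the LDM rule adds the SP/LR/PC and IT-block cases shown in validatetLDMRegList.

    #include <cassert>
    #include <initializer_list>

    enum Reg { R0 = 0, R1, SP = 13, LR = 14, PC = 15 };

    // STM register lists may contain neither SP nor PC.
    static bool stmListValid(std::initializer_list<Reg> Regs) {
      for (Reg R : Regs)
        if (R == SP || R == PC)
          return false;
      return true;
    }

    int main() {
      assert(stmListValid({R0, R1, LR})); // "stm r2!, {r0, r1, lr}" is fine
      assert(!stmListValid({R0, SP}));    // "SP may not be in the register list"
      assert(!stmListValid({PC}));        // "PC may not be in the register list"
    }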
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::validateInstruction(MCInst &Inst,
const OperandVector &Operands) {
@@ -5728,6 +6174,48 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"source operands must be sequential");
return false;
}
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "source register and base register can't be identical");
+ return false;
+ }
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "destination register and base register can't be identical");
+ return false;
+ }
case ARM::SBFX:
case ARM::UBFX: {
// Width must be in range [1, 32-lsb].
@@ -5765,6 +6253,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"writeback operator '!' not allowed when base register "
"in register list");
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
break;
}
case ARM::LDMIA_UPD:
@@ -5775,7 +6265,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
// UNPREDICTABLE on v7 upwards. Goodness knows what they did before.
if (!hasV7Ops())
break;
- // Fallthrough
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
+ break;
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
case ARM::t2STMIA_UPD:
@@ -5783,6 +6286,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
return Error(Operands.back()->getStartLoc(),
"writeback register not allowed in register list");
+
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ } else {
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
+ }
break;
}
case ARM::sysLDMIA_UPD:
@@ -5827,6 +6338,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
+ if (validatetLDMRegList(Inst, Operands, 2, !isMClass()))
+ return true;
break;
}
case ARM::tPUSH: {
@@ -5835,6 +6348,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
+ if (validatetSTMRegList(Inst, Operands, 2))
+ return true;
break;
}
case ARM::tSTMIA_UPD: {
@@ -5851,6 +6366,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[4]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
+
+ if (validatetSTMRegList(Inst, Operands, 4))
+ return true;
break;
}
case ARM::tADDrSP: {
@@ -6171,7 +6689,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
}
bool ARMAsmParser::processInstruction(MCInst &Inst,
- const OperandVector &Operands) {
+ const OperandVector &Operands,
+ MCStreamer &Out) {
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6212,12 +6731,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Alias for alternate form of 'ADR Rd, #imm' instruction.
case ARM::ADDri: {
if (Inst.getOperand(1).getReg() != ARM::PC ||
- Inst.getOperand(5).getReg() != 0)
+ Inst.getOperand(5).getReg() != 0 ||
+ !(Inst.getOperand(2).isExpr() || Inst.getOperand(2).isImm()))
return false;
MCInst TmpInst;
TmpInst.setOpcode(ARM::ADR);
TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(2));
+ if (Inst.getOperand(2).isImm()) {
+ // Immediate (mod_imm) will be in its encoded form, we must unencode it
+ // before passing it to the ADR instruction.
+ unsigned Enc = Inst.getOperand(2).getImm();
+ TmpInst.addOperand(MCOperand::CreateImm(
+ ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7)));
+ } else {
+ // Turn PC-relative expression into absolute expression.
+ // Reading the PC yields the address of the current instruction + 8,
+ // and the transform to ADR is biased by that.
+ MCSymbol *Dot = getContext().CreateTempSymbol();
+ Out.EmitLabel(Dot);
+ const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
+ const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot,
+ MCSymbolRefExpr::VK_None,
+ getContext());
+ const MCExpr *Const8 = MCConstantExpr::Create(8, getContext());
+ const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8,
+ getContext());
+ const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr,
+ getContext());
+ TmpInst.addOperand(MCOperand::CreateExpr(FixupAddr));
+ }
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
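A worked arithmetic check of the PC bias described above; the addresses are invented for illustration.

    #include <cassert>
    #include <cstdint>

    int main() {
      // In ARM state, reading PC yields the current instruction's address
      // plus 8, so the absolute expression built for ADR is (. + 8) + offset.
      uint32_t Dot = 0x8000; // label emitted at the add/adr instruction
      int32_t Offset = -16;  // value the PC-relative expression evaluates to
      assert(Dot + 8 + Offset == 0x7FF8u);
    }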
@@ -8010,7 +8552,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
// Some high-register supporting Thumb1 encodings only allow both registers
// to be from r0-r7 when in Thumb2.
- else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg()))
return Match_RequiresThumb2;
@@ -8028,10 +8570,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -8039,7 +8581,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
- default: break;
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
// so check them here.
@@ -8057,7 +8598,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other. E.g.,
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2LDR_POST(sp,r8)
- while (processInstruction(Inst, Operands))
+ while (processInstruction(Inst, Operands, Out))
;
// Only after the instruction is fully processed, we can validate it
@@ -8085,7 +8626,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
// a single subtarget feature is missing (Thumb vs. ARM, e.g.).
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -8097,7 +8638,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
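
Widening ErrorInfo to uint64_t means the "no operand info" sentinel comparison must widen too; ~0U no longer matches. A minimal demonstration (standalone, not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t ErrorInfo = ~0ULL;          // the matcher's "no operand info"
  // ~0U promotes to 0x00000000FFFFFFFF in the 64-bit comparison:
  printf("%d\n", ErrorInfo != ~0U);    // 1 -- sentinel NOT recognized (bug)
  printf("%d\n", ErrorInfo != ~0ULL);  // 0 -- sentinel recognized
  return 0;
}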
@@ -8174,6 +8715,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
@@ -8225,7 +8767,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
else if (IDVal == ".thumb_set")
return parseDirectiveThumbSet(DirectiveID.getLoc());
- if (!IsMachO) {
+ if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".cpu")
@@ -8256,6 +8798,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ::= .short expression [, expression]*
/// ::= .word expression [, expression]*
bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -8285,6 +8828,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
Error(L, "unexpected token in directive");
return false;
@@ -8306,6 +8850,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
Error(L, "unexpected token in directive");
return false;
@@ -8334,12 +8879,13 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
+ MCAsmParser &Parser = getParser();
+ const auto Format = getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
// Darwin asm has an (optional) function name after the .thumb_func directive;
// ELF doesn't.
- if (isMachO) {
+ if (IsMachO) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::EndOfStatement)) {
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) {
@@ -8356,7 +8902,8 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
+ Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.eatToEndOfStatement();
return false;
}
@@ -8367,6 +8914,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
/// parseDirectiveSyntax
/// ::= .syntax unified | divided
bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(L, "unexpected token in .syntax directive");
@@ -8398,6 +8946,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
/// parseDirectiveCode
/// ::= .code 16 | 32
bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Integer)) {
Error(L, "unexpected token in .code directive");
@@ -8442,6 +8991,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the '.req' token.
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
@@ -8460,7 +9010,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
- if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) {
+ if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) {
Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
return false;
}
@@ -8471,6 +9021,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
Parser.eatToEndOfStatement();
Error(L, "unexpected input in .unreq directive.");
@@ -8507,6 +9058,7 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// ::= .eabi_attribute int, int [, "str"]
/// ::= .eabi_attribute Tag_name, int [, "str"]
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ MCAsmParser &Parser = getParser();
int64_t Tag;
SMLoc TagLoc;
TagLoc = Parser.getTok().getLoc();
@@ -8584,8 +9136,13 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
if (Tag == ARMBuildAttrs::compatibility) {
if (Parser.getTok().isNot(AsmToken::Comma))
IsStringValue = false;
- else
- Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ Parser.Lex();
+ }
}
if (IsStringValue) {
@@ -8614,9 +9171,49 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
StringRef CPU = getParser().parseStringToEndOfStatement().trim();
getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+
+ if (!STI.isCPUStringValid(CPU)) {
+ Error(L, "Unknown CPU name");
+ return false;
+ }
+
+  // FIXME: This switches the CPU features globally, so code that you would
+  // not expect to assemble may assemble. For details
+ // see: http://llvm.org/bugs/show_bug.cgi?id=20757
+ STI.InitMCProcessorInfo(CPU, "");
+ STI.InitCPUSchedModel(CPU);
+ unsigned FB = ComputeAvailableFeatures(STI.getFeatureBits());
+ setAvailableFeatures(FB);
+
return false;
}
+// FIXME: This is duplicated in getARMFPUFeatures() in
+// tools/clang/lib/Driver/Tools.cpp
+static const struct {
+ const unsigned Fpu;
+ const uint64_t Enabled;
+ const uint64_t Disabled;
+} Fpus[] = {
+ {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
+ {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
+ {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
+ {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
+ {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
+ {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
+ {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
+ ARM::FeatureNEON | ARM::FeatureCrypto},
+ {ARM::FP_ARMV8, ARM::FeatureFPARMv8,
+ ARM::FeatureNEON | ARM::FeatureCrypto},
+ {ARM::NEON, ARM::FeatureNEON, 0},
+ {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
+ {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
+ ARM::FeatureCrypto},
+ {ARM::CRYPTO_NEON_FP_ARMV8,
+ ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
+ {ARM::SOFTVFP, 0, 0},
+};
+
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
@@ -8632,6 +9229,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
return false;
}
+ for (const auto &Fpu : Fpus) {
+ if (Fpu.Fpu != ID)
+ continue;
+
+ // Need to toggle features that should be on but are off and that
+  // should be off but are on.
+ uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
+ (Fpu.Disabled & STI.getFeatureBits());
+ setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
+ break;
+ }
+
getTargetStreamer().emitFPU(ID);
return false;
}
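
The toggle computation is easier to see standalone: only the bits that differ from the entry's desired state are flipped, so already-correct features are left alone. A sketch with made-up feature constants (not the real ARM:: values):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t VFP4 = 1, NEON = 2, D16 = 4;
  uint64_t Bits = NEON | D16;                     // current subtarget bits
  uint64_t Enabled = VFP4 | D16, Disabled = NEON; // a VFPV4_D16-like entry
  uint64_t Toggle = (Enabled & ~Bits) | (Disabled & Bits);
  Bits ^= Toggle;              // roughly what ToggleFeature does to raw bits
  printf("0x%llx\n", (unsigned long long)Bits);   // 0x5 = VFP4 | D16
  return 0;
}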
@@ -8698,6 +9307,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
/// parseDirectivePersonality
/// ::= .personality name
bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
+ MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
UC.recordPersonality(L);
@@ -8761,6 +9371,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
/// parseDirectiveSetFP
/// ::= .setfp fpreg, spreg [, offset]
bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
if (!UC.hasFnStart()) {
Error(L, ".fnstart must precede .setfp directive");
@@ -8838,6 +9449,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
/// parseDirective
/// ::= .pad offset
bool ARMAsmParser::parseDirectivePad(SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
if (!UC.hasFnStart()) {
Error(L, ".fnstart must precede .pad directive");
@@ -8912,6 +9524,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
+ MCAsmParser &Parser = getParser();
int Width;
if (isThumb()) {
@@ -9008,7 +9621,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
if (!Section) {
- getStreamer().InitSections();
+ getStreamer().InitSections(false);
Section = getStreamer().getCurrentSection().first;
}
@@ -9024,6 +9637,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
/// parseDirectivePersonalityIndex
/// ::= .personalityindex index
bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
+ MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
UC.recordPersonalityIndex(L);
@@ -9079,6 +9693,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
/// parseDirectiveUnwindRaw
/// ::= .unwind_raw offset, opcode [, opcode...]
bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (!UC.hasFnStart()) {
Parser.eatToEndOfStatement();
Error(L, ".fnstart must precede .unwind_raw directives");
@@ -9160,6 +9775,8 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
/// parseDirectiveTLSDescSeq
/// ::= .tlsdescseq tls-variable
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
if (getLexer().isNot(AsmToken::Identifier)) {
TokError("expected variable after '.tlsdescseq' directive");
Parser.eatToEndOfStatement();
@@ -9184,6 +9801,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
/// parseDirectiveMovSP
/// ::= .movsp reg [, #offset]
bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (!UC.hasFnStart()) {
Parser.eatToEndOfStatement();
Error(L, ".fnstart must precede .movsp directives");
@@ -9247,6 +9865,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// parseDirectiveObjectArch
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
@@ -9303,6 +9922,8 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
/// parseDirectiveThumbSet
/// ::= .thumb_set name, value
bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
StringRef Name;
if (Parser.parseIdentifier(Name)) {
TokError("expected identifier after '.thumb_set'");
@@ -9349,8 +9970,8 @@ extern "C" void LLVMInitializeARMAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
-static const struct ExtMapEntry {
- const char *Extension;
+static const struct {
+ const char *Name;
const unsigned ArchCheck;
const uint64_t Features;
} Extensions[] = {
@@ -9381,46 +10002,47 @@ static const struct ExtMapEntry {
/// parseDirectiveArchExtension
/// ::= .arch_extension [no]feature
bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
return false;
}
- StringRef Extension = Parser.getTok().getString();
+ StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
getLexer().Lex();
bool EnableFeature = true;
- if (Extension.startswith_lower("no")) {
+ if (Name.startswith_lower("no")) {
EnableFeature = false;
- Extension = Extension.substr(2);
+ Name = Name.substr(2);
}
- for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) {
- if (Extensions[EI].Extension != Extension)
+ for (const auto &Extension : Extensions) {
+ if (Extension.Name != Name)
continue;
- unsigned FB = getAvailableFeatures();
- if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) {
- Error(ExtLoc, "architectural extension '" + Extension + "' is not "
+ if (!Extension.Features)
+ report_fatal_error("unsupported architectural extension: " + Name);
+
+ if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
+ Error(ExtLoc, "architectural extension '" + Name + "' is not "
"allowed for the current base architecture");
return false;
}
- if (!Extensions[EI].Features)
- report_fatal_error("unsupported architectural extension: " + Extension);
-
- if (EnableFeature)
- FB |= ComputeAvailableFeatures(Extensions[EI].Features);
- else
- FB &= ~ComputeAvailableFeatures(Extensions[EI].Features);
-
- setAvailableFeatures(FB);
+ uint64_t ToggleFeatures = EnableFeature
+ ? (~STI.getFeatureBits() & Extension.Features)
+ : ( STI.getFeatureBits() & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
return false;
}
- Error(ExtLoc, "unknown architectural extension: " + Extension);
+ Error(ExtLoc, "unknown architectural extension: " + Name);
Parser.eatToEndOfStatement();
return false;
}
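
The .arch_extension path uses the same idea, but directionally: enabling flips only the currently missing bits, disabling only the currently present ones. A sketch with made-up constants:

#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  const uint64_t CRC = 1, HWDIV = 2;
  uint64_t Bits = HWDIV;                          // crc currently off
  for (bool Enable : {true, false}) {
    uint64_t Toggle = Enable ? (~Bits & CRC)      // only the missing bits
                             : ( Bits & CRC);     // only the present bits
    Bits ^= Toggle;
    printf("0x%llx\n", (unsigned long long)Bits); // 0x3, then 0x2
  }
  return 0;
}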
@@ -9441,7 +10063,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (CE->getValue() == 0)
return Match_Success;
break;
- case MCK_ARMSOImm:
+ case MCK_ModImm:
if (Op.isImm()) {
const MCExpr *SOExpr = Op.getImm();
int64_t Value;
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9b5fa75fe2a7..2530640139ac 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -2,8 +2,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
@@ -19,7 +18,6 @@ add_llvm_target(ARMCodeGen
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
- ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
@@ -29,7 +27,6 @@ add_llvm_target(ARMCodeGen
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
- ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4d4038deac7c..51faf692c88f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -85,42 +84,34 @@ namespace {
}
namespace {
-/// ARMDisassembler - ARM disassembler for all ARM platforms.
+/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() {
- }
+ ~ARMDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
-/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+/// Thumb disassembler for all Thumb platforms.
class ThumbDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() {
- }
+ ~ThumbDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
private:
mutable ITStatus ITBlock;
@@ -185,8 +176,6 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
@@ -281,6 +270,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
@@ -412,104 +403,122 @@ static MCDisassembler *createThumbDisassembler(const Target &T,
return new ThumbDisassembler(STI, Ctx);
}
-DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
+// Post-decoding checks
+static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS,
+ uint32_t Insn,
+ DecodeStatus Result)
+{
+ switch (MI.getOpcode()) {
+ case ARM::HVC: {
+    // HVC is UNDEFINED if condition = 0xF, otherwise UNPREDICTABLE
+    // if condition != 0xE.
+ uint32_t Cond = (Insn >> 28) & 0xF;
+ if (Cond == 0xF)
+ return MCDisassembler::Fail;
+ if (Cond != 0xE)
+ return MCDisassembler::SoftFail;
+ return Result;
+ }
+ default: return Result;
+ }
+}
- uint8_t bytes[4];
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
- "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb "
+ "mode!");
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
// Encoded as a little-endian 32-bit word in the stream.
- uint32_t insn = (bytes[3] << 24) |
- (bytes[2] << 16) |
- (bytes[1] << 8) |
- (bytes[0] << 0);
+ uint32_t Insn =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
// Calling the auto-generated decoder function.
- DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
MI.clear();
- result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
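
The byte-to-word assembly is straightforward: ARM instructions are stored as one little-endian 32-bit word. A standalone sketch (the example bytes are assumed to be "str r11, [sp, #-4]!"):

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t Bytes[4] = {0x04, 0xB0, 0x2D, 0xE5};
  uint32_t Insn = ((uint32_t)Bytes[3] << 24) | (Bytes[2] << 16) |
                  (Bytes[1] << 8) | Bytes[0];
  printf("0x%08X\n", Insn);                       // 0xE52DB004
  return 0;
}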
@@ -681,55 +690,53 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
}
DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
+ ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
assert((STI.getFeatureBits() & ARM::ModeThumb) &&
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
- if (Region.readBytes(Address, 2, bytes) == -1) {
+ if (Bytes.size() < 2) {
Size = 0;
return MCDisassembler::Fail;
}
- uint16_t insn16 = (bytes[1] << 8) | bytes[0];
- DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 2;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
- Address, this, STI);
- if (result) {
+ Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
+ STI);
+ if (Result) {
Size = 2;
bool InITBlock = ITBlock.instrInITBlock();
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumb216, MI, insn16,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 2;
// Nested IT blocks are UNPREDICTABLE. Must be checked before we add
// the Thumb predicate.
if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
- result = MCDisassembler::SoftFail;
+ Result = MCDisassembler::SoftFail;
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
// If we find an IT instruction, we need to parse its condition
// code and mask operands so that we can apply them correctly
@@ -741,115 +748,115 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ITBlock.setITState(Firstcond, Mask);
}
- return result;
+ return Result;
}
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
- uint32_t insn32 = (bytes[3] << 8) |
- (bytes[2] << 0) |
- (bytes[1] << 24) |
- (bytes[0] << 16);
+ uint32_t Insn32 =
+ (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
MI.clear();
- result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
bool InITBlock = ITBlock.instrInITBlock();
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
- if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
MI.clear();
- result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
UpdateThumbVFPPredicate(MI);
- return result;
+ return Result;
}
}
MI.clear();
- result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
- if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
MI.clear();
- result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
+ STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
}
- if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
+ if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
MI.clear();
- uint32_t NEONLdStInsn = insn32;
+ uint32_t NEONLdStInsn = Insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
- result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
}
- if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
+ if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
MI.clear();
- uint32_t NEONDataInsn = insn32;
+ uint32_t NEONDataInsn = Insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONDataInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
MI.clear();
- uint32_t NEONCryptoInsn = insn32;
+ uint32_t NEONCryptoInsn = Insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- uint32_t NEONv8Insn = insn32;
+ uint32_t NEONv8Insn = Insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
- result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
}
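
The unusual shift pattern reflects Thumb2 storage: a 32-bit encoding is two little-endian halfwords, with the most significant halfword first in memory. A standalone sketch (example bytes assumed to be "push.w {r11, lr}", encoding 0xE92D4800):

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t Bytes[4] = {0x2D, 0xE9, 0x00, 0x48};
  uint32_t Insn32 = (Bytes[3] << 8) | (Bytes[2] << 0) |
                    ((uint32_t)Bytes[1] << 24) | ((uint32_t)Bytes[0] << 16);
  printf("0x%08X\n", Insn32);                     // 0xE92D4800
  return 0;
}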
@@ -1015,7 +1022,11 @@ static const uint16_t DPRDecoderTable[] = {
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasD16 = featureBits & ARM::FeatureD16;
+
+ if (RegNo > 31 || (hasD16 && RegNo > 15))
return MCDisassembler::Fail;
unsigned Register = DPRDecoderTable[RegNo];
@@ -1122,15 +1133,6 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- uint32_t imm = Val & 0xFF;
- uint32_t rot = (Val & 0xF00) >> 7;
- uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
- Inst.addOperand(MCOperand::CreateImm(rot_imm));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2973,11 +2975,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
if (size == 0x3) {
if (align == 0)
return MCDisassembler::Fail;
- size = 4;
align = 16;
} else {
if (size == 2) {
- size = 1 << size;
align *= 8;
} else {
size = 1 << size;
@@ -3267,6 +3267,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = featureBits & ARM::FeatureMP;
+ bool hasV7Ops = featureBits & ARM::HasV7Ops;
+
if (Rn == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBs:
@@ -3302,11 +3307,10 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
case ARM::t2LDRSHs:
return MCDisassembler::Fail;
case ARM::t2LDRHs:
- // FIXME: this instruction is only available with MP extensions,
- // this should be checked first but we don't have access to the
- // feature bits here.
Inst.setOpcode(ARM::t2PLDWs);
break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2PLIs);
default:
break;
}
@@ -3314,8 +3318,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDs:
- case ARM::t2PLDWs:
+ break;
case ARM::t2PLIs:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2PLDWs:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
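
The feature-bit lookup repeated across these decoders could be captured in a helper; a sketch (hypothetical name, not in the patch), assuming as this file does that the opaque Decoder pointer is the MCDisassembler and that getFeatureBits() returns a uint64_t:

static bool decoderHasFeature(const void *Decoder, uint64_t Feature) {
  const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder);
  return (Dis->getSubtargetInfo().getFeatureBits() & Feature) != 0;
}
// Usage sketch:
//   if (!decoderHasFeature(Decoder, ARM::FeatureMP))
//     return MCDisassembler::Fail;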
@@ -3341,6 +3351,12 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 8);
imm |= (U << 8);
imm |= (Rn << 9);
+ unsigned add = fieldFromInstruction(Insn, 9, 1);
+
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = featureBits & ARM::FeatureMP;
+ bool hasV7Ops = featureBits & ARM::HasV7Ops;
if (Rn == 15) {
switch (Inst.getOpcode()) {
@@ -3375,6 +3391,13 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2LDRSHi8:
return MCDisassembler::Fail;
+ case ARM::t2LDRHi8:
+ if (!add)
+ Inst.setOpcode(ARM::t2PLDWi8);
+ break;
+ case ARM::t2LDRSBi8:
+ Inst.setOpcode(ARM::t2PLIi8);
+ break;
default:
break;
}
@@ -3382,9 +3405,15 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDi8:
+ break;
case ARM::t2PLIi8:
- case ARM::t2PLDWi8:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
+ case ARM::t2PLDWi8:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3404,6 +3433,11 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 12);
imm |= (Rn << 13);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = (featureBits & ARM::FeatureMP);
+ bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+
if (Rn == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRi12:
@@ -3438,7 +3472,10 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
case ARM::t2LDRSHi12:
return MCDisassembler::Fail;
case ARM::t2LDRHi12:
- Inst.setOpcode(ARM::t2PLDi12);
+ Inst.setOpcode(ARM::t2PLDWi12);
+ break;
+ case ARM::t2LDRSBi12:
+ Inst.setOpcode(ARM::t2PLIi12);
break;
default:
break;
@@ -3447,9 +3484,15 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDi12:
- case ARM::t2PLDWi12:
+ break;
case ARM::t2PLIi12:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
+ case ARM::t2PLDWi12:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3507,6 +3550,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
unsigned U = fieldFromInstruction(Insn, 23, 1);
int imm = fieldFromInstruction(Insn, 0, 12);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+
if (Rt == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBpci:
@@ -3525,7 +3572,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
switch(Inst.getOpcode()) {
case ARM::t2PLDpci:
+ break;
case ARM::t2PLIpci:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
@@ -3974,7 +4024,85 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!Val) return MCDisassembler::Fail;
+ DecodeStatus S = MCDisassembler::Success;
+ uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if (FeatureBits & ARM::FeatureMClass) {
+ unsigned ValLow = Val & 0xff;
+
+ // Validate the SYSm value first.
+ switch (ValLow) {
+ case 0: // apsr
+ case 1: // iapsr
+ case 2: // eapsr
+ case 3: // xpsr
+ case 5: // ipsr
+ case 6: // epsr
+ case 7: // iepsr
+ case 8: // msp
+ case 9: // psp
+ case 16: // primask
+ case 20: // control
+ break;
+ case 17: // basepri
+ case 18: // basepri_max
+ case 19: // faultmask
+ if (!(FeatureBits & ARM::HasV7Ops))
+ // Values basepri, basepri_max and faultmask are only valid for v7m.
+ return MCDisassembler::Fail;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ if (Inst.getOpcode() == ARM::t2MSR_M) {
+ unsigned Mask = fieldFromInstruction(Val, 10, 2);
+ if (!(FeatureBits & ARM::HasV7Ops)) {
+        // In ARMv6-M, MSR bits {11-10} can only be 0b10; other values are
+ // unpredictable.
+ if (Mask != 2)
+ S = MCDisassembler::SoftFail;
+ }
+ else {
+ // The ARMv7-M architecture stores an additional 2-bit mask value in
+ // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and
+ // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if
+ // the NZCVQ bits should be moved by the instruction. Bit mask{0}
+ // indicates the move for the GE{3:0} bits, the mask{0} bit can be set
+ // only if the processor includes the DSP extension.
+ if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
+ (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1)))
+ S = MCDisassembler::SoftFail;
+ }
+ }
+ } else {
+ // A/R class
+ if (Val == 0)
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
+
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+
+ unsigned R = fieldFromInstruction(Val, 5, 1);
+ unsigned SysM = fieldFromInstruction(Val, 0, 5);
+
+ // The table of encodings for these banked registers comes from B9.2.3 of the
+ // ARM ARM. There are patterns, but nothing regular enough to make this logic
+ // neater. So by fiat, these values are UNPREDICTABLE:
+ if (!R) {
+ if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 ||
+ SysM == 0x1a || SysM == 0x1b)
+ return MCDisassembler::SoftFail;
+ } else {
+ if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 &&
+ SysM != 0x16 && SysM != 0x1c && SysM != 0x1e)
+ return MCDisassembler::SoftFail;
+ }
+
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
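
The validity rules above, restated as a standalone predicate (logic copied from DecodeBankedReg; names are local to this sketch):

#include <cstdio>

static bool isValidBankedReg(unsigned R, unsigned SysM) {
  if (!R)                       // GPR banks (r8_usr .. sp_hyp)
    return !(SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 ||
             SysM == 0x1a || SysM == 0x1b);
  // SPSR bank: only these SysM encodings are defined
  return SysM == 0xe || SysM == 0x10 || SysM == 0x12 || SysM == 0x14 ||
         SysM == 0x16 || SysM == 0x1c || SysM == 0x1e;
}

int main() {
  printf("%d %d\n", isValidBankedReg(1, 0x0e),    // 1: SPSR_fiq
                    isValidBankedReg(1, 0x0f));   // 0: UNPREDICTABLE
  return 0;
}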
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index 52d833893270..a64a8a970c05 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = ARMDisassembler
parent = ARM
-required_libraries = ARMDesc ARMInfo MC Support
+required_libraries = ARMDesc ARMInfo MCDisassembler Support
add_to_library_groups = ARM
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 228fb5756ca8..16eea335261f 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -269,7 +269,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
case ARM::LDREXD: case ARM::STREXD:
- case ARM::LDAEXD: case ARM::STLEXD:
+ case ARM::LDAEXD: case ARM::STLEXD: {
const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
@@ -290,6 +290,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printInstruction(&NewMI, O);
return;
}
+ break;
+ }
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
+ case ARM::t2SUBS_PC_LR: {
+ if (MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isImm() &&
+ MI->getOperand(0).getImm() == 0 &&
+ (getAvailableFeatures() & ARM::FeatureVirtualization)) {
+ O << "\teret";
+ printPredicateOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ break;
+ }
}
printInstruction(MI, O);
@@ -503,30 +520,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
// Addressing Mode #3
//===--------------------------------------------------------------------===//
-void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
-
- O << markup("<mem:") << "[";
- printRegName(O, MO1.getReg());
- O << "], " << markup(">");
-
- if (MO2.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO3.getImm());
- printRegName(O, MO2.getReg());
- return;
- }
-
- unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
- O << markup("<imm:")
- << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs
- << markup(">");
-}
-
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
raw_ostream &O,
bool AlwaysPrintImm0) {
@@ -568,13 +561,9 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
return;
}
- const MCOperand &MO3 = MI->getOperand(Op+2);
- unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
-
- if (IdxMode == ARMII::IndexModePost) {
- printAM3PostIndexOp(MI, Op, O);
- return;
- }
+ assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) !=
+ ARMII::IndexModePost &&
+ "unexpected idxmode");
printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
}
@@ -807,52 +796,56 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &Op = MI->getOperand(OpNum);
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
+ uint64_t FeatureBits = getAvailableFeatures();
- if (getAvailableFeatures() & ARM::FeatureMClass) {
+ if (FeatureBits & ARM::FeatureMClass) {
unsigned SYSm = Op.getImm();
unsigned Opcode = MI->getOpcode();
- // For reads of the special registers ignore the "mask encoding" bits
- // which are only for writes.
- if (Opcode == ARM::t2MRS_M)
- SYSm &= 0xff;
+
+ // For writes, handle extended mask bits if the DSP extension is present.
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) {
+ switch (SYSm) {
+ case 0x400: O << "apsr_g"; return;
+ case 0xc00: O << "apsr_nzcvqg"; return;
+ case 0x401: O << "iapsr_g"; return;
+ case 0xc01: O << "iapsr_nzcvqg"; return;
+ case 0x402: O << "eapsr_g"; return;
+ case 0xc02: O << "eapsr_nzcvqg"; return;
+ case 0x403: O << "xpsr_g"; return;
+ case 0xc03: O << "xpsr_nzcvqg"; return;
+ }
+ }
+
+ // Handle the basic 8-bit mask.
+ SYSm &= 0xff;
+
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) {
+ // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
+ // alias for MSR APSR_nzcvq.
+ switch (SYSm) {
+ case 0: O << "apsr_nzcvq"; return;
+ case 1: O << "iapsr_nzcvq"; return;
+ case 2: O << "eapsr_nzcvq"; return;
+ case 3: O << "xpsr_nzcvq"; return;
+ }
+ }
+
switch (SYSm) {
default: llvm_unreachable("Unexpected mask value!");
- case 0:
- case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr
- case 0x400: O << "apsr_g"; return;
- case 0xc00: O << "apsr_nzcvqg"; return;
- case 1:
- case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr
- case 0x401: O << "iapsr_g"; return;
- case 0xc01: O << "iapsr_nzcvqg"; return;
- case 2:
- case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr
- case 0x402: O << "eapsr_g"; return;
- case 0xc02: O << "eapsr_nzcvqg"; return;
- case 3:
- case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
- case 0x403: O << "xpsr_g"; return;
- case 0xc03: O << "xpsr_nzcvqg"; return;
- case 5:
- case 0x805: O << "ipsr"; return;
- case 6:
- case 0x806: O << "epsr"; return;
- case 7:
- case 0x807: O << "iepsr"; return;
- case 8:
- case 0x808: O << "msp"; return;
- case 9:
- case 0x809: O << "psp"; return;
- case 0x10:
- case 0x810: O << "primask"; return;
- case 0x11:
- case 0x811: O << "basepri"; return;
- case 0x12:
- case 0x812: O << "basepri_max"; return;
- case 0x13:
- case 0x813: O << "faultmask"; return;
- case 0x14:
- case 0x814: O << "control"; return;
+ case 0: O << "apsr"; return;
+ case 1: O << "iapsr"; return;
+ case 2: O << "eapsr"; return;
+ case 3: O << "xpsr"; return;
+ case 5: O << "ipsr"; return;
+ case 6: O << "epsr"; return;
+ case 7: O << "iepsr"; return;
+ case 8: O << "msp"; return;
+ case 9: O << "psp"; return;
+ case 16: O << "primask"; return;
+ case 17: O << "basepri"; return;
+ case 18: O << "basepri_max"; return;
+ case 19: O << "faultmask"; return;
+ case 20: O << "control"; return;
}
}
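
A standalone sketch of the M-class operand layout this printer relies on: bits {11:10} of SYSm carry the write mask (g, nzcvq) and the low byte selects the register:

#include <cstdio>

int main() {
  unsigned SYSm = 0xc01;                  // printed above as "iapsr_nzcvqg"
  unsigned Mask = (SYSm >> 10) & 3;       // 3: both nzcvq (bit 1) and g (bit 0)
  unsigned Reg  = SYSm & 0xff;            // 1: iapsr
  printf("reg=%u mask=%u\n", Reg, Mask);  // reg=1 mask=3
  return 0;
}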
@@ -882,6 +875,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
}
}
+void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ uint32_t Banked = MI->getOperand(OpNum).getImm();
+ uint32_t R = (Banked & 0x20) >> 5;
+ uint32_t SysM = Banked & 0x1f;
+
+  // Nothing much we can do about this; the encodings are specified in B9.2.3
+  // of the ARM ARM v7C, and are all over the shop.
+ if (R) {
+ O << "SPSR_";
+
+ switch(SysM) {
+ case 0x0e: O << "fiq"; return;
+ case 0x10: O << "irq"; return;
+ case 0x12: O << "svc"; return;
+ case 0x14: O << "abt"; return;
+ case 0x16: O << "und"; return;
+ case 0x1c: O << "mon"; return;
+ case 0x1e: O << "hyp"; return;
+ default: llvm_unreachable("Invalid banked SPSR register");
+ }
+ }
+
+ assert(!R && "should have dealt with SPSR regs");
+ const char *RegNames[] = {
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
+ "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
+ "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und",
+ "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"
+ };
+ const char *Name = RegNames[SysM];
+ assert(Name[0] && "invalid banked register operand");
+
+ O << Name;
+}
+
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
@@ -1289,6 +1318,52 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
O << markup(">");
}
+void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ MCOperand Op = MI->getOperand(OpNum);
+
+ // Support for fixups (MCFixup)
+ if (Op.isExpr())
+ return printOperand(MI, OpNum, O);
+
+ unsigned Bits = Op.getImm() & 0xFF;
+ unsigned Rot = (Op.getImm() & 0xF00) >> 7;
+
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()){
+ case ARM::MOVi:
+    // Movs to PC should be treated as unsigned.
+ PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
+ break;
+ case ARM::MSRi:
+    // Movs to special registers should be treated as unsigned.
+ PrintUnsigned = true;
+ break;
+ }
+
+ int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
+ if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
+    // The encoding is canonical (#rot has the least possible value).
+ O << "#" << markup("<imm:");
+ if (PrintUnsigned)
+ O << static_cast<uint32_t>(Rotated);
+ else
+ O << Rotated;
+ O << markup(">");
+ return;
+ }
+
+ // Explicit #bits, #rot implied
+ O << "#"
+ << markup("<imm:")
+ << Bits
+ << markup(">")
+ << ", #"
+ << markup("<imm:")
+ << Rot
+ << markup(">");
+}
+
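
A worked example of the two print forms (rotr32 reimplemented locally; the getSOImmVal round-trip check is described in comments rather than reimplemented):

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned R) {
  R &= 31;
  return R == 0 ? V : (V >> R) | (V << (32 - R));
}

int main() {
  // Canonical case: 0x2FF decodes to 0xF000000F, and re-encoding that value
  // yields 0x2FF again, so the printer emits the plain (signed) value.
  printf("0x%08X\n", rotr32(0x2FF & 0xFF, (0x2FF & 0xF00) >> 7));
  // Non-canonical case: 0x200 also decodes to 0, but the canonical encoding
  // of 0 is 0x000, so the printer preserves the form as "#0, #4".
  printf("0x%08X\n", rotr32(0x200 & 0xFF, (0x200 & 0xF00) >> 7));
  return 0;
}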
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << markup("<imm:")
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index f671fe49bb66..f179e017278e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMINSTPRINTER_H
-#define ARMINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -51,7 +51,6 @@ public:
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
@@ -117,6 +116,7 @@ public:
void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
@@ -131,6 +131,7 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index b6c85c2e9466..f0eed9b811d4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
-#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -575,6 +575,53 @@ namespace ARM_AM {
return Val;
}
+  // Generic validation for a single-byte immediate (0X00, 00X0, etc.).
+ static inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
+ assert(Size >= 1 && Size <= 4 && "Invalid size");
+ unsigned count = 0;
+ for (unsigned i = 0; i < Size; ++i) {
+ if (Value & 0xff) count++;
+ Value >>= 8;
+ }
+ return count == 1;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi16splat(unsigned Value) {
+ if (Value > 0xffff)
+ return false;
+ // i16 value with set bits only in one byte X0 or 0X.
+ return Value == 0 || isNEONBytesplat(Value, 2);
+ }
+
+  /// Encode a NEON 16-bit splat immediate for instructions like VBIC/VORR.
+ static inline unsigned encodeNEONi16splat(unsigned Value) {
+ assert(isNEONi16splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ return Value;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi32splat(unsigned Value) {
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return Value == 0 || isNEONBytesplat(Value, 4);
+ }
+
+  /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
+ static inline unsigned encodeNEONi32splat(unsigned Value) {
+ assert(isNEONi32splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ return Value;
+ }
+
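
The splat helpers exercised standalone (isNEONBytesplat copied from the addition above with the assert dropped; the i16 encode step is inlined):

#include <cstdio>

static bool isNEONBytesplat(unsigned Value, unsigned Size) {
  unsigned count = 0;
  for (unsigned i = 0; i < Size; ++i) {
    if (Value & 0xff) count++;
    Value >>= 8;
  }
  return count == 1;
}

int main() {
  printf("%d %d\n", isNEONBytesplat(0xAB00, 2),   // 1: single set byte
                    isNEONBytesplat(0xABCD, 2));  // 0: two set bytes
  // encodeNEONi16splat(0xAB00): Value >= 0x100, so (0xAB00 >> 8) | 0xa00
  printf("0x%X\n", (0xAB00 >> 8) | 0xa00);        // 0xAAB
  return 0;
}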
AMSubMode getLoadStoreMultipleSubMode(int Opcode);
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/lib/Target/ARM/MCTargetDesc/ARMArchName.h
index 34b9fc126ff1..bc056737a82b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMArchName.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMARCHNAME_H
-#define ARMARCHNAME_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
namespace llvm {
namespace ARM {
@@ -24,4 +24,4 @@ enum ArchKind {
} // namespace ARM
} // namespace llvm
-#endif // ARMARCHNAME_H
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 7acd9cc4d302..0b2e3b0e67bb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -9,6 +9,10 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAsmBackendDarwin.h"
+#include "MCTargetDesc/ARMAsmBackendELF.h"
+#include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
@@ -35,164 +39,136 @@ namespace {
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
ARMELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
- /*HasRelocationAddend*/ false) {}
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
};
-class ARMAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo* STI;
- bool isThumbMode; // Currently emitting Thumb code.
- bool IsLittleEndian; // Big or little endian.
-public:
- ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle)
- : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
- isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {}
-
- ~ARMAsmBackend() {
- delete STI;
- }
-
- unsigned getNumFixupKinds() const override {
- return ARM::NumTargetFixupKinds;
- }
-
- bool hasNOP() const {
- return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
- }
+const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: a 16-bit immediate scattered across two chunks, bits 0-11
+ // and 16-19.
+ {"fixup_arm_movt_hi16", 0, 20, 0},
+ {"fixup_arm_movw_lo16", 0, 20, 0},
+ {"fixup_t2_movt_hi16", 0, 20, 0},
+ {"fixup_t2_movw_lo16", 0, 20, 0},
+ };
+ const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: a 16-bit immediate scattered across two chunks, bits 0-11
+ // and 16-19.
+ {"fixup_arm_movt_hi16", 12, 20, 0},
+ {"fixup_arm_movw_lo16", 12, 20, 0},
+ {"fixup_t2_movt_hi16", 12, 20, 0},
+ {"fixup_t2_movw_lo16", 12, 20, 0},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
+}
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
-// This table *must* be in the order that the fixup_* kinds are defined in
-// ARMFixupKinds.h.
-//
-// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
-// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
-{ "fixup_arm_movt_hi16", 0, 20, 0 },
-{ "fixup_arm_movw_lo16", 0, 20, 0 },
-{ "fixup_t2_movt_hi16", 0, 20, 0 },
-{ "fixup_t2_movw_lo16", 0, 20, 0 },
- };
- const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
-// This table *must* be in the order that the fixup_* kinds are defined in
-// ARMFixupKinds.h.
-//
-// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel },
-// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
-{ "fixup_arm_movt_hi16", 12, 20, 0 },
-{ "fixup_arm_movw_lo16", 12, 20, 0 },
-{ "fixup_t2_movt_hi16", 12, 20, 0 },
-{ "fixup_t2_movw_lo16", 12, 20, 0 },
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
- }
-
- /// processFixupValue - Target hook to process the literal value of a fixup
- /// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
-
- bool mayNeedRelaxation(const MCInst &Inst) const override;
-
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override;
-
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
-
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
-
- void handleAssemblerFlag(MCAssemblerFlag Flag) override {
- switch (Flag) {
- default: break;
- case MCAF_Code16:
- setIsThumb(true);
- break;
- case MCAF_Code32:
- setIsThumb(false);
- break;
- }
+void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default:
+ break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
}
-
- unsigned getPointerSize() const { return 4; }
- bool isThumb() const { return isThumbMode; }
- void setIsThumb(bool it) { isThumbMode = it; }
- bool isLittle() const { return IsLittleEndian; }
-};
+}
} // end anonymous namespace
static unsigned getRelaxedOpcode(unsigned Op) {
switch (Op) {
- default: return Op;
- case ARM::tBcc: return ARM::t2Bcc;
- case ARM::tLDRpci: return ARM::t2LDRpci;
- case ARM::tADR: return ARM::t2ADR;
- case ARM::tB: return ARM::t2B;
- case ARM::tCBZ: return ARM::tHINT;
- case ARM::tCBNZ: return ARM::tHINT;
+ default:
+ return Op;
+ case ARM::tBcc:
+ return ARM::t2Bcc;
+ case ARM::tLDRpci:
+ return ARM::t2LDRpci;
+ case ARM::tADR:
+ return ARM::t2ADR;
+ case ARM::tB:
+ return ARM::t2B;
+ case ARM::tCBZ:
+ return ARM::tHINT;
+ case ARM::tCBNZ:
+ return ARM::tHINT;
}
}
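A pattern worth noting in the two tables above: they differ only in the Offset column, and in the big-endian table the offset of every fixup equals the width of the containing instruction minus the width of the fixup field. The check below is an observation about the table data, not an LLVM rule; bigEndianOffset is a hypothetical helper.

    #include <cassert>

    // Offset of a fixup field within a big-endian-stored instruction:
    // the field sits in the low bits, so it starts (InstrBits - FieldBits)
    // bits into the stored word.
    static unsigned bigEndianOffset(unsigned InstrBits, unsigned FieldBits) {
      return InstrBits - FieldBits;
    }

    int main() {
      assert(bigEndianOffset(32, 24) == 8);  // fixup_arm_condbranch
      assert(bigEndianOffset(16, 8) == 8);   // fixup_arm_thumb_bcc
      assert(bigEndianOffset(32, 20) == 12); // fixup_arm_movt_hi16
      assert(bigEndianOffset(32, 32) == 0);  // fixup_t2_condbranch
    }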
@@ -202,8 +178,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
@@ -265,7 +240,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.addOperand(MCOperand::CreateImm(14));
Res.addOperand(MCOperand::CreateReg(0));
return;
- }
+ }
// The rest of instructions we're relaxing have the same operands.
// We just need to update to the proper opcode.
@@ -276,11 +251,11 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
- const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
- : Thumb1_16bitNopEncoding;
+ const uint16_t nopEncoding =
+ hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write16(nopEncoding);
@@ -289,18 +264,26 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
// ARM mode
- const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
- : ARMv4_NopEncoding;
+ const uint32_t nopEncoding =
+ hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(nopEncoding);
// FIXME: should this function return false when unable to write exactly
// 'Count' bytes with NOP encodings?
switch (Count % 4) {
- default: break; // No leftover bytes to write
- case 1: OW->Write8(0); break;
- case 2: OW->Write16(0); break;
- case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ default:
+ break; // No leftover bytes to write
+ case 1:
+ OW->Write8(0);
+ break;
+ case 2:
+ OW->Write16(0);
+ break;
+ case 3:
+ OW->Write16(0);
+ OW->Write8(0xa0);
+ break;
}
return true;
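The ARM-mode padding path above is easy to model: Count/4 full NOP words, then one, two, or three trailing bytes. Below is a minimal sketch of that plan, assuming we only care about the ordered (width, value) writes and not the MCObjectWriter API; armPadPlan is a hypothetical name.

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Same write sequence as the ARM-mode branch of writeNopData:
    // whole NOP words first, then the leftover bytes.
    static std::vector<std::pair<unsigned, uint32_t>>
    armPadPlan(uint64_t Count, bool HasNOP) {
      const uint32_t Nop = HasNOP ? 0xe320f000 : 0xe1a00000; // NOP / MOV r0,r0
      std::vector<std::pair<unsigned, uint32_t>> Plan(Count / 4, {4, Nop});
      switch (Count % 4) {
      case 1: Plan.push_back({1, 0x00}); break;
      case 2: Plan.push_back({2, 0x0000}); break;
      case 3: Plan.push_back({2, 0x0000}); Plan.push_back({1, 0xa0}); break;
      }
      return Plan;
    }

    int main() {
      // 11 bytes of padding: two NOP words, a halfword, and a byte.
      return armPadPlan(11, /*HasNOP=*/true).size() == 4 ? 0 : 1;
    }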
@@ -313,8 +296,7 @@ static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
uint32_t Swapped = (Value & 0xFFFF0000) >> 16;
Swapped |= (Value & 0x0000FFFF) << 16;
return Swapped;
- }
- else
+ } else
return Value;
}
@@ -351,7 +333,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ // Fallthrough
case ARM::fixup_arm_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
@@ -363,7 +345,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_t2_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ // Fallthrough
case ARM::fixup_t2_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned i = (Value & 0x800) >> 11;
@@ -379,7 +361,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
Value -= 4;
- // FALLTHROUGH
+ // FALLTHROUGH
case ARM::fixup_t2_ldst_pcrel_12: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value -= 4;
@@ -438,7 +420,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_blx:
// These values don't encode the low two bits since they're always zero.
// Offset by 8 just as above.
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
return 0;
return 0xffffff & ((Value - 8) >> 2);
@@ -447,17 +430,17 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value >>= 1; // Low bit is not encoded.
uint32_t out = 0;
- bool I = Value & 0x800000;
+ bool I = Value & 0x800000;
bool J1 = Value & 0x400000;
bool J2 = Value & 0x200000;
J1 ^= I;
J2 ^= I;
- out |= I << 26; // S bit
- out |= !J1 << 13; // J1 bit
- out |= !J2 << 11; // J2 bit
- out |= (Value & 0x1FF800) << 5; // imm6 field
- out |= (Value & 0x0007FF); // imm11 field
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm10 field
+ out |= (Value & 0x0007FF); // imm11 field
return swapHalfWords(out, IsLittleEndian);
}
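The branch-offset scattering in the case above is compact enough to mirror in isolation. scatterT2Branch below is a hypothetical, self-contained copy using the same masks and XORs, evaluated before the little-endian halfword swap, with one hand-computed spot check.

    #include <cassert>
    #include <cstdint>

    // Scatter a Thumb2 unconditional-branch offset into S/J1/J2/imm10/imm11.
    static uint32_t scatterT2Branch(uint32_t Value) {
      Value >>= 1; // the low bit is never encoded
      bool I = (Value & 0x800000) != 0;
      bool J1 = (Value & 0x400000) != 0;
      bool J2 = (Value & 0x200000) != 0;
      J1 ^= I;
      J2 ^= I;
      uint32_t out = 0;
      out |= uint32_t(I) << 26;       // S bit
      out |= uint32_t(!J1) << 13;     // J1 bit
      out |= uint32_t(!J2) << 11;     // J2 bit
      out |= (Value & 0x1FF800) << 5; // imm10 field
      out |= (Value & 0x0007FF);      // imm11 field
      return out;
    }

    int main() {
      // Offset 4: halved to 2, S = 0, so J1/J2 invert to 1 -> 0x2000 | 0x800 | 2.
      assert(scatterT2Branch(0x4) == 0x2802);
    }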
@@ -498,7 +481,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
- (uint16_t)imm11Bits);
+ (uint16_t)imm11Bits);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_blx: {
@@ -515,7 +498,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
uint32_t offset = (Value - 2) >> 2;
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
offset = 0;
uint32_t signBit = (offset & 0x400000) >> 22;
@@ -528,7 +512,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
- ((uint16_t)imm10LBits) << 1);
+ ((uint16_t)imm10LBits) << 1);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_cp:
@@ -564,7 +548,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ // Fall through.
case ARM::fixup_t2_pcrel_10: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -735,7 +719,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
- if (!Value) return; // Doesn't change encoding.
+ if (!Value)
+ return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
@@ -757,80 +742,36 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
-namespace {
-// FIXME: This should be in a separate file.
-class ARMWinCOFFAsmBackend : public ARMAsmBackend {
-public:
- ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple)
- : ARMAsmBackend(T, Triple, true) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
- }
-};
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t OSABI, bool IsLittle)
- : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMELFObjectWriter(OS, OSABI, isLittle());
- }
-};
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const MachO::CPUSubTypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
- HasDataInCodeSupport = true;
- }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- MachO::CPU_TYPE_ARM,
- Subtype);
- }
-};
-
-} // end anonymous namespace
-
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU,
- bool isLittle) {
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU, bool isLittle) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
+ default:
+ llvm_unreachable("unsupported object format");
case Triple::MachO: {
MachO::CPUSubTypeARM CS =
- StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
- .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
- .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
- .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
- .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
- .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
- .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
- .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
- .Default(MachO::CPU_SUBTYPE_ARM_V7);
-
- return new DarwinARMAsmBackend(T, TT, CS);
+ StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
+ .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
+ .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
+ .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
+ .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
+ .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
+ .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
+ .Default(MachO::CPU_SUBTYPE_ARM_V7);
+
+ return new ARMAsmBackendDarwin(T, TT, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMWinCOFFAsmBackend(T, TT);
+ return new ARMAsmBackendWinCOFF(T, TT);
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ return new ARMAsmBackendELF(T, TT, OSABI, isLittle);
}
}
@@ -847,14 +788,13 @@ MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, false);
}
-
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
new file mode 100644
index 000000000000..f4f10821037e
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -0,0 +1,69 @@
+//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo *STI;
+ bool isThumbMode; // Currently emitting Thumb code.
+ bool IsLittleEndian; // Big or little endian.
+public:
+ ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {}
+
+ ~ARMAsmBackend() override { delete STI; }
+
+ unsigned getNumFixupKinds() const override {
+ return ARM::NumTargetFixupKinds;
+ }
+
+ bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ void handleAssemblerFlag(MCAssemblerFlag Flag) override;
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+ bool isLittle() const { return IsLittleEndian; }
+};
+} // end anonymous namespace
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
new file mode 100644
index 000000000000..3bd7ab73839a
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -0,0 +1,33 @@
+//===-- ARMAsmBackendDarwin.h - ARM Asm Backend Darwin --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendDarwin : public ARMAsmBackend {
+public:
+ const MachO::CPUSubTypeARM Subtype;
+ ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
+ Subtype);
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
new file mode 100644
index 000000000000..4efd32515426
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -0,0 +1,27 @@
+//===-- ARMAsmBackendELF.h - ARM Asm Backend ELF ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDELF_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDELF_H
+
+using namespace llvm;
+namespace {
+class ARMAsmBackendELF : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
+ : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMELFObjectWriter(OS, OSABI, isLittle());
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
new file mode 100644
index 000000000000..33be347b03ac
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -0,0 +1,26 @@
+//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendWinCOFF : public ARMAsmBackend {
+public:
+ ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
+ : ARMAsmBackend(T, Triple, true) {}
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 1686d76b8e1b..4289a73e9d6b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEINFO_H
-#define ARMBASEINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
#include "ARMMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a86601b6bb5b..a821a6b0b532 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -51,7 +51,7 @@ ARMELFObjectWriter::~ARMELFObjectWriter() {}
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
- // FIXME: This is extremelly conservative. This really needs to use a
+ // FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
switch (Type) {
@@ -102,7 +102,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_uncondbl:
switch (Modifier) {
case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_ARM_PLT32;
+ Type = ELF::R_ARM_CALL;
break;
case MCSymbolRefExpr::VK_ARM_TLSCALL:
Type = ELF::R_ARM_TLS_CALL;
@@ -148,6 +148,22 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_1:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS8;
+ break;
+ }
+ break;
+ case FK_Data_2:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS16;
+ break;
+ }
+ break;
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -184,6 +200,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_PREL31:
Type = ELF::R_ARM_PREL31;
break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_SBREL32;
+ break;
case MCSymbolRefExpr::VK_ARM_TLSLDO:
Type = ELF::R_ARM_TLS_LDO32;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 7b5d8b01dfe6..99b5c628f506 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -300,7 +300,19 @@ private:
StringRef StringValue;
static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
- return (LHS.Tag < RHS.Tag);
+ // The conformance tag must be emitted first when serialised
+ // into an object file. Specifically, the addenda to the ARM ABI
+ // states that (2.3.7.4):
+ //
+ // "To simplify recognition by consumers in the common case of
+ // claiming conformity for the whole file, this tag should be
+ // emitted first in a file-scope sub-subsection of the first
+ // public subsection of the attributes section."
+ //
+ // So it is special-cased in this comparison predicate when the
+ // attributes are sorted in finishAttributeSection().
+ return (RHS.Tag != ARMBuildAttrs::conformance) &&
+ ((LHS.Tag == ARMBuildAttrs::conformance) || (LHS.Tag < RHS.Tag));
}
};
@@ -541,6 +553,10 @@ public:
/// necessary.
void EmitValueImpl(const MCExpr *Value, unsigned Size,
const SMLoc &Loc) override {
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && Size != 4)
+ getContext().FatalError(Loc, "relocated expression must be 32-bit");
+
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
}
@@ -848,6 +864,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
+ // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
+ // it uses the FP_ARMV8_D16 build attribute.
+ case ARM::FPV5_D16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPARMv8B,
+ /* OverwriteExisting= */ false);
+ break;
+
case ARM::NEON:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3A,
@@ -971,12 +995,12 @@ void ARMTargetELFStreamer::finishAttributeSection() {
Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItem::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitBytes(item.StringValue);
Streamer.EmitIntValue(0, 1); // '\0'
break;
case AttributeItem::NumericAndTextAttributes:
Streamer.EmitULEB128IntValue(item.IntValue);
- Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitBytes(item.StringValue);
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
@@ -1339,10 +1363,9 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) {
return S;
}
- MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack,
- bool IsThumb) {
+MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
@@ -1352,8 +1375,6 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) {
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
return S;
}
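The effect of the predicate is easiest to see with a toy sort. The sketch below assumes the ABI tag values conformance = 67, CPU_name = 5, and CPU_arch = 6; Item and lessTag are illustrative stand-ins for the private AttributeItem machinery.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    enum { Conformance = 67 }; // ARMBuildAttrs::conformance

    struct Item { unsigned Tag; };

    // Same comparison as LessTag: conformance sorts before everything else.
    static bool lessTag(const Item &L, const Item &R) {
      return (R.Tag != Conformance) &&
             ((L.Tag == Conformance) || (L.Tag < R.Tag));
    }

    int main() {
      std::vector<Item> Items = {{6 /*CPU_arch*/}, {Conformance}, {5 /*CPU_name*/}};
      std::sort(Items.begin(), Items.end(), lessTag);
      assert(Items[0].Tag == Conformance); // emitted first, per the ABI addenda
      assert(Items[1].Tag == 5 && Items[2].Tag == 6);
    }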
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index bfd9e3364825..46ba57170db5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARM_ARMFIXUPKINDS_H
-#define LLVM_ARM_ARMFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 7a19208cffc7..66a1618c370a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "ARMMCAsmInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -55,7 +55,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
- HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
@@ -90,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
PrivateGlobalPrefix = "$M";
+ PrivateLabelPrefix = "$M";
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
@@ -102,8 +102,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
- HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::None;
UseParensForSymbolVariant = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 51cfa0adc1a9..6cb471537f6e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARMTARGETASMINFO_H
-#define LLVM_ARMTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
@@ -21,7 +21,8 @@
namespace llvm {
class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
- void anchor() override;
+ virtual void anchor();
+
public:
explicit ARMMCAsmInfoDarwin(StringRef TT);
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index b8ee55574972..7320f4053f7f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -304,6 +304,28 @@ public:
return Binary;
}
+ unsigned getModImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &ST) const {
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // Support for fixups (MCFixup)
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // In the instruction encoding this value is always placed in the lowest
+ // 12 bits, so we don't have to perform any specific adjustments.
+ // Due to the requirements of relocatable records we have to use FK_Data_4.
+ // See ARMELFObjectWriter::ExplicitRelSym and
+ // ARMELFObjectWriter::GetRelocTypeInner for more details.
+ MCFixupKind Kind = MCFixupKind(FK_Data_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // Immediate is already in its encoded format
+ return MO.getImm();
+ }
+
/// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index e545e3c2f301..68d32b27fd7d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -35,12 +35,6 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
OS << ')';
}
-bool
-ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return false;
-}
-
void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index c5c0b10f8ad9..06bf6c97a4f1 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMCEXPR_H
-#define ARMMCEXPR_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
#include "llvm/MC/MCExpr.h"
@@ -58,8 +58,11 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
- void visitUsedExpr(MCStreamer &Streamer) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
+ return false;
+ }
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 6a3ec8fcc486..a6310e5093bc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -64,10 +64,60 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
}
static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
- std::string &Info) {
- if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
- MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
- Info = "applying IT instruction to more than one subsequent instruction is deprecated";
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is "
+ "deprecated";
+ return true;
+ }
+
+ return false;
+}
+
+static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ if (MI.getOperand(OI).getReg() == ARM::SP ||
+ MI.getOperand(OI).getReg() == ARM::PC) {
+ Info = "use of SP or PC in the list is deprecated";
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ bool ListContainsPC = false, ListContainsLR = false;
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ switch (MI.getOperand(OI).getReg()) {
+ default:
+ break;
+ case ARM::LR:
+ ListContainsLR = true;
+ break;
+ case ARM::PC:
+ ListContainsPC = true;
+ break;
+ case ARM::SP:
+ Info = "use of SP in the list is deprecated";
+ return true;
+ }
+ }
+
+ if (ListContainsPC && ListContainsLR) {
+ Info = "use of LR and PC simultaneously in the list is deprecated";
return true;
}
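Both new deprecation checks boil down to a scan over the register list. A simplified model of the load case follows; loadListDeprecated and the Reg enum are hypothetical, whereas the real code walks MCInst operands starting at index 4.

    #include <cstdio>
    #include <vector>

    enum Reg { R0, R1, R2, R3, SP = 13, LR = 14, PC = 15 };

    // Mirrors getARMLoadDeprecationInfo: SP anywhere in the list is
    // deprecated, as is LR and PC appearing together.
    static bool loadListDeprecated(const std::vector<Reg> &List,
                                   const char *&Info) {
      bool HasPC = false, HasLR = false;
      for (Reg R : List) {
        if (R == SP) {
          Info = "use of SP in the list is deprecated";
          return true;
        }
        HasLR |= (R == LR);
        HasPC |= (R == PC);
      }
      if (HasPC && HasLR) {
        Info = "use of LR and PC simultaneously in the list is deprecated";
        return true;
      }
      return false;
    }

    int main() {
      const char *Info = nullptr;
      if (loadListDeprecated({R0, LR, PC}, Info)) // e.g. ldm r0!, {r0, lr, pc}
        puts(Info);
    }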
@@ -90,6 +140,8 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
switch (triple.getSubArch()) {
+ default:
+ llvm_unreachable("invalid sub-architecture for ARM");
case Triple::ARMSubArch_v8:
if (NoCPU)
// v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
@@ -215,31 +267,14 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
MCAsmInfo *MAI;
- switch (TheTriple.getOS()) {
- case llvm::Triple::Darwin:
- case llvm::Triple::IOS:
- case llvm::Triple::MacOSX:
+ if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
MAI = new ARMMCAsmInfoDarwin(TT);
- break;
- case llvm::Triple::Win32:
- switch (TheTriple.getEnvironment()) {
- case llvm::Triple::Itanium:
- MAI = new ARMCOFFMCAsmInfoGNU();
- break;
- case llvm::Triple::MSVC:
- MAI = new ARMCOFFMCAsmInfoMicrosoft();
- break;
- default:
- llvm_unreachable("invalid environment");
- }
- break;
- default:
- if (TheTriple.isOSBinFormatMachO())
- MAI = new ARMMCAsmInfoDarwin(TT);
- else
- MAI = new ARMELFMCAsmInfo(TT);
- break;
- }
+ else if (TheTriple.isWindowsItaniumEnvironment())
+ MAI = new ARMCOFFMCAsmInfoGNU();
+ else if (TheTriple.isWindowsMSVCEnvironment())
+ MAI = new ARMCOFFMCAsmInfoMicrosoft();
+ else
+ MAI = new ARMELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
@@ -263,11 +298,8 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
@@ -281,7 +313,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
TheTriple.getArch() == Triple::thumb);
}
}
@@ -356,8 +388,10 @@ extern "C" void LLVMInitializeARMTargetMC() {
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
+ createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
+ createARMMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 5326e564f363..a6c20d5f94d6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMCTARGETDESC_H
-#define ARMMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 186776a1944f..3187d36f7519 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -54,10 +54,10 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) override;
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override;
};
}
@@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
@@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
@@ -351,11 +351,10 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
}
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
+ MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
@@ -401,8 +400,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbolData *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// FIXME!
@@ -422,13 +421,12 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Check whether we need an external or internal relocation.
if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
FixedValue)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ RelSymbol = SD;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
@@ -447,11 +445,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
@@ -476,10 +471,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(MachO::ARM_RELOC_PAIR << 28));
- Writer->addRelocation(Fragment->getParent(), MREPair);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
}
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
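With IsExtern gone, r_word1 above packs only the index, the PC-relative flag, the log2 size, and the type; whether the relocation is external is presumably conveyed by passing a non-null RelSymbol to addRelocation instead. Below is a small illustrative packing check; packRWord1 is hypothetical and the type value 5 is arbitrary.

    #include <cassert>
    #include <cstdint>

    // Field layout of r_word1 as assembled in RecordRelocation above.
    static uint32_t packRWord1(uint32_t Index, bool IsPCRel, unsigned Log2Size,
                               unsigned Type) {
      return (Index << 0) | (uint32_t(IsPCRel) << 24) | (Log2Size << 25) |
             (Type << 28);
    }

    int main() {
      // Section ordinal 1, pc-relative, 4-byte field (log2 = 2), type 5.
      assert(packRWord1(1, true, 2, 5) == 0x55000001);
    }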
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index cd5875924300..e0c113ecfaa3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM_UNWIND_OP_ASM_H
-#define ARM_UNWIND_OP_ASM_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ARMEHABI.h"
@@ -90,4 +90,4 @@ private:
} // namespace llvm
-#endif // ARM_UNWIND_OP_ASM_H
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 2a7fe6188b50..db8fc925edb8 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = ARMDesc
parent = ARM
-required_libraries = ARMAsmPrinter ARMInfo MC Support
+required_libraries = ARMAsmPrinter ARMInfo MC MCDisassembler Support
add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f6d24e9e4f93..35fe9b3342d1 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -378,8 +378,8 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
- TRI = Fn.getTarget().getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
isLikeA9 = STI->isLikeA9() || STI->isSwift();
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index f069535ff3c0..c1601a3f29dd 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -15,7 +15,7 @@ TARGET = ARM
BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
- ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
+ ARMGenCallingConv.inc \
ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index baa97a7c479a..13f935877d54 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -52,9 +52,9 @@ void Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,12 +89,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
@@ -121,7 +124,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
if (!AFI->hasStackFrame()) {
@@ -132,7 +136,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
return;
}
@@ -196,7 +201,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
@@ -223,7 +229,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
}
@@ -241,13 +248,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
@@ -264,7 +273,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -321,12 +331,15 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
@@ -382,28 +395,65 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- if (ArgRegsSaveSize) {
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. Therefore, we
- // pop the old LR into R3 as a temporary.
-
+ bool IsV4PopReturn = false;
+ for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo())
+ if (CSI.getReg() == ARM::LR)
+ IsV4PopReturn = true;
+ IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps();
+
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. So instead
+ // we have to emit:
+ // POP {r3}
+ // ADD sp, #offset
+ // BX r3
+ // If this would clobber a return value, then generate this sequence instead:
+ // MOV ip, r3
+ // POP {r3}
+ // ADD sp, #offset
+ // MOV lr, r3
+ // MOV r3, ip
+ // BX lr
+ if (ArgRegsSaveSize || IsV4PopReturn) {
// Get the last instruction, tBX_RET
MBBI = MBB.getLastNonDebugInstr();
assert (MBBI->getOpcode() == ARM::tBX_RET);
- // Epilogue for vararg functions: pop LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ if (AFI->getReturnRegsCount() <= 3) {
+ // Epilogue: pop saved LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB.copyImplicitOps(&*MBBI);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(ARM::R3, RegState::Kill));
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::R3, RegState::Kill));
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::R3, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill));
+ // Keep the tBX_RET instruction
+ }
}
}
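
The new epilogue path covers two cases plain tPOP_RET cannot: vararg functions that must adjust SP after popping the saved LR, and ARMv4T targets where popping directly to PC would not interwork. A standalone model of the sequence selection spelled out in the comment above; R3Live models AFI->getReturnRegsCount() > 3, i.e. r3 carries part of the return value and must survive the pop:

#include <cstdio>

// Prints the Thumb1 return sequence chosen by the code above. Purely
// illustrative; SPAdjust corresponds to ArgRegsSaveSize.
void printReturnSequence(bool R3Live, int SPAdjust) {
  if (!R3Live) {
    std::printf("pop {r3}\nadd sp, #%d\nbx r3\n", SPAdjust);
  } else {
    std::printf("mov ip, r3\npop {r3}\nadd sp, #%d\n"
                "mov lr, r3\nmov r3, ip\nbx lr\n", SPAdjust);
  }
}

int main() {
  printReturnSequence(false, 8); // simple case: return via r3
  printReturnSequence(true, 8);  // r3 live: bounce through ip
}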
@@ -417,7 +467,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -456,7 +506,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
@@ -470,6 +520,9 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Special epilogue for vararg functions. See emitEpilogue
if (isVarArg)
continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (STI.hasV4TOps() && !STI.hasV5TOps())
+ continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
MIB.copyImplicitOps(&*MI);
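
With the epilogue now responsible for BX-based returns, the pop-folding here must skip not only vararg functions but also ARMv4T, where tPOP_RET (pop straight into PC) would not switch instruction sets on return. A sketch of the combined condition, assuming the subtarget predicates used above:

// Illustrative predicate: when true, LR's slot may be popped directly
// into PC (tPOP_RET); otherwise emitEpilogue emits the BX sequence.
static bool canPopReturn(bool IsVarArg, bool HasV4TOps, bool HasV5TOps) {
  if (IsVarArg)
    return false;             // SP still needs adjusting after the pop
  if (HasV4TOps && !HasV5TOps)
    return false;             // ARMv4T: pop to PC does not interwork
  return true;
}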
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index a227f8ece73f..b785b2823dae 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
-#define LLVM_ARM_THUMB1FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 68cbb5c580df..8ea912e27039 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ // The copy lowering depends on the architecture version and on which
+ // register classes the operands belong to.
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
+
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
+
+ if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
+ || !ARM::tGPRRegClass.contains(DestReg))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ else {
+ // FIXME: The performance consequences of this are going to be atrocious.
+ // Some things to try that should be better:
+ // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
+ // * 'movs $dst, $src' if cpsr isn't live
+ // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
+
+ // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
+ .addReg(DestReg, getDefRegState(true));
+ }
}
void Thumb1InstrInfo::
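
On pre-v6 Thumb1, a flag-preserving MOV between two low registers is architecturally unpredictable, so copyPhysReg now falls back to a push/pop pair unless the copy involves a high register or a v6+ core. A standalone model of the decision; the helper and the returned strings are illustrative:

#include <string>

// Mirrors the condition above: tMOVr is used only when it is predictable;
// otherwise the copy goes through the stack, as the FIXME laments.
std::string lowerThumb1Copy(bool HasV6Ops, bool SrcIsHigh, bool DstIsLow) {
  if (HasV6Ops || SrcIsHigh || !DstIsLow)
    return "mov dst, src";          // tMOVr
  // 'mov lo, lo' is unpredictable before v6.
  return "push {src}\npop {dst}";   // tPUSH / tPOP pair
}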
@@ -101,3 +122,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
+
+void
+Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM);
+}
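
The new expandLoadStackGuard hook lowers the stack-protector guard load into two Thumb1 loads: a literal-pool load of the guard's address (pc-relative under PIC, absolute otherwise), then a load through that pointer. A sketch of the resulting sequence; the literal syntax is illustrative:

#include <cstdio>

// Prints the two-instruction pattern produced above for __stack_chk_guard.
void printStackGuardLoad(bool PIC) {
  if (PIC)
    std::puts("ldr r0, .LCPI  @ tLDRLIT_ga_pcrel: r0 = &__stack_chk_guard");
  else
    std::puts("ldr r0, .Labs  @ tLDRLIT_ga_abs:   r0 = &__stack_chk_guard");
  std::puts("ldr r0, [r0]    @ tLDRi: load the guard value");
}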
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index c5845b75e587..9fba76052a11 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB1INSTRUCTIONINFO_H
-#define THUMB1INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
@@ -54,7 +54,10 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
}
-#endif // THUMB1INSTRUCTIONINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index f907b143ef1b..928c8e3c0098 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -66,8 +66,12 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
int Val,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
+ assert((isARMLowRegister(DestReg) ||
+ isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
NumBytes = -NumBytes;
}
unsigned LdReg = DestReg;
- if (DestReg == ARM::SP) {
+ if (DestReg == ARM::SP)
assert(BaseReg == ARM::SP && "Unexpected!");
+ if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
- }
- if (NumBytes <= 255 && NumBytes >= 0)
+ if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
.addImm(NumBytes).setMIFlags(MIFlags);
- else if (NumBytes < 0 && NumBytes >= -255) {
+ } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
.addImm(NumBytes).setMIFlags(MIFlags);
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
@@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
ARMCC::AL, 0, MIFlags);
// Emit add / sub.
- int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
+ : ARM::tADDrr);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (Opc != ARM::tADDhirr)
@@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
AddDefaultPred(MIB);
}
-/// calcNumMI - Returns the number of instructions required to materialize
-/// the specific add / sub r, c instruction.
-static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
- unsigned NumBits, unsigned Scale) {
- unsigned NumMIs = 0;
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
-
- if (Opc == ARM::tADDrSPi) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- NumMIs++;
- NumBits = 8;
- Scale = 1; // Followed by a number of tADDi8.
- Chunk = ((1 << NumBits) - 1) * Scale;
- }
-
- NumMIs += Bytes / Chunk;
- if ((Bytes % Chunk) != 0)
- NumMIs++;
- if (ExtraOpc)
- NumMIs++;
- return NumMIs;
-}
-
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code.
+/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
+/// SUBs first, and uses a constant pool value if the instruction sequence would
+/// be too long. This is allowed to modify the condition flags.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
DebugLoc dl,
@@ -172,151 +155,145 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
bool isSub = NumBytes < 0;
unsigned Bytes = (unsigned)NumBytes;
if (isSub) Bytes = -NumBytes;
- bool isMul4 = (Bytes & 3) == 0;
- bool isTwoAddr = false;
- bool DstNotEqBase = false;
- unsigned NumBits = 1;
- unsigned Scale = 1;
- int Opc = 0;
+
+ int CopyOpc = 0;
+ unsigned CopyBits = 0;
+ unsigned CopyScale = 1;
+ bool CopyNeedsCC = false;
int ExtraOpc = 0;
- bool NeedCC = false;
-
- if (DestReg == BaseReg && BaseReg == ARM::SP) {
- assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
- NumBits = 7;
- Scale = 4;
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- isTwoAddr = true;
- } else if (!isSub && BaseReg == ARM::SP) {
- // r1 = add sp, 403
- // =>
- // r1 = add sp, 100 * 4
- // r1 = add r1, 3
- if (!isMul4) {
- Bytes &= ~3;
- ExtraOpc = ARM::tADDi3;
+ unsigned ExtraBits = 0;
+ unsigned ExtraScale = 1;
+ bool ExtraNeedsCC = false;
+
+ // Strategy:
+ // We need to select two types of instruction, maximizing the available
+ // immediate range of each. The instructions we use will depend on whether
+ // DestReg and BaseReg are low, high or the stack pointer.
+ // * CopyOpc - DestReg = BaseReg + imm
+ // This will be emitted once if DestReg != BaseReg, and never if
+ // DestReg == BaseReg.
+ // * ExtraOpc - DestReg = DestReg + imm
+ // This will be emitted as many times as necessary to add the
+ // full immediate.
+ // If the immediate ranges of these instructions are not large enough to cover
+ // NumBytes with a reasonable number of instructions, we fall back to using a
+ // value loaded from a constant pool.
+ if (DestReg == ARM::SP) {
+ if (BaseReg == ARM::SP) {
+ // sp -> sp
+ // Already in right reg, no copy needed
+ } else {
+ // low -> sp or high -> sp
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
}
- NumBits = 8;
- Scale = 4;
- Opc = ARM::tADDrSPi;
- } else {
- // sp = sub sp, c
- // r1 = sub sp, c
- // r8 = sub sp, c
- if (DestReg != BaseReg)
- DstNotEqBase = true;
- NumBits = 8;
- if (DestReg == ARM::SP) {
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
- NumBits = 7;
- Scale = 4;
+ ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ ExtraBits = 7;
+ ExtraScale = 4;
+ } else if (isARMLowRegister(DestReg)) {
+ if (BaseReg == ARM::SP) {
+ // sp -> low
+ assert(!isSub && "Thumb1 does not have tSUBrSPi");
+ CopyOpc = ARM::tADDrSPi;
+ CopyBits = 8;
+ CopyScale = 4;
+ } else if (DestReg == BaseReg) {
+ // low -> same low
+ // Already in right reg, no copy needed
+ } else if (isARMLowRegister(BaseReg)) {
+ // low -> different low
+ CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
+ CopyBits = 3;
+ CopyNeedsCC = true;
} else {
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NumBits = 8;
- NeedCC = true;
+ // high -> low
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
}
- isTwoAddr = true;
+ ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ ExtraBits = 8;
+ ExtraNeedsCC = true;
+ } else /* DestReg is high */ {
+ if (DestReg == BaseReg) {
+ // high -> same high
+ // Already in right reg, no copy needed
+ } else {
+ // {low,high,sp} -> high
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ }
+ ExtraOpc = 0;
}
- unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ // We could handle an unaligned immediate with an unaligned copy instruction
+ // and an aligned extra instruction, but this case is not currently needed.
+ assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
+ "Unaligned offset, but all instructions require alignment");
+
+ unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
+ // If we would emit the copy with an immediate of 0, just use tMOVr.
+ if (CopyOpc && Bytes < CopyScale) {
+ CopyOpc = ARM::tMOVr;
+ CopyScale = 1;
+ CopyNeedsCC = false;
+ CopyRange = 0;
+ }
+ unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
+ unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
+ unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
+
+ // We could handle this case when the copy instruction does not require an
+ // aligned immediate, but we do not currently do this.
+ assert(RangeAfterCopy % ExtraScale == 0 &&
+ "Extra instruction requires immediate to be aligned");
+
+ unsigned RequiredExtraInstrs;
+ if (ExtraRange)
+ RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
+ else if (RangeAfterCopy > 0)
+ // We need an extra instruction but none is available
+ RequiredExtraInstrs = 1000000;
+ else
+ RequiredExtraInstrs = 0;
+ unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
- if (NumMIs > Threshold) {
- // This will expand into too many instructions. Load the immediate from a
- // constpool entry.
+
+ // Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
+ if (RequiredInstrs > Threshold) {
emitThumbRegPlusImmInReg(MBB, MBBI, dl,
DestReg, BaseReg, NumBytes, true,
TII, MRI, MIFlags);
return;
}
- if (DstNotEqBase) {
- if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
- // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
- unsigned Chunk = (1 << 3) - 1;
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
- const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
- .setMIFlags(MIFlags));
- AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
- .addReg(BaseReg, RegState::Kill))
- .setMIFlags(MIFlags);
+ // Emit zero or one copy instructions
+ if (CopyOpc) {
+ unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
+ Bytes -= CopyImm * CopyScale;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
+ if (CopyNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, RegState::Kill);
+ if (CopyOpc != ARM::tMOVr) {
+ MIB.addImm(CopyImm);
}
+ AddDefaultPred(MIB.setMIFlags(MIFlags));
+
BaseReg = DestReg;
}
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ // Emit zero or more in-place add/sub instructions
while (Bytes) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- ThisVal /= Scale;
- // Build the new tADD / tSUB.
- if (isTwoAddr) {
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
- if (NeedCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(DestReg).addImm(ThisVal);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
- } else {
- bool isKill = BaseReg != ARM::SP;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
- if (NeedCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
-
- BaseReg = DestReg;
- if (Opc == ARM::tADDrSPi) {
- // r4 = add sp, imm
- // r4 = add r4, imm
- // ...
- NumBits = 8;
- Scale = 1;
- Chunk = ((1 << NumBits) - 1) * Scale;
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NeedCC = isTwoAddr = true;
- }
- }
- }
-
- if (ExtraOpc) {
- const MCInstrDesc &MCID = TII.get(ExtraOpc);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
- .addReg(DestReg, RegState::Kill)
- .addImm(((unsigned)NumBytes) & 3)
- .setMIFlags(MIFlags));
- }
-}
+ unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
+ Bytes -= ExtraImm * ExtraScale;
-/// emitThumbConstant - Emit a series of instructions to materialize a
-/// constant.
-static void emitThumbConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Imm,
- const TargetInstrInfo &TII,
- const Thumb1RegisterInfo& MRI,
- DebugLoc dl) {
- bool isSub = Imm < 0;
- if (isSub) Imm = -Imm;
-
- int Chunk = (1 << 8) - 1;
- int ThisVal = (Imm > Chunk) ? Chunk : Imm;
- Imm -= ThisVal;
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
- DestReg))
- .addImm(ThisVal));
- if (Imm > 0)
- emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
- if (isSub) {
- const MCInstrDesc &MCID = TII.get(ARM::tRSB);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
- .addReg(DestReg, RegState::Kill));
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
+ if (ExtraNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg).addImm(ExtraImm);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
}
}
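
The rewritten routine splits the work into at most one "copy" instruction (DestReg = BaseReg + imm) and zero or more "extra" instructions (DestReg += imm), and takes the constant-pool fallback when that would exceed the threshold. A standalone model of the counting logic above; it omits the tMOVr downgrade for offsets smaller than CopyScale:

#include <cstdio>

// Models RequiredCopyInstrs/RequiredExtraInstrs from the hunk above.
// Returns the instruction count, or -1 when the constant-pool fallback
// (emitThumbRegPlusImmInReg) would be taken instead.
int requiredInstrs(unsigned Bytes, bool HaveCopy, unsigned CopyBits,
                   unsigned CopyScale, unsigned ExtraBits,
                   unsigned ExtraScale, unsigned Threshold) {
  unsigned CopyRange = HaveCopy ? ((1u << CopyBits) - 1) * CopyScale : 0;
  unsigned ExtraRange = ((1u << ExtraBits) - 1) * ExtraScale;
  unsigned AfterCopy = Bytes > CopyRange ? Bytes - CopyRange : 0;
  unsigned Extra = ExtraRange ? (AfterCopy + ExtraRange - 1) / ExtraRange
                              : (AfterCopy ? 1000000u : 0);
  unsigned Total = (HaveCopy ? 1u : 0u) + Extra;
  return Total > Threshold ? -1 : (int)Total;
}

int main() {
  // r1 = sp + 1020: one tADDrSPi (8 bits, scaled by 4) is enough.
  std::printf("%d\n", requiredInstrs(1020, true, 8, 4, 8, 1, 2)); // 1
  // r1 = sp + 1028: tADDrSPi #1020 followed by tADDi8 #8.
  std::printf("%d\n", requiredInstrs(1028, true, 8, 4, 8, 1, 2)); // 2
  // r1 = sp + 2000: one copy plus four tADDi8 steps busts the threshold.
  std::printf("%d\n", requiredInstrs(2000, true, 8, 4, 8, 1, 2)); // -1
}

This same decomposition is what lets the frame-index rewriting hunk further down simply delegate tADDframe to emitThumbRegPlusImmediate and erase the pseudo, instead of re-implementing the offset splitting.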
@@ -352,86 +329,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (Opcode == ARM::tADDrSPi) {
+ if (Opcode == ARM::tADDframe) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
-
- // Can't use tADDrSPi if it's based off the frame pointer.
- unsigned NumBits = 0;
- unsigned Scale = 1;
- if (FrameReg != ARM::SP) {
- Opcode = ARM::tADDi3;
- NumBits = 3;
- } else {
- NumBits = 8;
- Scale = 4;
- assert((Offset & 3) == 0 &&
- "Thumb add/sub sp, #imm immediate must be multiple of 4!");
- }
-
- unsigned PredReg;
- if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
- // Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset
- MI.RemoveOperand(FrameRegIdx+1);
- return true;
- }
-
- // Common case: small offset, fits into instruction.
- unsigned Mask = (1 << NumBits) - 1;
- if (((Offset / Scale) & ~Mask) == 0) {
- // Replace the FrameIndex with sp / fp
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
- .addImm(Offset / Scale));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
- }
- return true;
- }
-
unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Bytes = (Offset > 0) ? Offset : -Offset;
- unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
- // MI would expand into a large number of instructions. Don't try to
- // simplify the immediate.
- if (NumMIs > 2) {
- emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
- *this);
- MBB.erase(II);
- return true;
- }
- if (Offset > 0) {
- // Translate r0 = add sp, imm to
- // r0 = add sp, 255*4
- // r0 = add r0, (imm - 255*4)
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
- }
- Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = std::next(II);
- emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
- *this);
- } else {
- // Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructions)
- // r0 = add r0, sp
- emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
-
- MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
- }
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
return true;
} else {
if (AddrMode != ARMII::AddrModeT1_s)
@@ -485,8 +389,11 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(
- MI.getParent()->getParent()->getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MI.getParent()
+ ->getParent()
+ ->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo());
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -512,7 +419,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// off the frame pointer (if, for example, there are alloca() calls in
// the function, the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
- const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -559,7 +466,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -570,7 +477,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MF.getFrameInfo()->getStackSize() + SPAdj;
if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
"Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
@@ -587,7 +494,10 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// when !hasReservedCallFrame().
#ifndef NDEBUG
if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
- assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ assert(MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 0c0abbe99042..5feaf525396e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB1REGISTERINFO_H
-#define THUMB1REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -60,4 +60,4 @@ public:
};
}
-#endif // THUMB1REGISTERINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index edb9ff3a24f2..fdcb522a9144 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -188,7 +188,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
true/*isImp*/, false/*isKill*/));
MachineInstr *LastITMI = MI;
- MachineBasicBlock::iterator InsertPos = MIB;
+ MachineBasicBlock::iterator InsertPos = MIB.getInstr();
++MBBI;
// Form IT block.
@@ -255,8 +255,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const Thumb2InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
if (!AFI->isThumbFunction())
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index a9df006fb8c1..91973e1c463e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -209,6 +209,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+void
+Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 34d45d307ff4..46a1f6d600a7 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB2INSTRUCTIONINFO_H
-#define THUMB2INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "Thumb2RegisterInfo.h"
@@ -61,6 +61,10 @@ public:
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
@@ -71,4 +75,4 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
}
-#endif // THUMB2INSTRUCTIONINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 782d81fd424d..0d5d85a00614 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -40,7 +40,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 8a33e6cea919..1dd94cc5027d 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB2REGISTERINFO_H
-#define THUMB2REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
@@ -35,4 +35,4 @@ public:
};
}
-#endif // THUMB2REGISTERINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 09debe76f1b6..c51eb8bedb95 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -335,7 +335,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
Opc == ARM::t2LDMDB_UPD);
- bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMDB_UPD);
bool isSPOk = isPCOk || isLROk;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -384,7 +384,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (MI->getOperand(1).getReg() == ARM::SP) {
Opc = Entry.NarrowOpc2;
ImmLimit = Entry.Imm2Limit;
- HasOffReg = false;
}
Scale = 4;
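
The isLROk change above fixes a narrowing bug: of the 32-bit store-multiple forms, only t2STMDB_UPD (store, decrement-before, SP writeback) matches tPUSH, the one 16-bit store-multiple that can encode LR, so t2STMIA_UPD must not be treated as LR-safe. A standalone restatement of the corrected predicate:

// Only t2STMDB_UPD has a 16-bit equivalent (tPUSH) that can store LR;
// the opcode names here stand in for the real ARM:: enumerators.
enum Opcode { t2STMIA_UPD, t2STMDB_UPD, t2LDMIA_RET };

static bool isLROkForReduction(Opcode Opc) {
  return Opc == t2STMDB_UPD;
}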
@@ -1003,7 +1002,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TII = static_cast<const Thumb2InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
// Optimizing / minimizing size?
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 06a74d720810..c61805b63f5e 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,7 +1,6 @@
add_llvm_library(LLVMTarget
Target.cpp
TargetIntrinsicInfo.cpp
- TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 673ade78a947..4bae7f817543 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -11,29 +11,35 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CPPTARGETMACHINE_H
-#define CPPTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_CPPBACKEND_CPPTARGETMACHINE_H
+#define LLVM_LIB_TARGET_CPPBACKEND_CPPTARGETMACHINE_H
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
class formatted_raw_ostream;
+class CPPSubtarget : public TargetSubtargetInfo {
+};
+
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS, Options) {}
+ : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
+private:
+ CPPSubtarget Subtarget;
+public:
+ const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) override;
-
- const DataLayout *getDataLayout() const override { return nullptr; }
};
extern Target TheCppBackendTarget;
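
This hunk shows the migration pattern applied throughout the patch: accessors such as getInstrInfo(), getRegisterInfo() and getFrameLowering() move from TargetMachine to TargetSubtargetInfo, so even the C++ backend, which generates no machine code, must expose a trivial subtarget. The typical call-site rewrite, both spellings taken from the ARM and Hexagon hunks in this diff (sketch, not compilable in isolation):

// Before (removed throughout this patch):
const TargetInstrInfo &OldTII = *MF.getTarget().getInstrInfo();

// After: the info objects are per-subtarget, reached via the
// MachineFunction; passes holding only a TargetMachine go through
// TM.getSubtargetImpl() instead.
const TargetInstrInfo &NewTII = *MF.getSubtarget().getInstrInfo();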
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 81b0e5680547..af7914f30366 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -1,28 +1,32 @@
set(LLVM_TARGET_DEFINITIONS Hexagon.td)
-tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
-tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(HexagonCommonTableGen)
add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCallingConvLower.cpp
HexagonCFGOptimizer.cpp
+ HexagonCopyToCombine.cpp
HexagonExpandPredSpillCode.cpp
+ HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
- HexagonFixupHwLoops.cpp
- HexagonMachineFunctionInfo.cpp
- HexagonMachineScheduler.cpp
- HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
HexagonISelLowering.cpp
+ HexagonMachineFunctionInfo.cpp
+ HexagonMachineScheduler.cpp
+ HexagonMCInstLower.cpp
+ HexagonNewValueJump.cpp
HexagonPeephole.cpp
HexagonRegisterInfo.cpp
HexagonRemoveSZExtArgs.cpp
@@ -33,11 +37,8 @@ add_llvm_target(HexagonCodeGen
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
HexagonVLIWPacketizer.cpp
- HexagonNewValueJump.cpp
- HexagonCopyToCombine.cpp
)
add_subdirectory(TargetInfo)
-add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
-
+add_subdirectory(Disassembler)
diff --git a/lib/Target/Hexagon/Disassembler/CMakeLists.txt b/lib/Target/Hexagon/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..755a45e2df8b
--- /dev/null
+++ b/lib/Target/Hexagon/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMHexagonDisassembler
+ HexagonDisassembler.cpp
+ )
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
new file mode 100644
index 000000000000..44f9d93e8fc7
--- /dev/null
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -0,0 +1,181 @@
+//===-- HexagonDisassembler.cpp - Disassembler for Hexagon ISA ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <array>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-disassembler"
+
+// Pull DecodeStatus and its enum values into the global namespace.
+typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// \brief Hexagon disassembler for all Hexagon platforms.
+class HexagonDisassembler : public MCDisassembler {
+public:
+ HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+}
+
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static const uint16_t IntRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31 };
+
+static const uint16_t PredRegDecoderTable[] = {
+ Hexagon::P0, Hexagon::P1, Hexagon::P2, Hexagon::P3 };
+
+static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
+ const uint16_t Table[], size_t Size) {
+ if (RegNo < Size) {
+ Inst.addOperand(MCOperand::CreateReg(Table[RegNo]));
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ void const *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = IntRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/, const void *Decoder) {
+ static const uint16_t CtrlRegDecoderTable[] = {
+ Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
+ Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7,
+ Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
+ Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH
+ };
+
+ if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = Hexagon::M0;
+ break;
+ case 1:
+ Register = Hexagon::M1;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/, const void *Decoder) {
+ static const uint16_t DoubleRegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
+ Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15
+ };
+
+ // Size is an element count (cf. the CtrlReg decoder above); passing
+ // sizeof's byte count would let out-of-range encodings slip past the
+ // bounds check.
+ return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable,
+ sizeof(DoubleRegDecoderTable) / sizeof(DoubleRegDecoderTable[0])));
+}
+
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ void const *Decoder) {
+ if (RegNo > 3)
+ return MCDisassembler::Fail;
+
+ unsigned Register = PredRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+#include "HexagonGenDisassemblerTables.inc"
+
+static MCDisassembler *createHexagonDisassembler(Target const &T,
+ MCSubtargetInfo const &STI,
+ MCContext &Ctx) {
+ return new HexagonDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeHexagonDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheHexagonTarget,
+ createHexagonDisassembler);
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ Size = 4;
+ if (Bytes.size() < 4)
+ return MCDisassembler::Fail;
+
+ uint32_t insn =
+ llvm::support::endian::read<uint32_t, llvm::support::little,
+ llvm::support::unaligned>(Bytes.data());
+
+ // Remove parse bits.
+ insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK);
+ DecodeStatus Result =
+ decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
+ HexagonMCInst::AppendImplicitOperands(MI);
+ return Result;
+}
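
getInstruction above decodes fixed 4-byte words: it reads a little-endian 32-bit value, strips the parse bits that encode packet structure, and hands the result to the TableGen-generated decodeInstruction. A standalone model of the fetch step; the mask value mirrors HexagonII::InstParseBits::INST_PARSE_MASK and is an assumption here:

#include <cstdint>
#include <cstring>

// Reads one Hexagon instruction word and clears the parse bits
// (bits 15:14, assumed mask 0x0000c000) before table-driven decode.
uint32_t fetchHexagonWord(const uint8_t *Bytes) {
  uint32_t Insn;
  std::memcpy(&Insn, Bytes, sizeof(Insn)); // assumes a little-endian host
  return Insn & ~UINT32_C(0x0000c000);     // drop the parse bits
}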
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt
index 59849aa7e1c7..43bace75a852 100644
--- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;===-- ./lib/Target/Hexagon/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = HexagonAsmPrinter
+name = HexagonDisassembler
parent = Hexagon
-required_libraries = HexagonDesc MC Support
+required_libraries = HexagonDesc HexagonInfo MCDisassembler Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/InstPrinter/Makefile b/lib/Target/Hexagon/Disassembler/Makefile
index 20331d8807ec..16c305fe4074 100644
--- a/lib/Target/Hexagon/InstPrinter/Makefile
+++ b/lib/Target/Hexagon/Disassembler/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Hexagon/InstPrinter/Makefile ----------------------------===##
+##===-- lib/Target/Hexagon/Disassembler/Makefile -----------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -6,10 +6,11 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
+
LEVEL = ../../../..
-LIBRARYNAME = LLVMHexagonAsmPrinter
+LIBRARYNAME = LLVMHexagonDisassembler
-# Hack: we need to include 'main' Hexagon target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 5467ee361257..64ae69c60e5d 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_Hexagon_H
-#define TARGET_Hexagon_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 5f4a6c64f703..2068c6da4e48 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -50,6 +50,8 @@ def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
+
+class ImmRegShl;
// PredRel - Filter class used to relate non-predicated instructions with their
// predicated forms.
class PredRel;
@@ -137,7 +139,7 @@ def getPredOldOpcode : InstrMapping {
//
def getNewValueOpcode : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
let ColFields = ["NValueST"];
let KeyCol = ["false"];
let ValueCols = [["true"]];
@@ -149,7 +151,7 @@ def getNewValueOpcode : InstrMapping {
//
def getNonNVStore : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
let ColFields = ["NValueST"];
let KeyCol = ["true"];
let ValueCols = [["false"]];
@@ -180,6 +182,14 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
+def getRegShlForm : InstrMapping {
+ let FilterClass = "ImmRegShl";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
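
Adding "addrMode" to RowFields tightens both new-value relation maps: a store and its .new counterpart must now agree on addressing mode as well as base opcode and predication, so, for example, an absolute store can no longer be related to an indexed new-value form. A hedged sketch of how such a relation map is consumed; the generated lookup name Hexagon::getNewValueOpcode and its home in HexagonGenInstrInfo.inc are assumptions based on LLVM's InstrMapping convention, not shown in this diff:

// Assumed TableGen-generated signature (one lookup per InstrMapping):
//   namespace Hexagon { int getNewValueOpcode(uint16_t Opcode); }
// It returns the related opcode, or -1 when no row matches all the
// RowFields, which now include addrMode.
static bool hasNewValueForm(unsigned Opcode) {
  return Hexagon::getNewValueOpcode(Opcode) >= 0; // assumption, see above
}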
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 2e011bd12741..50f2eca63693 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -18,7 +18,7 @@
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "InstPrinter/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -174,7 +174,7 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
///
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->isBundle()) {
- std::vector<const MachineInstr*> BundleMIs;
+ std::vector<MachineInstr const *> BundleMIs;
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI;
@@ -183,33 +183,35 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
while (MII != MBB->end() && MII->isInsideBundle()) {
const MachineInstr *MInst = MII;
if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
- MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
- IgnoreCount++;
- ++MII;
- continue;
+ MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ IgnoreCount++;
+ ++MII;
+ continue;
}
- //BundleMIs.push_back(&*MII);
+ // BundleMIs.push_back(&*MII);
BundleMIs.push_back(MInst);
++MII;
}
unsigned Size = BundleMIs.size();
- assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
+ assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
HexagonMCInst MCI;
- MCI.setPacketStart(Index == 0);
- MCI.setPacketEnd(Index == (Size-1));
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
+ HexagonMCInst::AppendImplicitOperands(MCI);
+ MCI.setPacketBegin(Index == 0);
+ MCI.setPacketEnd(Index == (Size - 1));
EmitToStreamer(OutStreamer, MCI);
}
}
else {
HexagonMCInst MCI;
+ HexagonLowerToMC(MI, MCI, *this);
+ HexagonMCInst::AppendImplicitOperands(MCI);
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setPacketStart(true);
+ MCI.setPacketBegin(true);
MCI.setPacketEnd(true);
}
- HexagonLowerToMC(MI, MCI, *this);
EmitToStreamer(OutStreamer, MCI);
}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 7fe8c5790c6b..5f4c162b0070 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONASMPRINTER_H
-#define HEXAGONASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index de340e0074db..307adad095c8 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -59,36 +59,36 @@ private:
char HexagonCFGOptimizer::ID = 0;
static bool IsConditionalBranch(int Opc) {
- return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f)
- || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t);
+ return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf)
+ || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt);
}
static bool IsUnconditionalJump(int Opc) {
- return (Opc == Hexagon::JMP);
+ return (Opc == Hexagon::J2_jump);
}
void
HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
MachineBasicBlock* NewTarget) {
- const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo();
int NewOpcode = 0;
switch(MI->getOpcode()) {
- case Hexagon::JMP_t:
- NewOpcode = Hexagon::JMP_f;
+ case Hexagon::J2_jumpt:
+ NewOpcode = Hexagon::J2_jumpf;
break;
- case Hexagon::JMP_f:
- NewOpcode = Hexagon::JMP_t;
+ case Hexagon::J2_jumpf:
+ NewOpcode = Hexagon::J2_jumpt;
break;
- case Hexagon::JMP_tnew_t:
- NewOpcode = Hexagon::JMP_fnew_t;
+ case Hexagon::J2_jumptnewpt:
+ NewOpcode = Hexagon::J2_jumpfnewpt;
break;
- case Hexagon::JMP_fnew_t:
- NewOpcode = Hexagon::JMP_tnew_t;
+ case Hexagon::J2_jumpfnewpt:
+ NewOpcode = Hexagon::J2_jumptnewpt;
break;
default:
@@ -163,8 +163,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// The target of the unconditional branch must be JumpAroundTarget.
// TODO: If not, we should not invert the unconditional branch.
MachineBasicBlock* CondBranchTarget = nullptr;
- if ((MI->getOpcode() == Hexagon::JMP_t) ||
- (MI->getOpcode() == Hexagon::JMP_f)) {
+ if ((MI->getOpcode() == Hexagon::J2_jumpt) ||
+ (MI->getOpcode() == Hexagon::J2_jumpf)) {
CondBranchTarget = MI->getOperand(1).getMBB();
}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index f5f958c101b1..8d78409aa01d 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
@@ -31,7 +32,8 @@ Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
// No stack is used.
StackOffset = 0;
- UsedRegs.resize((TM.getRegisterInfo()->getNumRegs()+31)/32);
+ UsedRegs.resize(
+ (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32);
}
// HandleByVal - Allocate a stack slot large enough to pass an argument by
@@ -55,7 +57,7 @@ void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void Hexagon_CCState::MarkAllocated(unsigned Reg) {
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
UsedRegs[*AI/32] |= 1 << (*AI&31);
}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
index 70b8b643441a..738ed1a52a09 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
-#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index aeff680e6a9e..1883ad8f2e51 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -114,7 +114,7 @@ static bool isCombinableInstType(MachineInstr *MI,
const HexagonInstrInfo *TII,
bool ShouldCombineAggressively) {
switch(MI->getOpcode()) {
- case Hexagon::TFR: {
+ case Hexagon::A2_tfr: {
// A COPY instruction can be combined if its arguments are IntRegs (32bit).
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg());
@@ -124,7 +124,7 @@ static bool isCombinableInstType(MachineInstr *MI,
Hexagon::IntRegsRegClass.contains(SrcReg);
}
- case Hexagon::TFRI: {
+ case Hexagon::A2_tfrsi: {
// A transfer-immediate can be combined if its argument is a signed 8bit
// value.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
@@ -158,11 +158,11 @@ static bool isCombinableInstType(MachineInstr *MI,
}
static bool isGreaterThan8BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::TFRI &&
+ return I->getOpcode() == Hexagon::A2_tfrsi &&
!isInt<8>(I->getOperand(1).getImm());
}
static bool isGreaterThan6BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::TFRI &&
+ return I->getOpcode() == Hexagon::A2_tfrsi &&
!isUInt<6>(I->getOperand(1).getImm());
}
@@ -171,11 +171,11 @@ static bool isGreaterThan6BitTFRI(MachineInstr *I) {
static bool areCombinableOperations(const TargetRegisterInfo *TRI,
MachineInstr *HighRegInst,
MachineInstr *LowRegInst) {
- assert((HighRegInst->getOpcode() == Hexagon::TFR ||
- HighRegInst->getOpcode() == Hexagon::TFRI ||
+ assert((HighRegInst->getOpcode() == Hexagon::A2_tfr ||
+ HighRegInst->getOpcode() == Hexagon::A2_tfrsi ||
HighRegInst->getOpcode() == Hexagon::TFRI_V4) &&
- (LowRegInst->getOpcode() == Hexagon::TFR ||
- LowRegInst->getOpcode() == Hexagon::TFRI ||
+ (LowRegInst->getOpcode() == Hexagon::A2_tfr ||
+ LowRegInst->getOpcode() == Hexagon::A2_tfrsi ||
LowRegInst->getOpcode() == Hexagon::TFRI_V4) &&
"Assume individual instructions are of a combinable type");
@@ -417,8 +417,8 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
bool HasChanged = false;
// Get target info.
- TRI = MF.getTarget().getRegisterInfo();
- TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Combine aggressively (for code size)
ShouldCombineAggressively =
@@ -563,14 +563,14 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Handle globals.
if (HiOperand.isGlobal()) {
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
HiOperand.getTargetFlags())
.addImm(LoOperand.getImm());
return;
}
if (LoOperand.isGlobal()) {
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
LoOperand.getTargetFlags());
@@ -580,7 +580,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Handle constant extended immediates.
if (!isInt<8>(HiOperand.getImm())) {
assert(isInt<8>(LoOperand.getImm()));
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
return;
@@ -588,7 +588,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
if (!isUInt<6>(LoOperand.getImm())) {
assert(isInt<8>(HiOperand.getImm()));
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
return;
@@ -596,7 +596,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine #HiImm, #LoImm
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
}
@@ -613,7 +613,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
// Handle global.
if (HiOperand.isGlobal()) {
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
HiOperand.getTargetFlags())
.addReg(LoReg, LoRegKillFlag);
@@ -621,7 +621,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
}
// Insert new combine instruction.
// DoubleRegDest = combine #HiImm, LoReg
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
.addImm(HiOperand.getImm())
.addReg(LoReg, LoRegKillFlag);
}
@@ -638,7 +638,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
// Handle global.
if (LoOperand.isGlobal()) {
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
.addReg(HiReg, HiRegKillFlag)
.addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
LoOperand.getTargetFlags());
@@ -647,7 +647,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, #LoImm
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
.addReg(HiReg, HiRegKillFlag)
.addImm(LoOperand.getImm());
}
@@ -666,7 +666,7 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, LoReg
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg)
.addReg(HiReg, HiRegKillFlag)
.addReg(LoReg, LoRegKillFlag);
}
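// A note on the mnemonics this patch switches to (an assumption about the
// scheme, not stated in the diff): the prefix names the instruction class --
// A2_* ALU, C2_* compare/predicate, J2_* jump, L2_*/L4_* load, M2_* multiply,
// S2_*/S4_* store and shift -- and the digit tracks the architecture version
// that introduced the encoding. So A2_tfrsi reads "ALU transfer, signed
// immediate" where the old name was TFRI.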
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 3dafe80ffc48..a2a847c47a20 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -72,7 +72,7 @@ char HexagonExpandPredSpillCode::ID = 0;
bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
- const HexagonInstrInfo *TII = QTM.getInstrInfo();
+ const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -86,43 +86,45 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
- assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(
+ FP ==
+ QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(1).isImm() && "Not an offset");
int Offset = MI->getOperand(1).getImm();
int SrcReg = MI->getOperand(2).getReg();
assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
"Not a predicate register");
- if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) {
+ if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) {
if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
HEXAGON_RESERVED_REG_1)
.addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::STriw_indexed))
+ TII->get(Hexagon::S2_storeri_io))
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0).addReg(HEXAGON_RESERVED_REG_2);
} else {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::STriw_indexed))
+ TII->get(Hexagon::S2_storeri_io))
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0)
.addReg(HEXAGON_RESERVED_REG_2);
}
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::STriw_indexed)).
+ TII->get(Hexagon::S2_storeri_io)).
addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
}
MII = MBB->erase(MI);
@@ -133,39 +135,41 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
"Not a predicate register");
unsigned FP = MI->getOperand(1).getReg();
- assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(
+ FP ==
+ QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(2).isImm() && "Not an offset");
int Offset = MI->getOperand(2).getImm();
- if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) {
if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
HEXAGON_RESERVED_REG_1)
.addReg(FP)
.addReg(HEXAGON_RESERVED_REG_1);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
HEXAGON_RESERVED_REG_2)
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
DstReg).addReg(HEXAGON_RESERVED_REG_2);
} else {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
HEXAGON_RESERVED_REG_2)
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
DstReg).addReg(HEXAGON_RESERVED_REG_2);
}
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
DstReg).addReg(HEXAGON_RESERVED_REG_2);
}
MII = MBB->erase(MI);
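// The in-range reload path above, as a pseudo-asm sketch (register names are
// illustrative; the real code uses the reserved scratch registers):
//   r_tmp = memw(fp + #Offset)     (L2_loadri_io)
//   p_dst = r_tmp                  (C2_tfrrp, int-to-predicate transfer)
// The spill path mirrors it: C2_tfrpr moves the predicate into an int
// register, which S2_storeri_io then stores.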
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index d41939a57a45..e8d8f1497bdb 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -81,8 +81,8 @@ FunctionPass *llvm::createHexagonFixupHwLoops() {
/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::LOOP0_r ||
- MI->getOpcode() == Hexagon::LOOP0_i;
+ return MI->getOpcode() == Hexagon::J2_loop0r ||
+ MI->getOpcode() == Hexagon::J2_loop0i;
}
@@ -160,7 +160,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII,
RegScavenger &RS) {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
@@ -168,18 +168,18 @@ void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
// First, set the LC0 with the trip count.
if (MII->getOperand(1).isReg()) {
// Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
.addReg(MII->getOperand(1).getReg());
} else {
// Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch)
.addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
.addReg(Scratch);
}
// Then, set the SA0 with the loop start address.
BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
.addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0)
.addReg(Scratch);
}
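// Shape of the sequence convertLoopInstr emits after the rename (a sketch;
// the scratch register comes from the scavenger, and the first transfer is
// only needed when the trip count is an immediate):
//   r_scratch = #count             (A2_tfrsi)
//   lc0 = r_scratch                (A2_tfrrcr)
//   r_scratch = ##start_label      (CONST32_Label)
//   sa0 = r_scratch                (A2_tfrrcr)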
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 21df12faefae..9d1a527eddb3 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -50,7 +50,10 @@ void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
unsigned FrameSize = MFI->getStackSize();
// Get the alignments provided by the target.
- unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
@@ -77,8 +80,8 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
determineFrameLayout(MF);
@@ -115,21 +118,21 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
// Check for overflow.
// Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
const int ALLOCFRAME_MAX = 16384;
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
if (NumBytes >= ALLOCFRAME_MAX) {
// Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(0);
// Subtract offset from frame pointer.
BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::A2_sub),
QRI->getStackRegister()).
addReg(QRI->getStackRegister()).
addReg(HEXAGON_RESERVED_REG_1);
} else {
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(NumBytes);
}
}
}
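// For NumBytes >= ALLOCFRAME_MAX the prologue above therefore becomes
// (sketch, with r29 standing in for getStackRegister()):
//   allocframe(#0)                 (S2_allocframe)
//   r_tmp = ##NumBytes             (CONST32_Int_Real)
//   r29 = sub(r29, r_tmp)          (A2_sub)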
@@ -154,12 +157,12 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
MachineBasicBlock::iterator MBBI_end = MBB.end();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Handle EH_RETURN.
if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME));
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::ADD_rr),
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add),
Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
return;
}
@@ -180,7 +183,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
// Add dealloc_return.
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4));
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return));
// Transfer the function live-out registers.
MIB->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Remove the JUMPR node.
@@ -195,7 +198,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
return;
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME));
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
}
}
}
@@ -225,7 +228,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
if (CSI.empty()) {
return false;
@@ -280,7 +283,7 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
if (CSI.empty()) {
return false;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 2d4b0b9d7eb3..2d6b45793809 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGON_FRAMEINFO_H
-#define HEXAGON_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
#include "Hexagon.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 7f76421ac3e0..637b0a0d0bff 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -220,7 +220,7 @@ namespace {
int HexagonHardwareLoops::Counter = 0;
#endif
- /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
+ /// \brief Abstraction for a trip count of a loop. A smaller version
/// of the MachineOperand class without the concerns of changing the
/// operand representation.
class CountValue {
@@ -266,7 +266,8 @@ namespace {
}
void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
+ const TargetRegisterInfo *TRI =
+ TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
if (isImm()) { OS << Contents.ImmVal; }
}
@@ -284,8 +285,8 @@ INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::LOOP0_r ||
- MI->getOpcode() == Hexagon::LOOP0_i;
+ return MI->getOpcode() == Hexagon::J2_loop0r ||
+ MI->getOpcode() == Hexagon::J2_loop0i;
}
FunctionPass *llvm::createHexagonHardwareLoops() {
@@ -302,8 +303,10 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
- TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
- TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
+ TII = static_cast<const HexagonInstrInfo *>(
+ TM->getSubtargetImpl()->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo *>(
+ TM->getSubtargetImpl()->getRegisterInfo());
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
@@ -537,16 +540,16 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
return nullptr;
switch (CondOpc) {
- case Hexagon::CMPEQri:
- case Hexagon::CMPEQrr:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpeq:
Cmp = !Negated ? Comparison::EQ : Comparison::NE;
break;
- case Hexagon::CMPGTUri:
- case Hexagon::CMPGTUrr:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgtu:
Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
break;
- case Hexagon::CMPGTri:
- case Hexagon::CMPGTrr:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgt:
Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
break;
// Very limited support for byte/halfword compares.
@@ -623,12 +626,12 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If so, use the immediate value rather than the register.
if (Start->isReg()) {
const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
- if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+ if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi)
Start = &StartValInstr->getOperand(1);
}
if (End->isReg()) {
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
- if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+ if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi)
End = &EndValInstr->getOperand(1);
}
@@ -778,7 +781,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
DistR = End->getReg();
DistSR = End->getSubReg();
} else {
- const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
(RegToImm ? TII->get(Hexagon::SUB_ri) :
TII->get(Hexagon::ADD_ri));
unsigned SubR = MRI->createVirtualRegister(IntRC);
@@ -829,7 +832,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// Generate NormR = LSR DistR, Shift.
unsigned LsrR = MRI->createVirtualRegister(IntRC);
- const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
.addReg(AdjR, 0, AdjSR)
.addImm(Shift);
@@ -1083,7 +1086,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
.addMBB(LoopStart)
.addReg(CountReg);
} else {
@@ -1092,14 +1095,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// if the immediate fits in the instructions. Otherwise, we need to
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
- if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
+ if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
.addMBB(LoopStart).addReg(CountReg);
} else
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
.addMBB(LoopStart).addImm(CountImm);
}
@@ -1119,8 +1122,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
// - a conditional branch to the loop start.
- if (LastI->getOpcode() == Hexagon::JMP_t ||
- LastI->getOpcode() == Hexagon::JMP_f) {
+ if (LastI->getOpcode() == Hexagon::J2_jumpt ||
+ LastI->getOpcode() == Hexagon::J2_jumpf) {
// Delete one and change/add an uncond. branch to out of the loop.
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
LastI = LastMBB->erase(LastI);
@@ -1191,8 +1194,8 @@ MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
MachineInstr *DI = MRI->getVRegDef(R);
unsigned DOpc = DI->getOpcode();
switch (DOpc) {
- case Hexagon::TFRI:
- case Hexagon::TFRI64:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
case Hexagon::CONST32_Int_Real:
case Hexagon::CONST64_Int_Real:
return DI;
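// On the pair of renames above: A2_tfrsi and A2_tfrpi are taken to be the
// immediate transfers into a single register and a register pair
// respectively, which is why the 64-bit TFRI64 maps to the "pi"
// (pair-immediate) form.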
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index dabe650a5b70..ea3a1770ac31 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -320,38 +320,38 @@ static unsigned doesIntrinsicContainPredicate(unsigned ID)
default:
return 0;
case Intrinsic::hexagon_C2_tfrpr:
- return Hexagon::TFR_RsPd;
+ return Hexagon::C2_tfrpr;
case Intrinsic::hexagon_C2_and:
- return Hexagon::AND_pp;
+ return Hexagon::C2_and;
case Intrinsic::hexagon_C2_xor:
- return Hexagon::XOR_pp;
+ return Hexagon::C2_xor;
case Intrinsic::hexagon_C2_or:
- return Hexagon::OR_pp;
+ return Hexagon::C2_or;
case Intrinsic::hexagon_C2_not:
- return Hexagon::NOT_p;
+ return Hexagon::C2_not;
case Intrinsic::hexagon_C2_any8:
- return Hexagon::ANY_pp;
+ return Hexagon::C2_any8;
case Intrinsic::hexagon_C2_all8:
- return Hexagon::ALL_pp;
+ return Hexagon::C2_all8;
case Intrinsic::hexagon_C2_vitpack:
- return Hexagon::VITPACK_pp;
+ return Hexagon::C2_vitpack;
case Intrinsic::hexagon_C2_mask:
- return Hexagon::MASK_p;
+ return Hexagon::C2_mask;
case Intrinsic::hexagon_C2_mux:
- return Hexagon::MUX_rr;
+ return Hexagon::C2_mux;
// Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
// that's how it's mapped in q6protos.h.
case Intrinsic::hexagon_C2_muxir:
- return Hexagon::MUX_ri;
+ return Hexagon::C2_muxri;
// Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
// that's how it's mapped in q6protos.h.
case Intrinsic::hexagon_C2_muxri:
- return Hexagon::MUX_ir;
+ return Hexagon::C2_muxir;
case Intrinsic::hexagon_C2_muxii:
- return Hexagon::MUX_ii;
+ return Hexagon::C2_muxii;
case Intrinsic::hexagon_C2_vmux:
return Hexagon::VMUX_prr64;
case Intrinsic::hexagon_S2_valignrb:
@@ -404,10 +404,10 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
dl, PointerTy,
TargAddr);
// Figure out base + offset opcode
- if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
- else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
- else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
- else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
else llvm_unreachable("unknown memory type");
// Build indexed load.
@@ -446,14 +446,14 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
MVT::Other, Base, TargetConst,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
SDValue(Result_1, 0));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
@@ -474,7 +474,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other, Base, TargetConst0,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl,
MVT::i64, SDValue(Result_1, 0));
SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
MVT::i32, Base, TargetConstVal,
@@ -513,17 +513,17 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::i32, MVT::Other, Base,
TargetConstVal, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2,0),
SDValue(Result_1,0));
@@ -548,9 +548,9 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other,
Base, TargetConst0, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2,0),
SDValue(Result_1,0));
@@ -591,28 +591,28 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (LoadedVT == MVT::i64) {
if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = Hexagon::POST_LDrid;
+ Opcode = Hexagon::L2_loadrd_pi;
else
- Opcode = Hexagon::LDrid;
+ Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = Hexagon::POST_LDriw;
+ Opcode = Hexagon::L2_loadri_pi;
else
- Opcode = Hexagon::LDriw;
+ Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
- Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
- Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
} else
llvm_unreachable("unknown memory type");
@@ -701,18 +701,18 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(StoredVT, Val)) {
SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
Chain};
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
- if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
else llvm_unreachable("unknown memory type");
// Build post increment store.
@@ -735,10 +735,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
unsigned Opcode = 0;
// Figure out the opcode.
- if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
else llvm_unreachable("unknown memory type");
// Build regular store.
@@ -788,10 +788,10 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
TargAddr);
// Figure out base + offset opcode
- if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
else llvm_unreachable("unknown memory type");
SDValue Ops[] = {SDValue(NewBase,0),
@@ -865,7 +865,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
Chain), 0);
@@ -891,7 +891,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
Chain), 0);
@@ -900,7 +900,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
// Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64,
OP0, OP1);
ReplaceUses(N, Result);
return Result;
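// M2_dpmpyss_s0, substituted for MPY64 here, is read as the signed
// 32x32 -> 64-bit multiply (Rdd = mpy(Rs, Rt)): the old name described the
// result width, the new one the dual-precision signed-by-signed form.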
@@ -934,9 +934,9 @@ SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
if (N000 == N2 &&
N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl,
MVT::i32,
SDValue(SextNode, 0),
N1);
@@ -958,9 +958,9 @@ SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
if (N000 == N2 &&
N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl,
MVT::i32,
SDValue(SextNode, 0),
N1);
@@ -1045,7 +1045,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(),
TargetConst0, Chain), 0);
@@ -1070,7 +1070,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(),
TargetConst0, Chain), 0);
@@ -1079,7 +1079,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
}
// Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
OP0, OP1);
ReplaceUses(N, Result);
return Result;
@@ -1112,7 +1112,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
SDNode* Result =
- CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
MVT::i32, Mul_0, Val);
ReplaceUses(N, Result);
return Result;
@@ -1140,7 +1140,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
SDNode* Result =
- CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32,
Shl2_0, Val);
ReplaceUses(N, Result);
return Result;
@@ -1177,13 +1177,13 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (N->getValueType(0) == MVT::i64) {
// Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
SDValue(IsIntrinsic, 0));
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
MVT::i32,
TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2, 0),
SDValue(Result_1, 0));
@@ -1192,7 +1192,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
}
if (N->getValueType(0) == MVT::i32) {
// Convert the zero_extend to Rs = Pd
- SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
SDValue(IsIntrinsic, 0));
ReplaceUses(N, RsPd);
@@ -1218,10 +1218,10 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
// as at least one of the operands.
if (IntrinsicWithPred) {
SmallVector<SDValue, 8> Ops;
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
// Iterate over all the operands of the intrinsics.
// For PredRegs, do the transfer.
@@ -1236,7 +1236,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
Ops.push_back(SDValue(Arg, 0));
} else if (RC == &Hexagon::PredRegsRegClass) {
// Do the transfer.
- SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
SDValue(Arg, 0));
Ops.push_back(SDValue(PdRs,0));
} else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) {
@@ -1289,19 +1289,19 @@ SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
if (Val == -1) {
// Create the IntReg = 1 node.
SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
CurDAG->getTargetConstant(0, MVT::i32));
// Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
SDValue(IntRegTFR, 0));
// not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1,
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
SDValue(Pd, 0));
// xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+ Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
SDValue(Pd, 0), SDValue(NotPd, 0));
// We have just built:
@@ -1334,7 +1334,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
// Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
// Rd and Rd' are assigned to the same register
- SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32,
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32,
N->getOperand(1),
Src1->getOperand(0),
Src1->getOperand(1));
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a460ea4f3420..ef5d6b97fd6f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -51,9 +51,9 @@ class HexagonCCState : public CCState {
public:
HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C, int NumNamedVarArgParams)
- : CCState(CC, isVarArg, MF, TM, locs, C),
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, locs, C),
NumNamedVarArgParams(NumNamedVarArgParams) {}
int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
@@ -322,8 +322,8 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
@@ -372,8 +372,8 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
@@ -427,9 +427,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(),
- NumNamedVarArgParams);
+ HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), NumNamedVarArgParams);
if (NumNamedVarArgParams > 0)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
@@ -464,7 +463,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
@@ -723,7 +722,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
// Check it to be lr
const HexagonRegisterInfo *QRI =
static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
if (Reg == QRI->getRARegister()) {
FuncInfo->setHasClobberLR(true);
break;
@@ -817,7 +816,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
SDValue Ops[2] = { ArgAdjust, CopyChain };
@@ -843,8 +842,8 @@ const {
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
@@ -964,7 +963,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
@@ -990,8 +989,8 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI =
- static_cast<const HexagonRegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
@@ -1044,7 +1043,7 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
//===----------------------------------------------------------------------===//
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
- : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ : TargetLowering(targetmachine),
TM(targetmachine) {
const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
@@ -1302,9 +1301,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Turn FP extload into load/fextend.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
// Hexagon has a i1 sign extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1453,8 +1454,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
setMinFunctionAlignment(2);
// Needed for DYNAMIC_STACKALLOC expansion.
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(TM.getRegisterInfo());
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
setSchedulingPreference(Sched::VLIW);
}
@@ -1706,3 +1707,17 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
// information is not available.
return true;
}
+
+// Return true when the given node fits in a positive half word.
+bool llvm::isPositiveHalfWord(SDNode *N) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue()))
+ return true;
+
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case ISD::SIGN_EXTEND_INREG:
+ return true;
+ }
+}
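// Worked examples for the helper above (illustrative values, not from the
// patch): a constant 1234 passes (positive and within isInt<16>); 40000
// fails the signed-16-bit check; -5 fails the positivity check; and any
// SIGN_EXTEND_INREG node is accepted unconditionally by the switch.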
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index ec16cc8f894b..d03b1b8d9f4a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Hexagon_ISELLOWERING_H
-#define Hexagon_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
#include "Hexagon.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -21,6 +21,10 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+
+// Return true when the given node fits in a positive half word.
+bool isPositiveHalfWord(SDNode *N);
+
namespace HexagonISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -48,7 +52,9 @@ namespace llvm {
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
BR_JT, // Jump table.
- BARRIER, // Memory barrier.
+ BARRIER, // Memory barrier
+ POPCOUNT,
+ COMBINE,
WrapperJT,
WrapperCP,
WrapperCombineII,
@@ -63,7 +69,8 @@ namespace llvm {
WrapperShuffOB,
WrapperShuffOH,
TC_RETURN,
- EH_RETURN
+ EH_RETURN,
+ DCFETCH
};
}
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 105734349321..8373652c8f64 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -92,12 +92,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -180,12 +186,13 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string InputType = ""; // Input is "imm" or "reg" type.
string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
string isFloat = "false"; // Set to "true" for the floating-point load/store.
- string isBrTaken = ""; // Set to "true"/"false" for jump instructions
+ string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
+ let isCodeGenOnly = 1;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
@@ -196,6 +203,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
+let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
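// The "let mayLoad = 1 in" wrapper above flags every instruction derived
// from LDInst as a memory read for the generic optimizers, so individual
// load definitions no longer need to set the bit themselves (assuming none
// of them overrides it).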
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index d92f97b0dd2d..5fec80bb570a 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -19,6 +19,7 @@
def TypeMEMOP : IType<9>;
def TypeNV : IType<10>;
+def TypeCOMPOUND : IType<12>;
def TypePREFIX : IType<30>;
//----------------------------------------------------------------------------//
@@ -65,3 +66,7 @@ let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
TypePREFIX>;
+
+class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
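// CJInst and TypeCOMPOUND (both new) presumably cover Hexagon's compound
// compare-and-jump encodings, where a compare and a branch on the freshly
// produced predicate share a single instruction; the COMPOUND itinerary
// referenced here is assumed to be defined alongside the other Hexagon
// itinerary classes.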
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 1c95e06c8923..5d962590a705 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -78,11 +78,11 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
switch (MI->getOpcode()) {
default: break;
- case Hexagon::LDriw:
- case Hexagon::LDrid:
- case Hexagon::LDrih:
- case Hexagon::LDrib:
- case Hexagon::LDriub:
+ case Hexagon::L2_loadri_io:
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
if (MI->getOperand(2).isFI() &&
MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -103,10 +103,10 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
- case Hexagon::STriw:
- case Hexagon::STrid:
- case Hexagon::STrih:
- case Hexagon::STrib:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
if (MI->getOperand(2).isFI() &&
MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -124,8 +124,8 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const{
- int BOpc = Hexagon::JMP;
- int BccOpc = Hexagon::JMP_t;
+ int BOpc = Hexagon::J2_jump;
+ int BccOpc = Hexagon::J2_jumpt;
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -134,7 +134,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
// If we want to reverse the branch an odd number of times, we want
// JMP_f.
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- BccOpc = Hexagon::JMP_f;
+ BccOpc = Hexagon::J2_jumpf;
regPos = 1;
}
@@ -213,7 +213,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// Delete the JMP if it's equivalent to a fall-through.
- if (AllowModify && I->getOpcode() == Hexagon::JMP &&
+ if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
I->eraseFromParent();
@@ -249,7 +249,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (LastInst && !SecondLastInst) {
- if (LastOpcode == Hexagon::JMP) {
+ if (LastOpcode == Hexagon::J2_jump) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -274,7 +274,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
- if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) {
+ if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(1).getMBB();
if (SecLastOpcodeHasNot)
Cond.push_back(MachineOperand::CreateImm(0));
@@ -285,7 +285,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If the block ends with two Hexagon:JMPs, handle it. The second one is not
// executed, so remove it.
- if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) {
+ if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
@@ -295,7 +295,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If the block ends with an ENDLOOP, and JMP, handle it.
if (SecLastOpcode == Hexagon::ENDLOOP0 &&
- LastOpcode == Hexagon::JMP) {
+ LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
@@ -308,9 +308,9 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- int BOpc = Hexagon::JMP;
- int BccOpc = Hexagon::JMP_t;
- int BccOpcNot = Hexagon::JMP_f;
+ int BOpc = Hexagon::J2_jump;
+ int BccOpc = Hexagon::J2_jumpt;
+ int BccOpcNot = Hexagon::J2_jumpf;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
@@ -346,15 +346,15 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set mask and the first source register.
switch (Opc) {
- case Hexagon::CMPEHexagon4rr:
- case Hexagon::CMPEQri:
- case Hexagon::CMPEQrr:
- case Hexagon::CMPGT64rr:
- case Hexagon::CMPGTU64rr:
- case Hexagon::CMPGTUri:
- case Hexagon::CMPGTUrr:
- case Hexagon::CMPGTri:
- case Hexagon::CMPGTrr:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgt:
SrcReg = MI->getOperand(1).getReg();
Mask = ~0;
break;
@@ -380,12 +380,12 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set the value/second source register.
switch (Opc) {
- case Hexagon::CMPEHexagon4rr:
- case Hexagon::CMPEQrr:
- case Hexagon::CMPGT64rr:
- case Hexagon::CMPGTU64rr:
- case Hexagon::CMPGTUrr:
- case Hexagon::CMPGTrr:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgt:
case Hexagon::CMPbEQrr_sbsb_V4:
case Hexagon::CMPbEQrr_ubub_V4:
case Hexagon::CMPbGTUrr_V4:
@@ -397,9 +397,9 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
SrcReg2 = MI->getOperand(2).getReg();
return true;
- case Hexagon::CMPEQri:
- case Hexagon::CMPGTUri:
- case Hexagon::CMPGTri:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
case Hexagon::CMPbEQri_V4:
case Hexagon::CMPbGTUri_V4:
case Hexagon::CMPhEQri_V4:
@@ -418,16 +418,16 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
return;
}
if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg);
return;
}
if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
// Map Pd = Ps to Pd = or(Ps, Ps).
- BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ BuildMI(MBB, I, DL, get(Hexagon::C2_or),
DestReg).addReg(SrcReg).addReg(SrcReg);
return;
}
@@ -436,31 +436,31 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// We can have an overlap between single and double reg: r1:0 = r0.
if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
// r1:0 = r0
- BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
Hexagon::subreg_hireg))).addImm(0);
} else {
// r1:0 = r1 or no overlap.
- BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg,
Hexagon::subreg_loreg))).addReg(SrcReg);
- BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
Hexagon::subreg_hireg))).addImm(0);
}
return;
}
- if (Hexagon::CRRegsRegClass.contains(DestReg) &&
+ if (Hexagon::CtrRegsRegClass.contains(DestReg) &&
Hexagon::IntRegsRegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg);
return;
}
if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
Hexagon::IntRegsRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg).
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
Hexagon::PredRegsRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg).
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -488,11 +488,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Align);
if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
} else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
} else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
@@ -533,10 +533,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
if (RC == &Hexagon::IntRegsRegClass) {
- BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (RC == &Hexagon::DoubleRegsRegClass) {
- BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (RC == &Hexagon::PredRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
@@ -648,77 +648,67 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
const int Opc = MI->getOpcode();
switch(Opc) {
- case Hexagon::TFRI:
+ case Hexagon::A2_tfrsi:
return isInt<12>(MI->getOperand(1).getImm());
- case Hexagon::STrid:
- case Hexagon::STrid_indexed:
+ case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
- case Hexagon::STriw:
- case Hexagon::STriw_indexed:
- case Hexagon::STriw_nv_V4:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerinew_io:
return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
- case Hexagon::STrih:
- case Hexagon::STrih_indexed:
- case Hexagon::STrih_nv_V4:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerhnew_io:
return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
- case Hexagon::STrib:
- case Hexagon::STrib_indexed:
- case Hexagon::STrib_nv_V4:
+ case Hexagon::S2_storerb_io:
+ case Hexagon::S2_storerbnew_io:
return isUInt<6>(MI->getOperand(1).getImm());
- case Hexagon::LDrid:
- case Hexagon::LDrid_indexed:
+ case Hexagon::L2_loadrd_io:
return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
- case Hexagon::LDriw:
- case Hexagon::LDriw_indexed:
+ case Hexagon::L2_loadri_io:
return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
- case Hexagon::LDrih:
- case Hexagon::LDriuh:
- case Hexagon::LDrih_indexed:
- case Hexagon::LDriuh_indexed:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
- case Hexagon::LDrib:
- case Hexagon::LDriub:
- case Hexagon::LDrib_indexed:
- case Hexagon::LDriub_indexed:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
return isUInt<6>(MI->getOperand(2).getImm());
- case Hexagon::POST_LDrid:
+ case Hexagon::L2_loadrd_pi:
return isShiftedInt<4,3>(MI->getOperand(3).getImm());
- case Hexagon::POST_LDriw:
+ case Hexagon::L2_loadri_pi:
return isShiftedInt<4,2>(MI->getOperand(3).getImm());
- case Hexagon::POST_LDrih:
- case Hexagon::POST_LDriuh:
+ case Hexagon::L2_loadrh_pi:
+ case Hexagon::L2_loadruh_pi:
return isShiftedInt<4,1>(MI->getOperand(3).getImm());
- case Hexagon::POST_LDrib:
- case Hexagon::POST_LDriub:
+ case Hexagon::L2_loadrb_pi:
+ case Hexagon::L2_loadrub_pi:
return isInt<4>(MI->getOperand(3).getImm());
- case Hexagon::STrib_imm_V4:
- case Hexagon::STrih_imm_V4:
- case Hexagon::STriw_imm_V4:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
return (isUInt<6>(MI->getOperand(1).getImm()) &&
isInt<6>(MI->getOperand(2).getImm()));
case Hexagon::ADD_ri:
return isInt<8>(MI->getOperand(2).getImm());
- case Hexagon::ASLH:
- case Hexagon::ASRH:
- case Hexagon::SXTB:
- case Hexagon::SXTH:
- case Hexagon::ZXTB:
- case Hexagon::ZXTH:
+ case Hexagon::A2_aslh:
+ case Hexagon::A2_asrh:
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
return Subtarget.hasV4TOps();
}
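The immediate-range predicates above come from llvm/Support/MathExtras.h:
isShiftedUInt<N,S>(x) holds iff x is an N-bit unsigned value scaled by 2^S
(the low S bits are zero), matching the natural alignment of each access size.
A small self-contained check, assuming only that header:

#include <cassert>
#include "llvm/Support/MathExtras.h"

int main() {
  using namespace llvm;
  assert(isShiftedUInt<6, 2>(252));     // 63 << 2: largest word-scaled offset
  assert(!isShiftedUInt<6, 2>(253));    // low two bits must be zero
  assert(!isShiftedUInt<6, 2>(256));    // 64 << 2 exceeds the 6-bit field
  assert(isInt<4>(-8) && !isInt<4>(8)); // post-increment byte offsets
  return 0;
}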
@@ -739,16 +729,16 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
switch(Opc) {
default: llvm_unreachable("Unexpected predicated instruction");
- case Hexagon::COMBINE_rr_cPt:
- return Hexagon::COMBINE_rr_cNotPt;
- case Hexagon::COMBINE_rr_cNotPt:
- return Hexagon::COMBINE_rr_cPt;
+ case Hexagon::C2_ccombinewt:
+ return Hexagon::C2_ccombinewf;
+ case Hexagon::C2_ccombinewf:
+ return Hexagon::C2_ccombinewt;
// Dealloc_return.
- case Hexagon::DEALLOC_RET_cPt_V4:
- return Hexagon::DEALLOC_RET_cNotPt_V4;
- case Hexagon::DEALLOC_RET_cNotPt_V4:
- return Hexagon::DEALLOC_RET_cPt_V4;
+ case Hexagon::L4_return_t:
+ return Hexagon::L4_return_f;
+ case Hexagon::L4_return_f:
+ return Hexagon::L4_return_t;
}
}
@@ -780,22 +770,22 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::TFRI_f:
return !invertPredicate ? Hexagon::TFRI_cPt_f :
Hexagon::TFRI_cNotPt_f;
- case Hexagon::COMBINE_rr:
- return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
- Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::A2_combinew:
+ return !invertPredicate ? Hexagon::C2_ccombinewt :
+ Hexagon::C2_ccombinewf;
// Word.
case Hexagon::STriw_f:
- return !invertPredicate ? Hexagon::STriw_cPt :
- Hexagon::STriw_cNotPt;
+ return !invertPredicate ? Hexagon::S2_pstorerit_io :
+ Hexagon::S2_pstorerif_io;
case Hexagon::STriw_indexed_f:
- return !invertPredicate ? Hexagon::STriw_indexed_cPt :
- Hexagon::STriw_indexed_cNotPt;
+ return !invertPredicate ? Hexagon::S2_pstorerit_io :
+ Hexagon::S2_pstorerif_io;
// DEALLOC_RETURN.
- case Hexagon::DEALLOC_RET_V4:
- return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
- Hexagon::DEALLOC_RET_cNotPt_V4;
+ case Hexagon::L4_return:
+ return !invertPredicate ? Hexagon::L4_return_t :
+ Hexagon::L4_return_f;
}
llvm_unreachable("Unexpected predicable instruction");
}
@@ -901,7 +891,7 @@ PredicateInstruction(MachineInstr *MI,
continue;
}
else {
- assert(false && "Unexpected operand type");
+ llvm_unreachable("Unexpected operand type");
}
}
}
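The replacement above is the idiomatic form: assert(false && "...") compiles
away under NDEBUG and lets control fall off the end, while llvm_unreachable
aborts in asserts builds and marks the path unreachable for the optimizer in
release builds.

// Same intent, different release-build behavior:
//   assert(false && "Unexpected operand type");  // no-op under NDEBUG
//   llvm_unreachable("Unexpected operand type"); // annotated as unreachable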
@@ -1082,13 +1072,13 @@ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
- case Hexagon::DEALLOC_RET_V4 :
- case Hexagon::DEALLOC_RET_cPt_V4 :
- case Hexagon::DEALLOC_RET_cNotPt_V4 :
- case Hexagon::DEALLOC_RET_cdnPnt_V4 :
- case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
- case Hexagon::DEALLOC_RET_cdnPt_V4 :
- case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ case Hexagon::L4_return:
+ case Hexagon::L4_return_t:
+ case Hexagon::L4_return_f:
+ case Hexagon::L4_return_tnew_pnt:
+ case Hexagon::L4_return_fnew_pnt:
+ case Hexagon::L4_return_tnew_pt:
+ case Hexagon::L4_return_fnew_pt:
return true;
}
}
@@ -1107,33 +1097,29 @@ isValidOffset(const int Opcode, const int Offset) const {
switch(Opcode) {
- case Hexagon::LDriw:
- case Hexagon::LDriw_indexed:
+ case Hexagon::L2_loadri_io:
case Hexagon::LDriw_f:
- case Hexagon::STriw_indexed:
- case Hexagon::STriw:
+ case Hexagon::S2_storeri_io:
case Hexagon::STriw_f:
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
- case Hexagon::LDrid:
- case Hexagon::LDrid_indexed:
+ case Hexagon::L2_loadrd_io:
case Hexagon::LDrid_f:
- case Hexagon::STrid:
- case Hexagon::STrid_indexed:
+ case Hexagon::S2_storerd_io:
case Hexagon::STrid_f:
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
- case Hexagon::LDrih:
- case Hexagon::LDriuh:
- case Hexagon::STrih:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ case Hexagon::S2_storerh_io:
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
- case Hexagon::LDrib:
- case Hexagon::STrib:
- case Hexagon::LDriub:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::S2_storerb_io:
+ case Hexagon::L2_loadrub_io:
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
@@ -1142,28 +1128,28 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::MemOPw_ADDi_V4 :
- case Hexagon::MemOPw_SUBi_V4 :
- case Hexagon::MemOPw_ADDr_V4 :
- case Hexagon::MemOPw_SUBr_V4 :
- case Hexagon::MemOPw_ANDr_V4 :
- case Hexagon::MemOPw_ORr_V4 :
+ case Hexagon::L4_iadd_memopw_io:
+ case Hexagon::L4_isub_memopw_io:
+ case Hexagon::L4_add_memopw_io:
+ case Hexagon::L4_sub_memopw_io:
+ case Hexagon::L4_and_memopw_io:
+ case Hexagon::L4_or_memopw_io:
return (0 <= Offset && Offset <= 255);
- case Hexagon::MemOPh_ADDi_V4 :
- case Hexagon::MemOPh_SUBi_V4 :
- case Hexagon::MemOPh_ADDr_V4 :
- case Hexagon::MemOPh_SUBr_V4 :
- case Hexagon::MemOPh_ANDr_V4 :
- case Hexagon::MemOPh_ORr_V4 :
+ case Hexagon::L4_iadd_memoph_io:
+ case Hexagon::L4_isub_memoph_io:
+ case Hexagon::L4_add_memoph_io:
+ case Hexagon::L4_sub_memoph_io:
+ case Hexagon::L4_and_memoph_io:
+ case Hexagon::L4_or_memoph_io:
return (0 <= Offset && Offset <= 127);
- case Hexagon::MemOPb_ADDi_V4 :
- case Hexagon::MemOPb_SUBi_V4 :
- case Hexagon::MemOPb_ADDr_V4 :
- case Hexagon::MemOPb_SUBr_V4 :
- case Hexagon::MemOPb_ANDr_V4 :
- case Hexagon::MemOPb_ORr_V4 :
+ case Hexagon::L4_iadd_memopb_io:
+ case Hexagon::L4_isub_memopb_io:
+ case Hexagon::L4_add_memopb_io:
+ case Hexagon::L4_sub_memopb_io:
+ case Hexagon::L4_and_memopb_io:
+ case Hexagon::L4_or_memopb_io:
return (0 <= Offset && Offset <= 63);
// LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -1172,7 +1158,7 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
- case Hexagon::LOOP0_i:
+ case Hexagon::J2_loop0i:
return isUInt<10>(Offset);
// INLINEASM is very special.
@@ -1220,31 +1206,31 @@ isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
- default: return false;
- case Hexagon::MemOPw_ADDi_V4 :
- case Hexagon::MemOPw_SUBi_V4 :
- case Hexagon::MemOPw_ADDr_V4 :
- case Hexagon::MemOPw_SUBr_V4 :
- case Hexagon::MemOPw_ANDr_V4 :
- case Hexagon::MemOPw_ORr_V4 :
- case Hexagon::MemOPh_ADDi_V4 :
- case Hexagon::MemOPh_SUBi_V4 :
- case Hexagon::MemOPh_ADDr_V4 :
- case Hexagon::MemOPh_SUBr_V4 :
- case Hexagon::MemOPh_ANDr_V4 :
- case Hexagon::MemOPh_ORr_V4 :
- case Hexagon::MemOPb_ADDi_V4 :
- case Hexagon::MemOPb_SUBi_V4 :
- case Hexagon::MemOPb_ADDr_V4 :
- case Hexagon::MemOPb_SUBr_V4 :
- case Hexagon::MemOPb_ANDr_V4 :
- case Hexagon::MemOPb_ORr_V4 :
- case Hexagon::MemOPb_SETBITi_V4:
- case Hexagon::MemOPh_SETBITi_V4:
- case Hexagon::MemOPw_SETBITi_V4:
- case Hexagon::MemOPb_CLRBITi_V4:
- case Hexagon::MemOPh_CLRBITi_V4:
- case Hexagon::MemOPw_CLRBITi_V4:
+ default: return false;
+ case Hexagon::L4_iadd_memopw_io:
+ case Hexagon::L4_isub_memopw_io:
+ case Hexagon::L4_add_memopw_io:
+ case Hexagon::L4_sub_memopw_io:
+ case Hexagon::L4_and_memopw_io:
+ case Hexagon::L4_or_memopw_io:
+ case Hexagon::L4_iadd_memoph_io:
+ case Hexagon::L4_isub_memoph_io:
+ case Hexagon::L4_add_memoph_io:
+ case Hexagon::L4_sub_memoph_io:
+ case Hexagon::L4_and_memoph_io:
+ case Hexagon::L4_or_memoph_io:
+ case Hexagon::L4_iadd_memopb_io:
+ case Hexagon::L4_isub_memopb_io:
+ case Hexagon::L4_add_memopb_io:
+ case Hexagon::L4_sub_memopb_io:
+ case Hexagon::L4_and_memopb_io:
+ case Hexagon::L4_or_memopb_io:
+ case Hexagon::L4_ior_memopb_io:
+ case Hexagon::L4_ior_memoph_io:
+ case Hexagon::L4_ior_memopw_io:
+ case Hexagon::L4_iand_memopb_io:
+ case Hexagon::L4_iand_memoph_io:
+ case Hexagon::L4_iand_memopw_io:
return true;
}
return false;
@@ -1264,12 +1250,12 @@ isSpillPredRegOp(const MachineInstr *MI) const {
bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
- case Hexagon::CMPEQrr:
- case Hexagon::CMPEQri:
- case Hexagon::CMPGTrr:
- case Hexagon::CMPGTri:
- case Hexagon::CMPGTUrr:
- case Hexagon::CMPGTUri:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtui:
return true;
}
}
@@ -1278,51 +1264,71 @@ bool HexagonInstrInfo::
isConditionalTransfer (const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
- case Hexagon::TFR_cPt:
- case Hexagon::TFR_cNotPt:
- case Hexagon::TFRI_cPt:
- case Hexagon::TFRI_cNotPt:
- case Hexagon::TFR_cdnPt:
- case Hexagon::TFR_cdnNotPt:
- case Hexagon::TFRI_cdnPt:
- case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::A2_tfrtnew:
+ case Hexagon::A2_tfrfnew:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmovenewif:
return true;
}
}
bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
default: return false;
+ case Hexagon::A2_paddf:
+ case Hexagon::A2_paddfnew:
+ case Hexagon::A2_paddt:
+ case Hexagon::A2_paddtnew:
+ case Hexagon::A2_pandf:
+ case Hexagon::A2_pandfnew:
+ case Hexagon::A2_pandt:
+ case Hexagon::A2_pandtnew:
+ case Hexagon::A4_paslhf:
+ case Hexagon::A4_paslhfnew:
+ case Hexagon::A4_paslht:
+ case Hexagon::A4_paslhtnew:
+ case Hexagon::A4_pasrhf:
+ case Hexagon::A4_pasrhfnew:
+ case Hexagon::A4_pasrht:
+ case Hexagon::A4_pasrhtnew:
+ case Hexagon::A2_porf:
+ case Hexagon::A2_porfnew:
+ case Hexagon::A2_port:
+ case Hexagon::A2_portnew:
+ case Hexagon::A2_psubf:
+ case Hexagon::A2_psubfnew:
+ case Hexagon::A2_psubt:
+ case Hexagon::A2_psubtnew:
+ case Hexagon::A2_pxorf:
+ case Hexagon::A2_pxorfnew:
+ case Hexagon::A2_pxort:
+ case Hexagon::A2_pxortnew:
+ case Hexagon::A4_psxthf:
+ case Hexagon::A4_psxthfnew:
+ case Hexagon::A4_psxtht:
+ case Hexagon::A4_psxthtnew:
+ case Hexagon::A4_psxtbf:
+ case Hexagon::A4_psxtbfnew:
+ case Hexagon::A4_psxtbt:
+ case Hexagon::A4_psxtbtnew:
+ case Hexagon::A4_pzxtbf:
+ case Hexagon::A4_pzxtbfnew:
+ case Hexagon::A4_pzxtbt:
+ case Hexagon::A4_pzxtbtnew:
+ case Hexagon::A4_pzxthf:
+ case Hexagon::A4_pzxthfnew:
+ case Hexagon::A4_pzxtht:
+ case Hexagon::A4_pzxthtnew:
case Hexagon::ADD_ri_cPt:
case Hexagon::ADD_ri_cNotPt:
- case Hexagon::ADD_rr_cPt:
- case Hexagon::ADD_rr_cNotPt:
- case Hexagon::XOR_rr_cPt:
- case Hexagon::XOR_rr_cNotPt:
- case Hexagon::AND_rr_cPt:
- case Hexagon::AND_rr_cNotPt:
- case Hexagon::OR_rr_cPt:
- case Hexagon::OR_rr_cNotPt:
- case Hexagon::SUB_rr_cPt:
- case Hexagon::SUB_rr_cNotPt:
- case Hexagon::COMBINE_rr_cPt:
- case Hexagon::COMBINE_rr_cNotPt:
+ case Hexagon::C2_ccombinewt:
+ case Hexagon::C2_ccombinewf:
return true;
- case Hexagon::ASLH_cPt_V4:
- case Hexagon::ASLH_cNotPt_V4:
- case Hexagon::ASRH_cPt_V4:
- case Hexagon::ASRH_cNotPt_V4:
- case Hexagon::SXTB_cPt_V4:
- case Hexagon::SXTB_cNotPt_V4:
- case Hexagon::SXTH_cPt_V4:
- case Hexagon::SXTH_cNotPt_V4:
- case Hexagon::ZXTB_cPt_V4:
- case Hexagon::ZXTB_cNotPt_V4:
- case Hexagon::ZXTH_cPt_V4:
- case Hexagon::ZXTH_cNotPt_V4:
- return QRI.Subtarget.hasV4TOps();
}
}
@@ -1332,56 +1338,44 @@ isConditionalLoad (const MachineInstr* MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::LDrid_cPt :
- case Hexagon::LDrid_cNotPt :
- case Hexagon::LDrid_indexed_cPt :
- case Hexagon::LDrid_indexed_cNotPt :
- case Hexagon::LDriw_cPt :
- case Hexagon::LDriw_cNotPt :
- case Hexagon::LDriw_indexed_cPt :
- case Hexagon::LDriw_indexed_cNotPt :
- case Hexagon::LDrih_cPt :
- case Hexagon::LDrih_cNotPt :
- case Hexagon::LDrih_indexed_cPt :
- case Hexagon::LDrih_indexed_cNotPt :
- case Hexagon::LDrib_cPt :
- case Hexagon::LDrib_cNotPt :
- case Hexagon::LDrib_indexed_cPt :
- case Hexagon::LDrib_indexed_cNotPt :
- case Hexagon::LDriuh_cPt :
- case Hexagon::LDriuh_cNotPt :
- case Hexagon::LDriuh_indexed_cPt :
- case Hexagon::LDriuh_indexed_cNotPt :
- case Hexagon::LDriub_cPt :
- case Hexagon::LDriub_cNotPt :
- case Hexagon::LDriub_indexed_cPt :
- case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::L2_ploadrdt_io :
+ case Hexagon::L2_ploadrdf_io:
+ case Hexagon::L2_ploadrit_io:
+ case Hexagon::L2_ploadrif_io:
+ case Hexagon::L2_ploadrht_io:
+ case Hexagon::L2_ploadrhf_io:
+ case Hexagon::L2_ploadrbt_io:
+ case Hexagon::L2_ploadrbf_io:
+ case Hexagon::L2_ploadruht_io:
+ case Hexagon::L2_ploadruhf_io:
+ case Hexagon::L2_ploadrubt_io:
+ case Hexagon::L2_ploadrubf_io:
return true;
- case Hexagon::POST_LDrid_cPt :
- case Hexagon::POST_LDrid_cNotPt :
- case Hexagon::POST_LDriw_cPt :
- case Hexagon::POST_LDriw_cNotPt :
- case Hexagon::POST_LDrih_cPt :
- case Hexagon::POST_LDrih_cNotPt :
- case Hexagon::POST_LDrib_cPt :
- case Hexagon::POST_LDrib_cNotPt :
- case Hexagon::POST_LDriuh_cPt :
- case Hexagon::POST_LDriuh_cNotPt :
- case Hexagon::POST_LDriub_cPt :
- case Hexagon::POST_LDriub_cNotPt :
+ case Hexagon::L2_ploadrdt_pi:
+ case Hexagon::L2_ploadrdf_pi:
+ case Hexagon::L2_ploadrit_pi:
+ case Hexagon::L2_ploadrif_pi:
+ case Hexagon::L2_ploadrht_pi:
+ case Hexagon::L2_ploadrhf_pi:
+ case Hexagon::L2_ploadrbt_pi:
+ case Hexagon::L2_ploadrbf_pi:
+ case Hexagon::L2_ploadruht_pi:
+ case Hexagon::L2_ploadruhf_pi:
+ case Hexagon::L2_ploadrubt_pi:
+ case Hexagon::L2_ploadrubf_pi:
return QRI.Subtarget.hasV4TOps();
- case Hexagon::LDrid_indexed_shl_cPt_V4 :
- case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrib_indexed_shl_cPt_V4 :
- case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriub_indexed_shl_cPt_V4 :
- case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrih_indexed_shl_cPt_V4 :
- case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriw_indexed_shl_cPt_V4 :
- case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::L4_ploadrdt_rr:
+ case Hexagon::L4_ploadrdf_rr:
+ case Hexagon::L4_ploadrbt_rr:
+ case Hexagon::L4_ploadrbf_rr:
+ case Hexagon::L4_ploadrubt_rr:
+ case Hexagon::L4_ploadrubf_rr:
+ case Hexagon::L4_ploadrht_rr:
+ case Hexagon::L4_ploadrhf_rr:
+ case Hexagon::L4_ploadruht_rr:
+ case Hexagon::L4_ploadruhf_rr:
+ case Hexagon::L4_ploadrit_rr:
+ case Hexagon::L4_ploadrif_rr:
return QRI.Subtarget.hasV4TOps();
}
}
@@ -1426,50 +1420,47 @@ isConditionalStore (const MachineInstr* MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::STrib_imm_cPt_V4 :
- case Hexagon::STrib_imm_cNotPt_V4 :
- case Hexagon::STrib_indexed_shl_cPt_V4 :
- case Hexagon::STrib_indexed_shl_cNotPt_V4 :
- case Hexagon::STrib_cPt :
- case Hexagon::STrib_cNotPt :
- case Hexagon::POST_STbri_cPt :
- case Hexagon::POST_STbri_cNotPt :
- case Hexagon::STrid_indexed_cPt :
- case Hexagon::STrid_indexed_cNotPt :
- case Hexagon::STrid_indexed_shl_cPt_V4 :
- case Hexagon::POST_STdri_cPt :
- case Hexagon::POST_STdri_cNotPt :
- case Hexagon::STrih_cPt :
- case Hexagon::STrih_cNotPt :
- case Hexagon::STrih_indexed_cPt :
- case Hexagon::STrih_indexed_cNotPt :
- case Hexagon::STrih_imm_cPt_V4 :
- case Hexagon::STrih_imm_cNotPt_V4 :
- case Hexagon::STrih_indexed_shl_cPt_V4 :
- case Hexagon::STrih_indexed_shl_cNotPt_V4 :
- case Hexagon::POST_SThri_cPt :
- case Hexagon::POST_SThri_cNotPt :
- case Hexagon::STriw_cPt :
- case Hexagon::STriw_cNotPt :
- case Hexagon::STriw_indexed_cPt :
- case Hexagon::STriw_indexed_cNotPt :
- case Hexagon::STriw_imm_cPt_V4 :
- case Hexagon::STriw_imm_cNotPt_V4 :
- case Hexagon::STriw_indexed_shl_cPt_V4 :
- case Hexagon::STriw_indexed_shl_cNotPt_V4 :
- case Hexagon::POST_STwri_cPt :
- case Hexagon::POST_STwri_cNotPt :
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ case Hexagon::S4_pstorerbt_rr:
+ case Hexagon::S4_pstorerbf_rr:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S2_pstorerbt_pi:
+ case Hexagon::S2_pstorerbf_pi:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ case Hexagon::S4_pstorerdt_rr:
+ case Hexagon::S4_pstorerdf_rr:
+ case Hexagon::S2_pstorerdt_pi:
+ case Hexagon::S2_pstorerdf_pi:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ case Hexagon::S4_pstorerht_rr:
+ case Hexagon::S4_pstorerhf_rr:
+ case Hexagon::S2_pstorerht_pi:
+ case Hexagon::S2_pstorerhf_pi:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ case Hexagon::S4_pstorerit_rr:
+ case Hexagon::S4_pstorerif_rr:
+ case Hexagon::S2_pstorerit_pi:
+ case Hexagon::S2_pstorerif_pi:
return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
- case Hexagon::STd_GP_cPt_V4 :
- case Hexagon::STd_GP_cNotPt_V4 :
- case Hexagon::STb_GP_cPt_V4 :
- case Hexagon::STb_GP_cNotPt_V4 :
- case Hexagon::STh_GP_cPt_V4 :
- case Hexagon::STh_GP_cNotPt_V4 :
- case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerif_abs:
return QRI.Subtarget.hasV4TOps();
// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
@@ -1565,10 +1556,10 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
return Hexagon::STrih_shl_nv_V4;
case Hexagon::STriw_f:
- return Hexagon::STriw_nv_V4;
+ return Hexagon::S2_storerinew_io;
case Hexagon::STriw_indexed_f:
- return Hexagon::STriw_indexed_nv_V4;
+ return Hexagon::S4_storerinew_rr;
case Hexagon::STriw_shl_V4:
return Hexagon::STriw_shl_nv_V4;
@@ -1589,28 +1580,28 @@ int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI,
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
// Conditional Jumps
- case Hexagon::JMP_t:
- case Hexagon::JMP_f:
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
return getDotNewPredJumpOp(MI, MBPI);
- case Hexagon::JMPR_t:
- return Hexagon::JMPR_tnew_tV3;
+ case Hexagon::J2_jumprt:
+ return Hexagon::J2_jumprtnewpt;
- case Hexagon::JMPR_f:
- return Hexagon::JMPR_fnew_tV3;
+ case Hexagon::J2_jumprf:
+ return Hexagon::J2_jumprfnewpt;
- case Hexagon::JMPret_t:
- return Hexagon::JMPret_tnew_tV3;
+ case Hexagon::JMPrett:
+ return Hexagon::J2_jumprtnewpt;
- case Hexagon::JMPret_f:
- return Hexagon::JMPret_fnew_tV3;
+ case Hexagon::JMPretf:
+ return Hexagon::J2_jumprfnewpt;
// Conditional combine
- case Hexagon::COMBINE_rr_cPt :
- return Hexagon::COMBINE_rr_cdnPt;
- case Hexagon::COMBINE_rr_cNotPt :
- return Hexagon::COMBINE_rr_cdnNotPt;
+ case Hexagon::C2_ccombinewt:
+ return Hexagon::C2_ccombinewnewt;
+ case Hexagon::C2_ccombinewf:
+ return Hexagon::C2_ccombinewnewf;
}
}
@@ -1636,11 +1627,10 @@ void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
}
-DFAPacketizer *HexagonInstrInfo::
-CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
}
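With the new signature, callers hand in the subtarget instead of the whole
target machine. A hypothetical call site (TII and MF are assumed to be in
scope; not taken from this patch):

// DFAPacketizer *DFA = TII->CreateTargetScheduleState(MF.getSubtarget());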
bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
@@ -1728,10 +1718,10 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
taken = true;
switch (MI->getOpcode()) {
- case Hexagon::JMP_t:
- return taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt;
- case Hexagon::JMP_f:
- return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt;
+ case Hexagon::J2_jumpt:
+ return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ case Hexagon::J2_jumpf:
+ return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
default:
llvm_unreachable("Unexpected jump instruction.");
@@ -1765,7 +1755,7 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1 << (bits - 1);
+ return -1U << (bits - 1);
else
return 0;
}
@@ -1779,9 +1769,9 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1 << (bits - 1));
+ return ~(-1U << (bits - 1));
else
- return ~(-1 << bits);
+ return ~(-1U << bits);
}
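The switch to -1U keeps the shift on an unsigned operand (left-shifting a
negative signed value is undefined behavior in C++); the computed ranges are
unchanged. A worked instance for an 8-bit extent (illustrative only):

#include <cassert>

int main() {
  unsigned bits = 8;
  assert(int(-1U << (bits - 1)) == -128);   // signed minimum
  assert(int(~(-1U << (bits - 1))) == 127); // signed maximum
  assert(int(~(-1U << bits)) == 255);       // unsigned maximum
  return 0;
}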
// Returns true if an instruction can be converted into a non-extended
@@ -1843,16 +1833,16 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
}
bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
- return (Opcode == Hexagon::JMP_t) ||
- (Opcode == Hexagon::JMP_f) ||
- (Opcode == Hexagon::JMP_tnew_t) ||
- (Opcode == Hexagon::JMP_fnew_t) ||
- (Opcode == Hexagon::JMP_tnew_nt) ||
- (Opcode == Hexagon::JMP_fnew_nt);
+ return (Opcode == Hexagon::J2_jumpt) ||
+ (Opcode == Hexagon::J2_jumpf) ||
+ (Opcode == Hexagon::J2_jumptnewpt) ||
+ (Opcode == Hexagon::J2_jumpfnewpt) ||
+ (Opcode == Hexagon::J2_jumptnew) ||
+ (Opcode == Hexagon::J2_jumpfnew);
}
bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
- return (Opcode == Hexagon::JMP_f) ||
- (Opcode == Hexagon::JMP_fnew_t) ||
- (Opcode == Hexagon::JMP_fnew_nt);
+ return (Opcode == Hexagon::J2_jumpf) ||
+ (Opcode == Hexagon::J2_jumpfnewpt) ||
+ (Opcode == Hexagon::J2_jumpfnew);
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 6b032c95e74a..6acfbec24709 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonINSTRUCTIONINFO_H
-#define HexagonINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
@@ -148,9 +148,8 @@ public:
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
const BranchProbability &Probability) const override;
- DFAPacketizer*
- CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const override;
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override;
bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 4dcf101ea3ad..7ce65f345cab 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -14,367 +14,606 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
+// Pattern fragment that combines the value type and the register class
+// into a single parameter.
+// The pat frags in the definitions below need to have a named register,
+// otherwise i32 will be assumed regardless of the register class. The
+// name of the register does not matter.
+def I1 : PatLeaf<(i1 PredRegs:$R)>;
+def I32 : PatLeaf<(i32 IntRegs:$R)>;
+def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
+def F32 : PatLeaf<(f32 IntRegs:$R)>;
+def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
+
+// Pattern fragments to extract the low and high subregisters from a
+// 64-bit value.
+def LoReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
+
//===----------------------------------------------------------------------===//
-// Multi-class for logical operators.
-multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
- def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b),
- (i32 IntRegs:$c)))]>;
- def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
- [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b,
- (i32 IntRegs:$c)))]>;
-}
+//===----------------------------------------------------------------------===//
+// Compare
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1,
+ opExtendable = 2 in
+class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp>
+ : ALU32Inst <(outs PredRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$src2),
+ "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)",
+ [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel {
+ bits<2> dst;
+ bits<5> src1;
+ bits<10> src2;
+ let CextOpcode = mnemonic;
+ let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10);
+ let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1);
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9});
+ let Inst{20-16} = src1;
+ let Inst{13-5} = src2{8-0};
+ let Inst{4} = isNot;
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = dst;
+ }
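For readers decoding the field assignments above, a hypothetical C++ helper
that packs the same layout (IClass in bits 31-28, the fixed 0b0101 in 27-24,
MajOp in 23-22, src2 split across bit 21 and bits 13-5, isNot in bit 4); the
bit-21 special case for cmp.gtu is ignored here:

#include <cstdint>

constexpr uint32_t encodeCmpImm(uint32_t MajOp, bool isNot, uint32_t src1,
                                uint32_t src2, uint32_t dst) {
  return (0b0111u << 28) | (0b0101u << 24) | ((MajOp & 0x3u) << 22) |
         (((src2 >> 9) & 0x1u) << 21) | ((src1 & 0x1Fu) << 16) |
         ((src2 & 0x1FFu) << 5) | (uint32_t(isNot) << 4) | (dst & 0x3u);
}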
-// Multi-class for compare ops.
-let isCompare = 1 in {
-multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
- def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>;
-}
+def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>;
+def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>;
+def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>;
-multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
- let CextOpcode = CextOp in {
- let InputType = "reg" in
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
+ : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)),
+ (MI IntRegs:$src1, ImmPred:$src2)>;
- let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
- opExtentBits = 10, InputType = "imm" in
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>;
- }
-}
+def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>;
+def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>;
+def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
-multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
- let CextOpcode = CextOp in {
- let InputType = "reg" in
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
- let isExtendable = 1, opExtendable = 2, isExtentSigned = 0,
- opExtentBits = 9, InputType = "imm" in
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>;
- }
+def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+
+let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
+class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
+ bit IsComm>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel {
+ let isCommutable = IsComm;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b0;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{4-0} = Rd;
}
-multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- s8ExtPred:$c))]>;
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit PredNot, bit PredNew>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
+ "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "#
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
+ let isPredicated = 1;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b1;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{13} = PredNew;
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{7} = PredNot;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
}
+
+class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev>
+ : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> {
+ let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
}
-//===----------------------------------------------------------------------===//
-// ALU32/ALU (Instructions with register-register form)
-//===----------------------------------------------------------------------===//
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+let isCodeGenOnly = 0 in {
+def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
+def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
+def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
+def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
+}
-def HexagonWrapperCombineII :
- SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
+ bits<3> MinOp, bit OpsRev, bit IsComm>
+ : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> {
+ let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
+}
-def HexagonWrapperCombineRR :
- SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123,
+ isCodeGenOnly = 0 in {
+ def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
+ def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
+}
-multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : ALU32_rr<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
- ") $dst = ")#mnemonic#"($src2, $src3)",
- []>;
+multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev> {
+ def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
+ def f : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
+ def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
+ def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
}
-multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>;
- }
+multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit IsComm> {
+ let isPredicable = 1 in
+ def A2_#NAME : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
+ defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
}
-let InputType = "reg" in
-multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in {
- let isPredicable = 1 in
- def NAME : ALU32_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = "#mnemonic#"($src1, $src2)",
- [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
- let neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>;
- defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>;
- }
- }
+let isCodeGenOnly = 0 in {
+defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
+defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
+defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
+defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
+defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
}
-let isCommutable = 1 in {
- defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
- defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel;
- defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel;
- defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel;
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+ : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+// A few special cases producing register pairs:
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
+ isCodeGenOnly = 0 in {
+ def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
+
+ let isPredicable = 1 in
+ def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
+
+ // Conditional combinew uses "newt/f" instead of "t/fnew".
+ def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
+ def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
+ def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
+ def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
-defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
+let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
+class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
+ : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+ let CextOpcode = mnemonic;
+ let isCommutable = IsComm;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<2> Pd;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b0010;
+ let Inst{22-21} = MinOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4} = IsNeg;
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Pd;
+}
-// Combines the two integer registers SRC1 and SRC2 into a double register.
-let isPredicable = 1 in
-class T_Combine : ALU32_rr<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = combine($src1, $src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
- (i32 IntRegs:$src2))))]>;
-
-multiclass Combine_base {
- let BaseOpcode = "combine" in {
- def NAME : T_Combine;
- let neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>;
- defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>;
- }
- }
+let Itinerary = ALU32_3op_tc_2early_SLOT0123, isCodeGenOnly = 0 in {
+ def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
+ def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
+ def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
}
-defm COMBINE_rr : Combine_base, PredNewRel;
+// PatFrag to convert the usual comparison PatFrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
+ : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
+def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
+
+def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
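The two RevCmp patterns rely on a plain identity, so no dedicated cmp.lt
instruction is needed:

// a < b iff b > a (likewise for the unsigned forms), so setlt/setult are
// matched by C2_cmpgt/C2_cmpgtu with the operands swapped.
bool lessThan(int a, int b) { return b > a; }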
+
+let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1,
+ isCodeGenOnly = 0 in
+def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let CextOpcode = "mux";
+ let InputType = "reg";
+ let hasSideEffects = 0;
+ let IClass = 0b1111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
-// Combines the two immediates SRC1 and SRC2 into a double register.
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
- ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
- "$dst = combine(#$src1, #$src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
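For orientation (illustrative, not from the patch): this pattern is what a
plain source-level select reaches through instruction selection.

// A ternary like this becomes ISD::SELECT and matches C2_mux; the
// C2_muxir/C2_muxri variants below cover a small immediate on either side.
int select32(bool p, int a, int b) { return p ? a : b; }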
+
+// Combines the two immediates into a double register.
+// Increase complexity to make it greater than any complexity of a combine
+// that involves a register.
+
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
+ AddedComplexity = 75, isCodeGenOnly = 0 in
+def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
+ "$Rdd = combine(#$s8, #$S8)",
+ [(set (i64 DoubleRegs:$Rdd),
+ (i64 (HexagonCOMBINE (i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ bits<5> Rdd;
+ bits<8> s8;
+ bits<8> S8;
+
+ let IClass = 0b0111;
+ let Inst{27-23} = 0b11000;
+ let Inst{22-16} = S8{7-1};
+ let Inst{13} = S8{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
-def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
+//===----------------------------------------------------------------------===//
+// Template class for predicated ADD of a reg and an Immediate value.
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_Addri_Pred <bit PredNot, bit PredNew>
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+ !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ",
+ ") $Rd = ")#"add($Rs, #$s8)"> {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let isPredicatedNew = PredNew;
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = PredNot;
+ let Inst{22-21} = Pu;
+ let Inst{20-16} = Rs;
+ let Inst{13} = PredNew;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+ }
//===----------------------------------------------------------------------===//
-// ALU32/ALU (ADD with register-immediate form)
+// A2_addi: Add a signed immediate to a register.
//===----------------------------------------------------------------------===//
-multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
- ") $dst = ")#mnemonic#"($src2, #$src3)",
- []>;
-}
+let hasNewValue = 1 in
+class T_Addri <Operand immOp, list<dag> pattern = [] >
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, immOp:$s16),
+ "$Rd = add($Rs, #$s16)", pattern,
+ //[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), (s16ExtPred:$s16)))],
+ "", ALU32_ADDI_tc_1_SLOT0123> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<16> s16;
+
+ let IClass = 0b1011;
+
+ let Inst{27-21} = s16{15-9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s16{8-0};
+ let Inst{4-0} = Rd;
+ }
-multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
+//===----------------------------------------------------------------------===//
+// Multiclass for ADD of a register and an immediate value.
+//===----------------------------------------------------------------------===//
+multiclass Addri_Pred<string mnemonic, bit PredNot> {
let isPredicatedFalse = PredNot in {
- defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
+ def _c#NAME : T_Addri_Pred<PredNot, 0>;
// Predicate new
- defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
+ def _cdn#NAME : T_Addri_Pred<PredNot, 1>;
}
}
let isExtendable = 1, InputType = "imm" in
-multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
+multiclass Addri_base<string mnemonic, SDNode OpNode> {
+ let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
isPredicable = 1 in
- def NAME : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s16Ext:$src2),
- "$dst = "#mnemonic#"($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (s16ExtPred:$src2)))]>;
+ def NAME : T_Addri< s16Ext, // Rd=add(Rs,#s16)
+ [(set (i32 IntRegs:$Rd),
+ (add IntRegs:$Rs, s16ExtPred:$s16))]>;
let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
- neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ALU32ri_Pred<mnemonic, 0>;
- defm NotPt : ALU32ri_Pred<mnemonic, 1>;
+ hasSideEffects = 0, isPredicated = 1 in {
+ defm Pt : Addri_Pred<mnemonic, 0>;
+ defm NotPt : Addri_Pred<mnemonic, 1>;
}
}
}
-defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
-CextOpcode = "OR", InputType = "imm" in
-def OR_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = or($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
- s10ExtPred:$src2))]>, ImmRegRel;
+let isCodeGenOnly = 0 in
+defm ADD_ri : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+//===----------------------------------------------------------------------===//
+// Template class used for the following ALU32 instructions.
+// Rd=and(Rs,#s10)
+// Rd=or(Rs,#s10)
+//===----------------------------------------------------------------------===//
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
-InputType = "imm", CextOpcode = "AND" in
-def AND_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = and($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
- s10ExtPred:$src2))]>, ImmRegRel;
+InputType = "imm", hasNewValue = 1 in
+class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s10Ext:$s10),
+ "$Rd = "#mnemonic#"($Rs, #$s10)" ,
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<10> s10;
+ let CextOpcode = mnemonic;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0110;
+ let Inst{23-22} = MinOp;
+ let Inst{21} = s10{9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rd;
+ }
-// Nop.
-let neverHasSideEffects = 1 in
-def NOP : ALU32_rr<(outs), (ins),
- "nop",
- []>;
+let isCodeGenOnly = 0 in {
+def OR_ri : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
+def AND_ri : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
+}
+// Subtract register from immediate
// Rd32=sub(#s10,Rs32)
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
-CextOpcode = "SUB", InputType = "imm" in
-def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins s10Ext:$src1, IntRegs:$src2),
- "$dst = sub(#$src1, $src2)",
- [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
- ImmRegRel;
+CextOpcode = "sub", InputType = "imm", hasNewValue = 1, isCodeGenOnly = 0 in
+def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
+ "$Rd = sub(#$s10, $Rs)" ,
+ [(set IntRegs:$Rd, (sub s10ExtPred:$s10, IntRegs:$Rs))] > ,
+ ImmRegRel {
+ bits<5> Rd;
+ bits<10> s10;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-22} = 0b011001;
+ let Inst{21} = s10{9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rd;
+ }
+// Nop.
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1111;
+}
// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
def : Pat<(not (i32 IntRegs:$src1)),
(SUB_ri -1, (i32 IntRegs:$src1))>;
-// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs).
-// Pattern definition for 'neg' was not necessary.
-
-multiclass TFR_Pred<bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
- []>;
- // Predicate new
- let isPredicatedNew = 1 in
- def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
- []>;
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_tfr16<bit isHi>
+ : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16),
+ "$Rx"#!if(isHi, ".h", ".l")#" = #$u16",
+ [], "$src1 = $Rx" > {
+ bits<5> Rx;
+ bits<16> u16;
+
+ let IClass = 0b0111;
+ let Inst{27-26} = 0b00;
+ let Inst{25-24} = !if(isHi, 0b10, 0b01);
+ let Inst{23-22} = u16{15-14};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rx;
+ let Inst{13-0} = u16{13-0};
}
-}
-
-let InputType = "reg", neverHasSideEffects = 1 in
-multiclass TFR_base<string CextOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- let isPredicable = 1 in
- def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1",
- []>;
- let isPredicated = 1 in {
- defm Pt : TFR_Pred<0>;
- defm NotPt : TFR_Pred<1>;
- }
- }
+let isCodeGenOnly = 0 in {
+def A2_tfril: T_tfr16<0>;
+def A2_tfrih: T_tfr16<1>;
}
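Functionally the two halves behave like a masked insert, which is why T_tfr16
carries the "$src1 = $Rx" read-modify-write constraint. A minimal model
(illustrative sketch only):

#include <cstdint>

uint32_t setLow16(uint32_t Rx, uint16_t u16) {  // Rx.l = #u16
  return (Rx & 0xFFFF0000u) | u16;
}
uint32_t setHigh16(uint32_t Rx, uint16_t u16) { // Rx.h = #u16
  return (Rx & 0x0000FFFFu) | (uint32_t(u16) << 16);
}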
-class T_TFR64_Pred<bit PredNot, bit isPredNew>
- : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#
- !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []>
-{
+// Conditional transfer is an alias of conditional "Rd = add(Rs, #0)".
+let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
+class T_tfr_pred<bit isPredNot, bit isPredNew>
+ : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ("#!if(isPredNot, "!", "")#
+ "$src1"#!if(isPredNew, ".new", "")#
+ ") $dst = $src2"> {
bits<5> dst;
bits<2> src1;
bits<5> src2;
- let IClass = 0b1111;
- let Inst{27-24} = 0b1101;
+ let isPredicatedFalse = isPredNot;
+ let isPredicatedNew = isPredNew;
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = isPredNot;
let Inst{13} = isPredNew;
- let Inst{7} = PredNot;
+ let Inst{12-5} = 0;
let Inst{4-0} = dst;
- let Inst{6-5} = src1;
- let Inst{20-17} = src2{4-1};
- let Inst{16} = 0b1;
- let Inst{12-9} = src2{4-1};
- let Inst{8} = 0b0;
-}
+ let Inst{22-21} = src1;
+ let Inst{20-16} = src2;
+ }
-multiclass TFR64_Pred<bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def _c#NAME : T_TFR64_Pred<PredNot, 0>;
+let isPredicable = 1 in
+class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = $src"> {
+ bits<5> dst;
+ bits<5> src;
- let isPredicatedNew = 1 in
- def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
+ let IClass = 0b0111;
+
+ let Inst{27-21} = 0b0000011;
+ let Inst{20-16} = src;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = dst;
}
-}
-let neverHasSideEffects = 1 in
-multiclass TFR64_base<string BaseName> {
- let BaseOpcode = BaseName in {
- let isPredicable = 1 in
- def NAME : ALU32Inst <(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1),
- "$dst = $src1" > {
- bits<5> dst;
- bits<5> src1;
-
- let IClass = 0b1111;
- let Inst{27-23} = 0b01010;
- let Inst{4-0} = dst;
- let Inst{20-17} = src1{4-1};
- let Inst{16} = 0b1;
- let Inst{12-9} = src1{4-1};
- let Inst{8} = 0b0;
- }
+let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in
+multiclass tfr_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ def NAME : T_tfr;
- let isPredicated = 1 in {
- defm Pt : TFR64_Pred<0>;
- defm NotPt : TFR64_Pred<1>;
- }
+ // Predicate
+ def t : T_tfr_pred<0, 0>;
+ def f : T_tfr_pred<1, 0>;
+ // Predicate new
+ def tnew : T_tfr_pred<0, 1>;
+ def fnew : T_tfr_pred<1, 1>;
}
}
-multiclass TFRI_Pred<bit PredNot> {
- let isMoveImm = 1, isPredicatedFalse = PredNot in {
- def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Ext:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
- []>;
+// Assembler mapped to C2_ccombinew[t|f|newt|newf].
+// Please don't add bits to this instruction as it'll be converted into
+// 'combine' before object code emission.
+let isPredicated = 1 in
+class T_tfrp_pred<bit PredNot, bit PredNew>
+ : ALU32_rr <(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ "if ("#!if(PredNot, "!", "")#"$src1"
+ #!if(PredNew, ".new", "")#") $dst = $src2" > {
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+ }
+// Assembler mapped to A2_combinew.
+// Please don't add bits to this instruction as it'll be converted into
+// 'combine' before object code emission.
+class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src),
+ "$dst = $src">;
+
+let hasSideEffects = 0 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
+ let isPredicable = 1 in
+ def NAME : T_tfrp;
+ // Predicate
+ def t : T_tfrp_pred <0, 0>;
+ def f : T_tfrp_pred <1, 0>;
// Predicate new
- let isPredicatedNew = 1 in
- def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Ext:$src2),
- !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
- []>;
+ def tnew : T_tfrp_pred <0, 1>;
+ def fnew : T_tfrp_pred <1, 1>;
}
}
-let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
-multiclass TFRI_base<string CextOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
- let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16,
- isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in
- def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
- "$dst = #$src1",
- [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12,
+ isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR",
+ hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in
+class T_TFRI_Pred<bit PredNot, bit PredNew>
+ : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12),
+ "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12",
+ [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<12> s12;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1110;
+ let Inst{23} = PredNot;
+ let Inst{22-21} = Pu;
+ let Inst{20} = 0b0;
+ let Inst{19-16,12-5} = s12;
+ let Inst{13} = PredNew;
+ let Inst{4-0} = Rd;
+}
- let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1,
- isPredicated = 1 in {
- defm Pt : TFRI_Pred<0>;
- defm NotPt : TFRI_Pred<1>;
- }
- }
+let isCodeGenOnly = 0 in {
+def C2_cmoveit : T_TFRI_Pred<0, 0>;
+def C2_cmoveif : T_TFRI_Pred<1, 0>;
+def C2_cmovenewit : T_TFRI_Pred<0, 1>;
+def C2_cmovenewif : T_TFRI_Pred<1, 1>;
}
-defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
-defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
-defm TFR64 : TFR64_base<"TFR64">, PredNewRel;
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
+ CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
+ isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
+ isPredicated = 0, isPredicable = 1, isReMaterializable = 1,
+ isCodeGenOnly = 0 in
+def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
+ [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ ImmRegRel, PredRel {
+ bits<5> Rd;
+ bits<16> s16;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1000;
+ let Inst{23-22,20-16,13-5} = s16;
+ let Inst{4-0} = Rd;
+}
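The concatenated assignment Inst{23-22,20-16,13-5} = s16 scatters the 16-bit
immediate across three fields. A hypothetical helper showing the same packing:

#include <cstdint>

// s16 bits 15-14 -> Inst[23:22], bits 13-9 -> Inst[20:16], bits 8-0 -> Inst[13:5].
constexpr uint32_t encodeTfrsi(uint32_t s16, uint32_t Rd) {
  return (0b0111u << 28) | (0b1000u << 24) |
         (((s16 >> 14) & 0x3u) << 22) | (((s16 >> 9) & 0x1Fu) << 16) |
         ((s16 & 0x1FFu) << 5) | (Rd & 0x1Fu);
}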
+
+let isCodeGenOnly = 0 in
+defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
+defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
+
+// Assembler mapped
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1 in
+def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
+
+// TODO: see if this instruction can be deleted.
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6 in
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+ "$dst = #$src1">;
-// Transfer control register.
-let neverHasSideEffects = 1 in
-def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1",
- []>;
//===----------------------------------------------------------------------===//
// ALU32/ALU -
//===----------------------------------------------------------------------===//
@@ -383,105 +622,190 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// ALU32/PERM +
//===----------------------------------------------------------------------===//
+// Scalar mux register immediate.
+let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX",
+ InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in
+class T_MUX1 <bit MajOp, dag ins, string AsmStr>
+ : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<8> s8;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{23} = MajOp;
+ let Inst{22-21} = Pu;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+}
-let neverHasSideEffects = 1 in
-def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, s8Imm:$src2),
- "$dst = combine(#$src1, #$src2)",
- []>;
+let opExtendable = 2, isCodeGenOnly = 0 in
+def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
+ "$Rd = mux($Pu, #$s8, $Rs)">;
+
+let opExtendable = 3, isCodeGenOnly = 0 in
+def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = mux($Pu, $Rs, #$s8)">;
+
+def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+
+// C2_muxii: Scalar mux immediates.
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
+ "$Rd = mux($Pu, #$s8, #$S8)" ,
+ [(set (i32 IntRegs:$Rd),
+ (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<8> s8;
+ bits<8> S8;
+
+ let IClass = 0b0111;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-23} = Pu;
+ let Inst{22-16} = S8{7-1};
+ let Inst{13} = S8{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+ }
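+// Note the split encoding of the second immediate above: for S8 = #10
+// (0b00001010), Inst{22-16} = S8{7-1} = 0b0000101 and Inst{13} = S8{0} = 0.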
-// Mux.
-def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "$dst = vmux($src1, $src2, $src3)",
- []>;
+//===----------------------------------------------------------------------===//
+// template class for non-predicated alu32_2op instructions
+// - aslh, asrh, sxtb, sxth, zxth
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op <string mnemonic, bits<3> minOp> :
+ ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)", [] > {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23-21} = minOp;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+}
+
+//===----------------------------------------------------------------------===//
+// template class for predicated alu32_2op instructions
+// - aslh, asrh, sxtb, sxth, zxtb, zxth
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, validSubTargets = HasV4SubT,
+ hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
+ bit isPredNew > :
+ ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
+ !if(isPredNot, "if (!$Pu", "if ($Pu")
+ #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23-21} = minOp;
+ let Inst{13} = 0b1;
+ let Inst{11} = isPredNot;
+ let Inst{10} = isPredNew;
+ let Inst{4-0} = Rd;
+ let Inst{9-8} = Pu;
+ let Inst{20-16} = Rs;
+}
-let CextOpcode = "MUX", InputType = "reg" in
-def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- "$dst = mux($src1, $src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))]>, ImmRegRel;
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
-CextOpcode = "MUX", InputType = "imm" in
-def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
- IntRegs:$src3),
- "$dst = mux($src1, #$src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2,
- (i32 IntRegs:$src3))))]>, ImmRegRel;
-
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
-CextOpcode = "MUX", InputType = "imm" in
-def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
- s8Ext:$src3),
- "$dst = mux($src1, $src2, #$src3)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- s8ExtPred:$src3)))]>, ImmRegRel;
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
-def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
- s8Imm:$src3),
- "$dst = mux($src1, #$src2, #$src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- s8ExtPred:$src2,
- s8ImmPred:$src3)))]>;
-
-// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
-multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : ALU32Inst<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
- ") $dst = ")#mnemonic#"($src2)">,
- Requires<[HasV4T]>;
-}
-
-multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> {
let isPredicatedFalse = PredNot in {
- defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+ def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>;
+
// Predicate new
- defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+ let isPredicatedNew = 1 in
+ def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>;
}
}
-multiclass ALU32_2op_base<string mnemonic> {
+multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
let BaseOpcode = mnemonic in {
- let isPredicable = 1, neverHasSideEffects = 1 in
- def NAME : ALU32Inst<(outs IntRegs:$dst),
- (ins IntRegs:$src1),
- "$dst = "#mnemonic#"($src1)">;
-
- let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
- neverHasSideEffects = 1 in {
- defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>;
- defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+ let isPredicable = 1, hasSideEffects = 0 in
+ def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
+
+ let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+ defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
}
}
}
-defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
-defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
-defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
-defm SXTH : ALU32_2op_base<"sxth">, PredNewRel;
-defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
-defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel;
+let isCodeGenOnly = 0 in {
+defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
+defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
+defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
+defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
+defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
+}
-def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
- (ASLH IntRegs:$src1)>;
+// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
+// The compiler prefers to generate 'zxtb' instead of 'and' because 'zxtb' has
+// predicated forms while 'and' doesn't. Since the integrated assembler can't
+// handle 'mapped' instructions, we need to encode 'zxtb' the same as 'and'
+// with the immediate operand set to 255.
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<10> s10 = 255;
+
+ let IClass = 0b0111;
+
+ let Inst{27-22} = 0b011000;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+}
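+// A minimal encoding check for the mapping above: with s10 hard-wired to 255,
+// T_ZXTB emits Inst{21} = s10{9} = 0 and Inst{13-5} = s10{8-0} = 0b011111111,
+// i.e. exactly the bits of "$Rd = and($Rs, #255)".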
-def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
- (ASRH IntRegs:$src1)>;
+// Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)".
+multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, hasSideEffects = 0 in
+ def A2_#NAME : T_ZXTB;
-def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
- (SXTB IntRegs:$src1)>;
+ let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+ defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+ }
+ }
+}
+
+let isCodeGenOnly = 0 in
+defm zxtb : ZXTB_base<"zxtb", 0b100>, PredNewRel;
+
+def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
-def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
- (SXTH IntRegs:$src1)>;
//===----------------------------------------------------------------------===//
// ALU32/PERM -
@@ -492,11 +816,6 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
// ALU32/PRED +
//===----------------------------------------------------------------------===//
-// Compare.
-defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
-defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
-defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
-
// SDNode for converting immediate C to C-1.
def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
// Return the byte immediate const-1 as an SDNode.
@@ -511,14 +830,6 @@ def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
return XformUToUM1Imm(imm);
}]>;
-def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = cl0($src1)",
- [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>;
-
-def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = ct0($src1)",
- [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>;
-
def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
"$dst = cl0($src1)",
[(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
@@ -527,16 +838,6 @@ def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
"$dst = ct0($src1)",
[(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
-def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>;
-
-def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>;
-
//===----------------------------------------------------------------------===//
// ALU32/PRED -
//===----------------------------------------------------------------------===//
@@ -544,114 +845,279 @@ def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
//===----------------------------------------------------------------------===//
// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template Class
+// Add/Subtract halfword
+// Rd=add(Rt.L,Rs.[HL])[:sat]
+// Rd=sub(Rt.L,Rs.[HL])[:sat]
+// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
+// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
//===----------------------------------------------------------------------===//
-// Add.
-def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = add($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))]>;
-// Add halfword.
+let hasNewValue = 1, opNewValue = 0 in
+class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
+ : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
+ "$Rd = "#!if(isSub,"sub","add")#"($Rt."
+ #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs."
+ #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)"))
+ #!if(isSat,":sat","")
+ #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01010;
+ let Inst{22} = hasShift;
+ let Inst{21} = isSub;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rd;
+ let Inst{12-8} = Rt;
+ let Inst{20-16} = Rs;
+ }
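+// As a worked instance of the string concatenation above, LHbits = 0b01,
+// isSat = 1, hasShift = 1, isSub = 1 yields
+//   $Rd = sub($Rt.l, $Rs.h):sat:<<16
+// which is A2_subh_h16_sat_lh defined below.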
-// Compare.
-defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
-defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
-defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
-
-// Logical operations.
-def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = and($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))]>;
-
-def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = or($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))]>;
-
-def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = xor($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))]>;
-
-// Maximum.
-def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = max($src2, $src1)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 (setlt (i32 IntRegs:$src2),
- (i32 IntRegs:$src1))),
- (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+//Rd=sub(Rt.L,Rs.[LH])
+let isCodeGenOnly = 0 in {
+def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
+def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
+}
-def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = maxu($src2, $src1)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 (setult (i32 IntRegs:$src2),
- (i32 IntRegs:$src1))),
- (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
-
-def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = max($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>;
-
-def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = maxu($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setult (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>;
-
-// Minimum.
-def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = min($src2, $src1)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 (setgt (i32 IntRegs:$src2),
- (i32 IntRegs:$src1))),
- (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+let isCodeGenOnly = 0 in {
+//Rd=add(Rt.L,Rs.[LH])
+def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
+def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
+}
-def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = minu($src2, $src1)",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 (setugt (i32 IntRegs:$src2),
- (i32 IntRegs:$src1))),
- (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
-
-def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = min($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>;
-
-def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = minu($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setugt (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>;
-
-// Subtract.
-def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = sub($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))]>;
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ //Rd=sub(Rt.L,Rs.[LH]):sat
+ def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
+ def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
+
+ //Rd=add(Rt.L,Rs.[LH]):sat
+ def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>;
+ def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>;
+}
+
+//Rd=sub(Rt.[LH],Rs.[LH]):<<16
+let isCodeGenOnly = 0 in {
+def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
+def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
+def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
+def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
+}
+
+//Rd=add(Rt.[LH],Rs.[LH]):<<16
+let isCodeGenOnly = 0 in {
+def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
+def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
+def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
+def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
+}
+
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
+ def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
+ def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
+ def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>;
+ def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>;
+
+ //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
+ def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>;
+ def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>;
+ def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>;
+ def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
+}
+
+// Add halfword.
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
+ (A2_addh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
+ (A2_addh_l16_hl I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
+ (A2_addh_h16_ll I32:$src1, I32:$src2)>;
// Subtract halfword.
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
+ (A2_subh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
+ (A2_subh_h16_ll I32:$src1, I32:$src2)>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0000;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in
+class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
+ : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
+ "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
+ #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01011;
+ let Inst{22-21} = !if(isMax, 0b10, 0b01);
+ let Inst{7} = isUnsigned;
+ let Inst{4-0} = Rd;
+ let Inst{12-8} = !if(isMax, Rs, Rt);
+ let Inst{20-16} = !if(isMax, Rt, Rs);
+ }
+
+let isCodeGenOnly = 0 in {
+def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
+def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
+def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
+def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
+}
+
+// Here, depending on the operand being selected, we'll either generate a
+// min or a max instruction.
+// Ex:
+// (a>b)?a:b --> max(a,b) => The check performed is '>' and the larger of the
+// two operands is selected, so the corresponding HexagonInst is passed in
+// 'Inst'.
+// (a>b)?b:a --> min(a,b) => The check performed is '>' but the smaller operand
+// is selected, so the corresponding HexagonInst is passed in 'SwapInst'.
+
+multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
+ InstHexagon Inst, InstHexagon SwapInst> {
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src1), (VT RC:$src2)),
+ (Inst RC:$src1, RC:$src2)>;
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src2), (VT RC:$src1)),
+ (SwapInst RC:$src1, RC:$src2)>;
+}
+
+
+multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))), i16),
+ (Inst IntRegs:$src1, IntRegs:$src2)>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src2),
+ (i32 PositiveHalfWord:$src1))), i16),
+ (SwapInst IntRegs:$src1, IntRegs:$src2)>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats<setge, A2_max, A2_min>;
+ defm: MinMax_pats<setgt, A2_max, A2_min>;
+ defm: MinMax_pats<setle, A2_min, A2_max>;
+ defm: MinMax_pats<setlt, A2_min, A2_max>;
+ defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setule, A2_minu, A2_maxu>;
+ defm: MinMax_pats<setult, A2_minu, A2_maxu>;
+}
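+// As a worked instance of the patterns above, MinMax_pats<setgt, A2_max,
+// A2_min> selects
+//   (select (setgt a, b), a, b) --> A2_max(a, b)
+//   (select (setgt a, b), b, a) --> A2_min(a, b)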
+
+class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
+ : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = Pd;
+}
+
+let isCodeGenOnly = 0 in {
+def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
+def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
+def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
+}
+
+class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
+ : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
+ (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
+
+def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
+def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
+def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
+def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
+def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
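+// RevCmp (defined elsewhere in this file) matches the comparison with its
+// operands swapped, so e.g. a 64-bit "a < b" is selected as cmp.gt(b, a).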
+
+class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
+ bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
+ string Op2Pfx>
+ : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [],
+ "", ALU64_tc_1_SLOT23> {
+ let hasSideEffects = 0;
+ let isCommutable = IsComm;
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = RegType;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if (OpsRev,Rt,Rs);
+ let Inst{12-8} = !if (OpsRev,Rs,Rt);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
+
+class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
+ bit OpsRev, bit IsComm>
+ : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
+ IsComm, "">;
+
+let isCodeGenOnly = 0 in {
+def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
+def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
+}
+
+def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
+
+class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
+ bit IsNeg>
+ : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
+ !if(IsNeg,"~","")>;
+
+let isCodeGenOnly = 0 in {
+def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
+def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
+def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
+}
+
+def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
//===----------------------------------------------------------------------===//
// ALU64/ALU -
@@ -683,29 +1149,90 @@ def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Pipelined looping instructions.
// Logical operations on predicates.
-def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
- "$dst = and($src1, $src2)",
- [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1),
- (i1 PredRegs:$src2)))]>;
-
-let neverHasSideEffects = 1 in
-def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
- PredRegs:$src2),
- "$dst = and($src1, !$src2)",
- []>;
-
-def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
- "$dst = any8($src1)",
- []>;
+let hasSideEffects = 0 in
+class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
+ : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps),
+ "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+
+ let IClass = 0b0110;
+ let Inst{27-23} = 0b10111;
+ let Inst{22-21} = OpBits;
+ let Inst{20} = 0b0;
+ let Inst{17-16} = Ps;
+ let Inst{13} = 0b0;
+ let Inst{1-0} = Pd;
+}
-def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
- "$dst = all8($src1)",
- []>;
+let isCodeGenOnly = 0 in {
+def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
+def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
+def C2_not : T_LOGICAL_1OP<"not", 0b10>;
+}
-def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
- PredRegs:$src2),
- "$dst = vitpack($src1, $src2)",
- []>;
+def: Pat<(i1 (not (i1 PredRegs:$Ps))),
+ (C2_not PredRegs:$Ps)>;
+
+let hasSideEffects = 0 in
+class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
+ : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
+ "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)",
+ [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+ bits<2> Pt;
+
+ let IClass = 0b0110;
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = OpBits;
+ let Inst{20} = 0b0;
+ let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some
+ let Inst{13} = 0b0; // instructions.
+ let Inst{9-8} = !if(Rev,Ps,Pt);
+ let Inst{1-0} = Pd;
+}
+
+let isCodeGenOnly = 0 in {
+def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
+def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
+def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
+def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
+def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
+}
+
+def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
+ "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Ps;
+ bits<2> Pt;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1001;
+ let Inst{22-21} = 0b00;
+ let Inst{17-16} = Ps;
+ let Inst{9-8} = Pt;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
+ "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Pt;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0110;
+ let Inst{9-8} = Pt;
+ let Inst{4-0} = Rd;
+}
def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2,
@@ -719,46 +1246,38 @@ def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
"$dst = vspliceb($src1, $src2, $src3)",
[]>;
-def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
- "$dst = mask($src1)",
- []>;
-
-def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
- "$dst = not($src1)",
- [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>;
-
-def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
- "$dst = or($src1, $src2)",
- [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1),
- (i1 PredRegs:$src2)))]>;
-
-def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
- "$dst = xor($src1, $src2)",
- [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1),
- (i1 PredRegs:$src2)))]>;
-
-
// User control register transfer.
//===----------------------------------------------------------------------===//
// CR -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone,
- [SDNPHasChain]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-let InputType = "imm", isBarrier = 1, isPredicable = 1,
-Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
-opExtentBits = 24 in
-class T_JMP <dag InsDag, list<dag> JumpList = []>
- : JInst<(outs), InsDag,
- "jump $dst" , JumpList> {
- bits<24> dst;
+class CondStr<string CReg, bit True, bit New> {
+ string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
+}
+class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+ string S = Mnemonic # !if(New, !if(Taken,":t",":nt"), "");
+}
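+// For example, CondStr<"$src", 0, 1>.S # JumpOpcStr<"jump", 1, 1>.S
+// concatenates to "if (!$src.new) jump:t", the mnemonic of a
+// predicated-false, predicate-new, taken jump.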
+let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
+ isPredicable = 1,
+ isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+ opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
+class T_JMP<string ExtStr>
+ : JInst<(outs), (ins brtarget:$dst),
+ "jump " # ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23> {
+ bits<24> dst;
let IClass = 0b0101;
let Inst{27-25} = 0b100;
@@ -766,16 +1285,16 @@ class T_JMP <dag InsDag, list<dag> JumpList = []>
let Inst{13-1} = dst{14-2};
}
-let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
-Defs = [PC], isPredicated = 1, opExtentBits = 17 in
-class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>:
- JInst<(outs ), (ins PredRegs:$src, brtarget:$dst),
- !if(PredNot, "if (!$src", "if ($src")#
- !if(isPredNew, ".new) ", ") ")#"jump"#
- !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> {
-
+let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1,
+ isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
+ opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in
+class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
+ : JInst<(outs), (ins PredRegs:$src, brtarget:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jump", isPredNew, isTak>.S # " " #
+ ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23>, ImmRegRel {
let isTaken = isTak;
- let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
let isPredicatedFalse = PredNot;
let isPredicatedNew = isPredNew;
bits<2> src;
@@ -794,11 +1313,28 @@ class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>:
let Inst{7-1} = dst{8-2};
}
-let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in
-class T_JMPr<dag InsDag = (ins IntRegs:$dst)>
- : JRInst<(outs ), InsDag,
- "jumpr $dst" ,
- []> {
+multiclass JMP_Pred<bit PredNot, string ExtStr> {
+ def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>;
+ // Predicate new
+ def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
+ def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
+}
+
+multiclass JMP_base<string BaseOp, string ExtStr> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMP<ExtStr>;
+ defm t : JMP_Pred<0, ExtStr>;
+ defm f : JMP_Pred<1, ExtStr>;
+ }
+}
+
+// Jump to the address stored in a register (JUMPR_MISC):
+// if ([[!]P[.new]]) jumpr[:t|:nt] Rs
+let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
+ isPredicable = 1, hasSideEffects = 0, InputType = "reg" in
+class T_JMPr
+ : JRInst<(outs), (ins IntRegs:$dst),
+ "jumpr $dst", [], "", J_tc_2early_SLOT2> {
bits<5> dst;
let IClass = 0b0101;
@@ -806,15 +1342,15 @@ class T_JMPr<dag InsDag = (ins IntRegs:$dst)>
let Inst{20-16} = dst;
}
-let Defs = [PC], isPredicated = 1, InputType = "reg" in
-class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>:
- JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst),
- !if(PredNot, "if (!$src", "if ($src")#
- !if(isPredNew, ".new) ", ") ")#"jumpr"#
- !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> {
+let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
+ hasSideEffects = 0, InputType = "reg" in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
+ : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [],
+ "", J_tc_2early_SLOT2> {
let isTaken = isTak;
- let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
let isPredicatedFalse = PredNot;
let isPredicatedNew = isPredNew;
bits<2> src;
@@ -828,70 +1364,85 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>:
let Inst{12} = !if(isPredNew, isTak, zero);
let Inst{11} = isPredNew;
let Inst{9-8} = src;
- let Predicates = !if(isPredNew, [HasV3T], [HasV2T]);
- let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT);
-}
-
-multiclass JMP_Pred<bit PredNot> {
- def _#NAME : T_JMP_c<PredNot, 0, 0>;
- // Predicate new
- def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken
- def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken
-}
-
-multiclass JMP_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>;
- defm t : JMP_Pred<0>;
- defm f : JMP_Pred<1>;
- }
}
multiclass JMPR_Pred<bit PredNot> {
def NAME: T_JMPr_c<PredNot, 0, 0>;
// Predicate new
- def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken
- def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken
+ def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+ def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
}
multiclass JMPR_base<string BaseOp> {
let BaseOpcode = BaseOp in {
def NAME : T_JMPr;
- defm _t : JMPR_Pred<0>;
- defm _f : JMPR_Pred<1>;
+ defm t : JMPR_Pred<0>;
+ defm f : JMPR_Pred<1>;
}
}
-let isTerminator = 1, neverHasSideEffects = 1 in {
-let isBranch = 1 in
-defm JMP : JMP_base<"JMP">, PredNewRel;
+let isCall = 1, hasSideEffects = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+ dag InputDag = (ins IntRegs:$Rs)>
+ : JRInst<(outs), InputDag,
+ !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+ "if ($Pu) callr $Rs"),
+ "callr $Rs"),
+ [], "", J_tc_2early_SLOT2> {
+ bits<5> Rs;
+ bits<2> Pu;
+ let isPredicated = isPred;
+ let isPredicatedFalse = isPredNot;
-let isBranch = 1, isIndirectBranch = 1 in
-defm JMPR : JMPR_base<"JMPr">, PredNewRel;
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b000;
+ let Inst{24-23} = !if (isPred, 0b10, 0b01);
+ let Inst{22} = 0;
+ let Inst{21} = isPredNot;
+ let Inst{9-8} = !if (isPred, Pu, 0b00);
+ let Inst{20-16} = Rs;
-let isReturn = 1, isCodeGenOnly = 1 in
-defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+ }
+
+let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in {
+ def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+ def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
}
-def : Pat<(retflag),
- (JMPret (i32 R31))>;
+let isTerminator = 1, hasSideEffects = 0, isCodeGenOnly = 0 in {
+ defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
-def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset),
- (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
+  // Deal with explicit assembly:
+  //  - never extend a jump #, always extend a jump ##
+ let isAsmParserOnly = 1 in {
+ defm J2_jump_ext : JMP_base<"JMP", "##">;
+ defm J2_jump_noext : JMP_base<"JMP", "#">;
+ }
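+  // So "jump ##foo" always parses to the extended form (J2_jump_ext), while
+  // "jump #foo" parses to the never-extended form (J2_jump_noext).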
-// A return through builtin_eh_return.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1,
-isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
-def EH_RETURN_JMPR : T_JMPr;
+ defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
+
+ let isReturn = 1, isCodeGenOnly = 1 in
+ defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+}
-def : Pat<(eh_return),
- (EH_RETURN_JMPR (i32 R31))>;
+def: Pat<(br bb:$dst),
+ (J2_jump brtarget:$dst)>;
+def: Pat<(retflag),
+ (JMPret (i32 R31))>;
+def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
-def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
- (JMPR (i32 IntRegs:$dst))>;
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
+ isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
-def : Pat<(brind (i32 IntRegs:$dst)),
- (JMPR (i32 IntRegs:$dst))>;
+def: Pat<(eh_return),
+ (EH_RETURN_JMPR (i32 R31))>;
+def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
+ (J2_jumpr IntRegs:$dst)>;
+def: Pat<(brind (i32 IntRegs:$dst)),
+ (J2_jumpr IntRegs:$dst)>;
//===----------------------------------------------------------------------===//
// JR -
@@ -900,265 +1451,502 @@ def : Pat<(brind (i32 IntRegs:$dst)),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
-///
-// Load -- MEMri operand
-multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($addr)",
- []>;
-}
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
+class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
+ Operand ImmOp>
+ : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel {
+ bits<4> name;
+ bits<5> dst;
+ bits<5> src1;
+ bits<14> offset;
+ bits<11> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3},
+ !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2},
+ !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1},
+ /* s11_0Ext */ offset{10-0})));
+ let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
+ !if (!eq(ImmOpStr, "s11_2Ext"), 13,
+ !if (!eq(ImmOpStr, "s11_1Ext"), 12,
+ /* s11_0Ext */ 11)));
+ let hasNewValue = !if (!eq(ImmOpStr, "s11_3Ext"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
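+// Example of the offset scaling above: for "memw" loads ImmOp is s11_2Ext,
+// so a byte offset of #8 encodes as offsetBits = offset{12-2} = 2, with
+// opExtentBits = 13.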
-multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in
+class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp,
+ Operand ImmOp, bit isNot, bit isPredNew>
+ : LDInst<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "if ("#!if(isNot, "!$src1", "$src1")
+ #!if(isPredNew, ".new", "")
+ #") $dst = "#mnemonic#"($src2 + #$offset)",
+ [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> offset;
+ bits<6> offsetBits;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3},
+ !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2},
+ !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1},
+ /* u6_0Ext */ offset{5-0})));
+ let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
+ !if (!eq(ImmOpStr, "u6_2Ext"), 8,
+ !if (!eq(ImmOpStr, "u6_1Ext"), 7,
+ /* u6_0Ext */ 6)));
+ let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1);
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isNot;
+
+ let IClass = 0b0100;
+
+  let Inst{27} = 0b0;
+ let Inst{26} = isNot;
+ let Inst{25} = isPredNew;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b0;
+ let Inst{12-11} = src1;
+ let Inst{10-5} = offsetBits;
+ let Inst{4-0} = dst;
}
-}
-let isExtendable = 1, neverHasSideEffects = 1 in
-multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
- bits<5> ImmBits, bits<5> PredImmBits> {
+let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<4>MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let isPredicable = 1 in
+ def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>;
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1 in
- def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
- "$dst = "#mnemonic#"($addr)",
- []>;
-
- let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
- isPredicated = 1 in {
- defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >;
- defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >;
- }
+ // Predicated
+ def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>;
+ def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>;
+
+ // Predicated new
+ def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>;
+ def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>;
}
}
-let addrMode = BaseImmOffset, isMEMri = "true" in {
- let accessSize = ByteAccess in {
- defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel;
- defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel;
- }
+let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+ defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
+ defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
+}
- let accessSize = HalfWordAccess in {
- defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel;
- defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel;
- }
+let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+ defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
+ defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
+}
- let accessSize = WordAccess in
- defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel;
+let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
- let accessSize = DoubleWordAccess in
- defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel;
-}
+let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
- (LDrib ADDRriS11_0:$addr) >;
+ (L2_loadrb_io AddrFI:$addr, 0) >;
def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
- (LDriub ADDRriS11_0:$addr) >;
+ (L2_loadrub_io AddrFI:$addr, 0) >;
def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
- (LDrih ADDRriS11_1:$addr) >;
+ (L2_loadrh_io AddrFI:$addr, 0) >;
def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
- (LDriuh ADDRriS11_1:$addr) >;
+ (L2_loadruh_io AddrFI:$addr, 0) >;
def : Pat < (i32 (load ADDRriS11_2:$addr)),
- (LDriw ADDRriS11_2:$addr) >;
+ (L2_loadri_io AddrFI:$addr, 0) >;
def : Pat < (i64 (load ADDRriS11_3:$addr)),
- (LDrid ADDRriS11_3:$addr) >;
-
-
-// Load - Base with Immediate offset addressing mode
-multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2+#$src3)",
- []>;
-}
-
-multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
- bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
- }
-}
-
-let isExtendable = 1, neverHasSideEffects = 1 in
-multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
- bits<5> PredImmBits> {
-
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1, AddedComplexity = 20 in
- def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1+#$offset)",
- []>;
-
- let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
- isPredicated = 1 in {
- defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >;
- defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >;
- }
- }
-}
-
-let addrMode = BaseImmOffset in {
- let accessSize = ByteAccess in {
- defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext,
- 11, 6>, AddrModeRel;
- defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext,
- 11, 6>, AddrModeRel;
- }
- let accessSize = HalfWordAccess in {
- defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext,
- 12, 7>, AddrModeRel;
- defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext,
- 12, 7>, AddrModeRel;
- }
- let accessSize = WordAccess in
- defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext,
- 13, 8>, AddrModeRel;
-
- let accessSize = DoubleWordAccess in
- defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext,
- 14, 9>, AddrModeRel;
-}
+ (L2_loadrd_io AddrFI:$addr, 0) >;
let AddedComplexity = 20 in {
def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+ (L2_loadrb_io IntRegs:$src1, s11_0ExtPred:$offset) >;
def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+ (L2_loadrub_io IntRegs:$src1, s11_0ExtPred:$offset) >;
def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+ (L2_loadrh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+ (L2_loadruh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
- (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >;
+ (L2_loadri_io IntRegs:$src1, s11_2ExtPred:$offset) >;
def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
- (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
+ (L2_loadrd_io IntRegs:$src1, s11_3ExtPred:$offset) >;
}
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp >
+ : LDInstPI <(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)" ,
+ [],
+ "$src1 = $dst2" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<5> src1;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13-12} = 0b00;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
-multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
- [],
- "$src2 = $dst2">;
-}
+//===----------------------------------------------------------------------===//
+// Template class for predicated post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isPredNot, bit isPredNew >
+ : LDInst <(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
+ [] ,
+ "$src2 = $dst2" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
-multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
- Operand ImmOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
- // Predicate new
- let Predicates = [HasV4T], validSubTargets = HasV4SubT in
- defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12} = isPredNew;
+ let Inst{11} = isPredNot;
+ let Inst{10-9} = src1;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
}
-}
-multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp> {
+//===----------------------------------------------------------------------===//
+// Multiclass for post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp, bits<4> MajOp> {
let BaseOpcode = "POST_"#BaseOp in {
let isPredicable = 1 in
- def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
- let isPredicated = 1 in {
- defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
- defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
- }
+ def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>;
+
+ // Predicated
+ def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>;
+ def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>;
+
+ // Predicated new
+ def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>;
+ def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>;
}
}
-let hasCtrlDep = 1, neverHasSideEffects = 1, addrMode = PostInc in {
- defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>,
- PredNewRel;
- defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>,
- PredNewRel;
- defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>,
- PredNewRel;
- defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>,
- PredNewRel;
- defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>,
- PredNewRel;
- defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>,
- PredNewRel;
+// post increment byte loads with immediate offset
+let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+ defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
+ defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
}
+// post increment halfword loads with immediate offset
+let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+ defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
+ defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
+}
+
+// post increment word loads with immediate offset
+let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
+
+// post increment doubleword loads with immediate offset
+let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
+
def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
- (i32 (LDrib ADDRriS11_0:$addr)) >;
+ (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
// Load byte any-extend.
def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
- (i32 (LDrib ADDRriS11_0:$addr)) >;
+ (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
// Indexed load byte any-extend.
let AddedComplexity = 20 in
def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
+ (i32 (L2_loadrb_io IntRegs:$src1, s11_0ImmPred:$offset)) >;
def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
- (i32 (LDrih ADDRriS11_1:$addr))>;
+ (i32 (L2_loadrh_io AddrFI:$addr, 0))>;
let AddedComplexity = 20 in
def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
- (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
+ (i32 (L2_loadrh_io IntRegs:$src1, s11_1ImmPred:$offset)) >;
let AddedComplexity = 10 in
def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (LDriub ADDRriS11_0:$addr))>;
+ (i32 (L2_loadrub_io AddrFI:$addr, 0))>;
let AddedComplexity = 20 in
def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
+ (i32 (L2_loadrub_io IntRegs:$src1, s11_0ImmPred:$offset))>;
+
+//===----------------------------------------------------------------------===//
+// Template class for post increment loads with register offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
+ MemAccessSize AccessSz>
+ : LDInstPI <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2),
+ "$dst = "#mnemonic#"($src1++$src2)" ,
+ [], "$src1 = $_dst_" > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<1> src2;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, isCodeGenOnly = 0 in {
+ def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
+ def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
+ def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
+ def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
+ def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
+}
+
+let isCodeGenOnly = 0 in
+def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+isPseudo = 1, Defs = [R10,R11,D5], hasSideEffects = 0 in
def LDriw_pred : LDInst2<(outs PredRegs:$dst),
(ins MEMri:$addr),
"Error; should not emit",
[]>;
-// Deallocate stack frame.
-let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
- def DEALLOCFRAME : LDInst2<(outs), (ins),
+let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0 in
+ def L2_deallocframe : LDInst<(outs), (ins),
"deallocframe",
- []>;
+ []> {
+ let IClass = 0b1001;
+
+ let Inst{27-16} = 0b000000011110;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = 0b11110;
+}
+
+// Loads with post-increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
+ : LDInst <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+ "$Rz = $_dst_" > {
+ bits<5> dst;
+ bits<5> Rz;
+ bit Mu;
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b1;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+ def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
+ def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
+}
+
+let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+ def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
+ def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
+}
+
+let accessSize = WordAccess, isCodeGenOnly = 0 in {
+ def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
}
-// Load and unpack bytes to halfwords.
+let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
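+
+// Illustrative syntax (arbitrary registers): circular addressing wraps the
+// base within a buffer described by the modifier register; the "I" form takes
+// the increment from that register's I field:
+//   r0 = memw(r1 ++ I:circ(m0))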
+
+//===----------------------------------------------------------------------===//
+// Circular loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let Uses = [CS], mayLoad = 1, hasSideEffects = 0, hasNewValue = 1 in
+class T_load_pci <string mnemonic, RegisterClass RC,
+ Operand ImmOp, bits<4> MajOp>
+ : LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [],
+ "$Rz = $_dst_"> {
+ bits<5> dst;
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
+
+// Byte variants of circ load
+let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+ def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
+ def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
+}
+
+// Half word variants of circ load
+let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+ def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
+ def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
+}
+
+// Word variants of circ load
+let accessSize = WordAccess, isCodeGenOnly = 0 in
+def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
+
+let accessSize = DoubleWordAccess, hasNewValue = 0, isCodeGenOnly = 0 in
+def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
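+
+// Illustrative syntax (arbitrary registers): the immediate variant steps the
+// circular buffer by an explicit offset, aligned to the access size:
+//   r2 = memh(r3 ++ #2:circ(m1))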
+
+// L[24]_load[wd]_locked: Load word/double with lock.
+let isSoloAX = 1 in
+class T_load_locked <string mnemonic, RegisterClass RC>
+ : LD0Inst <(outs RC:$dst),
+ (ins IntRegs:$src),
+ "$dst = "#mnemonic#"($src)"> {
+ bits<5> dst;
+ bits<5> src;
+ let IClass = 0b1001;
+ let Inst{27-21} = 0b0010000;
+ let Inst{20-16} = src;
+ let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
+ let Inst{4-0} = dst;
+}
+let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0, isCodeGenOnly = 0 in
+ def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
+let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+ def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
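+
+// Illustrative use (arbitrary registers): these are the load half of an
+// LL/SC pair, e.g.
+//   r0 = memw_locked(r1)
+// paired with a conditional store of the form memw_locked(r1, p0) = r2.
+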
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads with auto-increment register
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_load_pbr<string mnemonic, RegisterClass RC,
+ MemAccessSize addrSize, bits<4> majOp>
+ : LDInst
+ <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" ,
+ [] , "$Rz = $_dst_" > {
+
+ let accessSize = addrSize;
+
+ bits<5> dst;
+ bits<5> Rz;
+ bits<1> Mu;
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b111;
+ let Inst{24-21} = majOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, opNewValue = 0, isCodeGenOnly = 0 in {
+ def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
+ def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
+ def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
+ def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
+ def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
+}
+
+let isCodeGenOnly = 0 in
+def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
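+
+// Illustrative syntax (arbitrary registers): bit-reversed addressing, as used
+// in FFT-style permutations, advances the base through a bit-reversed index:
+//   r0 = memub(r1 ++ m0:brev)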
+
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -1180,180 +1968,663 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// MTYPE/MPYH +
//===----------------------------------------------------------------------===//
-// Multiply and use lower result.
-// Rd=+mpyi(Rs,#u8)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
-def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2),
- "$dst =+ mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- u8ExtPred:$src2))]>;
-// Rd=-mpyi(Rs,#u8)
-def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
- "$dst =- mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1),
- u8ImmPred:$src2)))]>;
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords
+//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
+ bit hasShift, bit isUnsigned>
+ : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")
+ #!if(isSat,":sat",""),
+ [], "", M_tc_3x_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isRnd;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
+def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
+def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
+def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>;
+def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
+def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
+def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
+def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
+}
+
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
+def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
+def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
+def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>;
+def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
+def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
+def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
+def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
+}
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
+let isCodeGenOnly = 0 in {
+def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
+def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
+def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
+def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>;
+def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
+def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
+def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
+def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
+}
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
+ def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
+ def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
+ def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>;
+ def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>;
+ def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>;
+ def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>;
+ def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>;
+
+ def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>;
+}
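+
+// Putting the modifiers together (arbitrary registers), an instance such as
+// M2_mpy_sat_rnd_hl_s1 prints as:
+//   r0 = mpy(r1.h, r2.l):<<1:rnd:sat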
+
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords and add the result to, or
+// subtract it from, the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
+ bit hasShift, bit isUnsigned >
+ : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
+ #"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isSat,":sat",""),
+ [], "$dst2 = $Rx", M_tc_3x_SLOT23 > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+ let Inst{27-24} = 0b1110;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isNac;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
+def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
+def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
+def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>;
+def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
+def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
+def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
+def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
+}
+
+//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
+def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
+def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
+def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>;
+def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
+def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
+def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
+def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
+}
+
+//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
+def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
+def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
+def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>;
+def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
+def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
+def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
+def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
+}
+
+//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+let isCodeGenOnly = 0 in {
+def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
+def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
+def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
+def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>;
+def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
+def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
+def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
+def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
+}
+
+//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
+let isCodeGenOnly = 0 in {
+def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
+}
+
+//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
+let isCodeGenOnly = 0 in {
+def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
+}
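+
+// For example (arbitrary registers), M2_mpy_acc_ll_s0 accumulates the 16x16
+// product into the destination:
+//   r0 += mpy(r1.l, r2.l)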
+
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords and add the result to, or
+// subtract it from, the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
+ : MInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
+ #"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1",""),
+ [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0110;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isNac;
+ let Inst{7} = 0;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+let isCodeGenOnly = 0 in {
+def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
+def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
+def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
+def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>;
+
+def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>;
+def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>;
+def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>;
+def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>;
+
+def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>;
+def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>;
+def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>;
+def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>;
+
+def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>;
+def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>;
+def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>;
+def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>;
+
+def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>;
+def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>;
+def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>;
+def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>;
+
+def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>;
+def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>;
+def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>;
+def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>;
+
+def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>;
+def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>;
+def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>;
+def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>;
+
+def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
+def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
+def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
+def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
+}
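+
+// For example (arbitrary registers), M2_mpyd_acc_hl_s0 widens the 16x16
+// product and accumulates it into a register pair:
+//   r1:0 += mpy(r2.h, r3.l)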
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
+ bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0,
+ string op2Suffix = "", bit isRaw = 0, bit isHi = 0 >
+ : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2),
+ "$dst = "#mnemonic
+ #"($src1, $src2"#op2Suffix#")"
+ #!if(MajOp{2}, ":<<1", "")
+ #!if(isRnd, ":rnd", "")
+ #!if(isSat, ":sat", "")
+ #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src2;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0 >
+ : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
+
+class T_MType_rr1 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0 >
+ : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>;
+
+class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0, string op2str = "" >
+ : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
+
+let CextOpcode = "mpyi", InputType = "reg", isCodeGenOnly = 0 in
+def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
+
+let isCodeGenOnly = 0 in {
+def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
+def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
+}
+
+let isCodeGenOnly = 0 in
+def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
+
+let isCodeGenOnly = 0 in {
+def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
+def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
+}
+
+// V4 Instructions
+let isCodeGenOnly = 0 in {
+def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
+def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
+
+def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
+def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
+}
+
+def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
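+
+// mulhs/mulhu produce the upper 32 bits of the widened 64-bit product, which
+// is exactly what the "up" forms return; e.g. (mulhs $a, $b) selects
+//   r0 = mpy(r1, r2)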
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
+ : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
+ "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" ,
+ pattern, "", M_tc_3x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<8> u8;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23} = isNeg;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{12-5} = u8;
+ }
+
+let isExtendable = 1, opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>;
+
+let isCodeGenOnly = 0 in
+def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
+ [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
+ u8ImmPred:$u8)))]>;
+
+// Assembler mapped to M2_mpyi
+let isAsmParserOnly = 1 in
+def M2_mpyui : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyui($src1, $src2)">;
// Rd=mpyi(Rs,#m9)
// s9 is NOT the same as m9 - but it works.. so far.
-// Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8)
+// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
// depending on the value of m9. See Arch Spec.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
-CextOpcode = "MPYI", InputType = "imm" in
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
- "$dst = mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ExtPred:$src2))]>, ImmRegRel;
-
-// Rd=mpyi(Rs,Rt)
-let CextOpcode = "MPYI", InputType = "reg" in
-def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpyi($src1, $src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>, ImmRegRel;
-
-// Rx+=mpyi(Rs,#u8)
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8,
-CextOpcode = "MPYI_acc", InputType = "imm" in
-def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
- "$dst += mpyi($src2, #$src3)",
- [(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3),
- (i32 IntRegs:$src1)))],
- "$src1 = $dst">, ImmRegRel;
+ CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1 in
+def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ s9ExtPred:$src2))]>, ImmRegRel;
+
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
+ InputType = "imm" in
+class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp,
+ list<dag> pattern = []>
+ : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3),
+ "$dst "#mnemonic#"($src2, #$src3)",
+ pattern, "$src1 = $dst", M_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src2;
+ bits<8> src3;
+
+ let IClass = 0b1110;
-// Rx+=mpyi(Rs,Rt)
-let CextOpcode = "MPYI_acc", InputType = "reg" in
-def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ let Inst{27-26} = 0b00;
+ let Inst{25-23} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b0;
+ let Inst{12-5} = src3;
+ let Inst{4-0} = dst;
+ }
+
+let InputType = "reg", hasNewValue = 1 in
+class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0,
+ bit isSat = 0, bit isShift = 0>
+ : MInst < (outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += mpyi($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (i32 IntRegs:$src1)))],
- "$src1 = $dst">, ImmRegRel;
-
-// Rx-=mpyi(Rs,#u8)
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in
-def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
- "$dst -= mpyi($src2, #$src3)",
- [(set (i32 IntRegs:$dst),
- (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u8ExtPred:$src3)))],
- "$src1 = $dst">;
-
-// Multiply and use upper result.
-// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
-// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
-// Rd=mpy(Rs,Rt)
-def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpy($src1, $src2)",
- [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-// Rd=mpy(Rs,Rt):rnd
-// Rd=mpyu(Rs,Rt)
-def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpyu($src1, $src2)",
- [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-// Multiply and use full result.
-// Rdd=mpyu(Rs,Rt)
-def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpyu($src1, $src2)",
- [(set (i64 DoubleRegs:$dst),
- (mul (i64 (anyext (i32 IntRegs:$src1))),
- (i64 (anyext (i32 IntRegs:$src2)))))]>;
-
-// Rdd=mpy(Rs,Rt)
-def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpy($src1, $src2)",
- [(set (i64 DoubleRegs:$dst),
- (mul (i64 (sext (i32 IntRegs:$src1))),
- (i64 (sext (i32 IntRegs:$src2)))))]>;
+ "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)")
+ #!if(isShift, ":<<1", "")
+ #!if(isSat, ":sat", ""),
+ pattern, "$src1 = $dst", M_tc_2_SLOT23 > {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if(isSwap, src3, src2);
+ let Inst{13} = 0b0;
+ let Inst{12-8} = !if(isSwap, src2, src3);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+ def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
+ [(set (i32 IntRegs:$dst),
+ (add (mul IntRegs:$src2, u8ExtPred:$src3),
+ IntRegs:$src1))]>, ImmRegRel;
+
+ def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
+ [(set (i32 IntRegs:$dst),
+ (add (mul IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))]>, ImmRegRel;
+}
+
+let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in {
+ let isExtentSigned = 1 in
+ def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
+ [(set (i32 IntRegs:$dst),
+ (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3),
+ (i32 IntRegs:$src1)))]>, ImmRegRel;
+
+ def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
+ [(set (i32 IntRegs:$dst),
+ (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))]>, ImmRegRel;
+}
+
+let CextOpcode = "SUB_acc", isCodeGenOnly = 0 in {
+ let isExtentSigned = 1 in
+ def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel;
+
+ def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
+}
+
+let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in
+def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>;
+
+let isCodeGenOnly = 0 in {
+def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
+def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
+}
+
+class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
+ PatLeaf ImmPred>
+ : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
+ (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
+
+class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
+ (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>;
+
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>;
+def : T_MType_acc_pat2 <M2_nacci, add, sub>;
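+
+// For example, T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred> matches
+// (sub $x, (mul $y, #u8)) and selects "$x -= mpyi($y, #u8)".
+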
+//===----------------------------------------------------------------------===//
+// Template Class -- Multiply signed/unsigned halfwords with and without
+// saturation and rounding
+//===----------------------------------------------------------------------===//
+class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
+ : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isRnd;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+}
+
+let isCodeGenOnly = 0 in {
+def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
+def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
+def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
+def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>;
+
+def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>;
+def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>;
+def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>;
+def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>;
+
+def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>;
+def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>;
+def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>;
+def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>;
+
+def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>;
+def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>;
+def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>;
+def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>;
+def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>;
+def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>;
+def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>;
+
+def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
+def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
+def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
+def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
+}
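+
+// Illustrative syntax (arbitrary registers): these return the full product of
+// two halfwords in a register pair, e.g. M2_mpyd_rnd_ll_s0:
+//   r1:0 = mpy(r2.l, r3.l):rnd
+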
+//===----------------------------------------------------------------------===//
+// Template Class for xtype mpy:
+// Vector multiply
+// Complex multiply
+// Multiply 32x32 and use the full result
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit hasShift, bit isConj>
+ : MInst <(outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")")
+ #!if(hasShift,":<<1","")
+ #!if(isSat,":sat",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template Class for xtype mpy with accumulation into 64-bit:
+// Vector multiply
+// Complex multiply
+// Multiply 32x32 and use the full result
+//===----------------------------------------------------------------------===//
+class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit hasShift, bit isConj>
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")")
+ #!if(hasShift,":<<1","")
+ #!if(isSat,":sat",""),
+
+ [] , "$dst2 = $Rxx" > {
+ bits<5> Rxx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ }
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs,Rt)
+let isCodeGenOnly = 0 in {
+def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
+
+// Rxx[+-]= mpy[u](Rs,Rt)
+def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
+def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
+}
+
+def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (anyext (i32 IntRegs:$src2))))),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2))))),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (is_sext_i32:$src1),
+ (is_sext_i32:$src2))),
+ (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
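+
+// In C terms (illustrative), the patterns above select the widening multiply
+// for (int64_t)a * (int64_t)b when both operands are extended from i32:
+//   r1:0 = mpy(r2, r3)     // signed; mpyu for the zero/any-extended case
+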
// Multiply and accumulate, use full result.
// Rxx[+-]=mpy(Rs,Rt)
-// Rxx+=mpy(Rs,Rt)
-def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += mpy($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (add (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3)))),
- (i64 DoubleRegs:$src1)))],
- "$src1 = $dst">;
-
-// Rxx-=mpy(Rs,Rt)
-def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst -= mpy($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (sub (i64 DoubleRegs:$src1),
- (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3))))))],
- "$src1 = $dst">;
-
-// Rxx[+-]=mpyu(Rs,Rt)
-// Rxx+=mpyu(Rs,Rt)
-def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- "$dst += mpyu($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (add (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3)))),
- (i64 DoubleRegs:$src1)))], "$src1 = $dst">;
-
-// Rxx-=mpyu(Rs,Rt)
-def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst -= mpyu($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (sub (i64 DoubleRegs:$src1),
- (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3))))))],
- "$src1 = $dst">;
-
-
-let InputType = "reg", CextOpcode = "ADD_acc" in
-def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- "$dst += add($src2, $src3)",
- [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
- (i32 IntRegs:$src3)),
- (i32 IntRegs:$src1)))],
- "$src1 = $dst">, ImmRegRel;
-
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
-InputType = "imm", CextOpcode = "ADD_acc" in
-def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Ext:$src3),
- "$dst += add($src2, #$src3)",
- [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
- s8_16ExtPred:$src3),
- (i32 IntRegs:$src1)))],
- "$src1 = $dst">, ImmRegRel;
-
-let CextOpcode = "SUB_acc", InputType = "reg" in
-def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- "$dst -= add($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">, ImmRegRel;
-
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
-CextOpcode = "SUB_acc", InputType = "imm" in
-def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Ext:$src3),
- "$dst -= add($src2, #$src3)",
- [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
- (add (i32 IntRegs:$src2),
- s8_16ExtPred:$src3)))],
- "$src1 = $dst">, ImmRegRel;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
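+
+// In C terms (illustrative), these select multiply-accumulate for
+// acc +/-= (int64_t)a * b; e.g. the signed add case becomes:
+//   r1:0 += mpy(r2, r3)
+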
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
@@ -1385,245 +2656,459 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
//===----------------------------------------------------------------------===//
///
// Store doubleword.
-
//===----------------------------------------------------------------------===//
-// Post increment store
+// Template class for non-predicated post increment stores with immediate offset
//===----------------------------------------------------------------------===//
+let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isHalf >
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""),
+ [], "$src1 = $_dst_" >,
+ AddrModeRel {
+ bits<5> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+
+ let IClass = 0b1010;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src2;
+ let Inst{7} = 0b0;
+ let Inst{6-3} = offsetBits;
+ let Inst{1} = 0b0;
+ }
-multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : STInst2PI<(outs IntRegs:$dst),
+//===----------------------------------------------------------------------===//
+// Template class for predicated post increment stores with immediate offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew >
+ : STInst <(outs IntRegs:$_dst_),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3",
- [],
- "$src2 = $dst">;
-}
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""),
+ [], "$src2 = $_dst_" >,
+ AddrModeRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<5> src3;
+ bits<4> offsetBits;
-multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
- Operand ImmOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
- // Predicate new
- let Predicates = [HasV4T], validSubTargets = HasV4SubT in
- defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = src3;
+ let Inst{7} = isPredNew;
+ let Inst{6-3} = offsetBits;
+ let Inst{2} = isPredNot;
+ let Inst{1-0} = src1;
}
-}
-let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp> {
+ Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > {
- let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
- let isPredicable = 1 in
- def NAME : STInst2PI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
- mnemonic#"($src1++#$offset) = $src2",
- [],
- "$src1 = $dst">;
-
- let isPredicated = 1 in {
- defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
- defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
- }
+ let BaseOpcode = "POST_"#BaseOp in {
+ def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>;
+
+ // Predicated
+ def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>;
+ def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>;
+
+ // Predicated new
+ def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
+ isHalf, 0, 1>;
+ def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
+ isHalf, 1, 1>;
}
}
-defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
-defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
-defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+let accessSize = ByteAccess, isCodeGenOnly = 0 in
+defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
+
+let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
+
+let accessSize = WordAccess, isCodeGenOnly = 0 in
+defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
-let isNVStorable = 0 in
-defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
+let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
+
+let accessSize = HalfWordAccess, isNVStorable = 0, isCodeGenOnly = 0 in
+defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
s4_3ImmPred:$offset),
- (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+ (S2_storerb_pi IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
s4_3ImmPred:$offset),
- (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+ (S2_storerh_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
- (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+ (S2_storeri_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
s4_3ImmPred:$offset),
- (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
+ (S2_storerd_pi IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
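+
+// Illustrative syntax (arbitrary registers): a post-increment store writes
+// and then bumps the base by the immediate:
+//   memw(r0++#4) = r1     // *r0 = r1; r0 += 4
+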
//===----------------------------------------------------------------------===//
-// multiclass for the store instructions with MEMri operand.
+// Template class for post increment stores with register offset.
//===----------------------------------------------------------------------===//
-multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($addr) = $src2",
- []>;
+let isNVStorable = 1 in
+class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, RC:$src3),
+ mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""),
+ [], "$src1 = $_dst_" > {
+ bits<5> src1;
+ bits<1> src2;
+ bits<5> src3;
+ let accessSize = AccessSz;
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12-8} = src3;
+ let Inst{7} = 0b0;
+ }
+
+let isCodeGenOnly = 0 in {
+def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
+def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
+def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
+def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
+
+def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
}
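+
+// Illustrative syntax (arbitrary registers): the register-offset variant
+// bumps the base by a modifier register after the store:
+//   memw(r0++m0) = r1
+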
+let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
+class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp, bit isH = 0>
+ : STInst <(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
+ AddrModeRel, ImmRegRel {
+ bits<5> src1;
+ bits<14> src2; // Actual address offset
+ bits<5> src3;
+ bits<11> offsetBits; // Represents offset encoding
+
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
+ !if (!eq(ImmOpStr, "s11_2Ext"), 13,
+ !if (!eq(ImmOpStr, "s11_1Ext"), 12,
+ /* s11_0Ext */ 11)));
+ let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3},
+ !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
+ !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
+ /* s11_0Ext */ src2{10-0})));
+ let IClass = 0b1010;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24} = 0b1;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = offsetBits{8};
+ let Inst{12-8} = src3;
+ let Inst{7-0} = offsetBits{7-0};
+ }
-multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+let opExtendable = 2, isPredicated = 1 in
+class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp, bit PredNot, bit isPredNew, bit isH = 0>
+ : STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""),
+ [],"",V2LDST_tc_st_SLOT01 >,
+ AddrModeRel, ImmRegRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3; // Actual address offset
+ bits<5> src4;
+ bits<6> offsetBits; // Represents offset encoding
- // Predicate new
- let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
- defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = PredNot;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
+ !if (!eq(ImmOpStr, "u6_2Ext"), 8,
+ !if (!eq(ImmOpStr, "u6_1Ext"), 7,
+ /* u6_0Ext */ 6)));
+ let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3},
+ !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
+ !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
+ /* u6_0Ext */ src3{5-0})));
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b0;
+ let Inst{26} = PredNot;
+ let Inst{25} = isPredNew;
+ let Inst{24} = 0b0;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = offsetBits{5};
+ let Inst{12-8} = src4;
+ let Inst{7-3} = offsetBits{4-0};
+ let Inst{1-0} = src1;
}
-}
-let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
-multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
- bits<5> ImmBits, bits<5> PredImmBits> {
+let isExtendable = 1, isNVStorable = 1, hasSideEffects = 0 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>;
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1 in
- def NAME : STInst2<(outs),
- (ins MEMri:$addr, RC:$src),
- mnemonic#"($addr) = $src",
- []>;
+ // Predicated
+ def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>;
+ def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>;
- let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
- isPredicated = 1 in {
- defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>;
- defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>;
- }
+ // Predicated new
+ def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp,
+ MajOp, 0, 1, isH>;
+ def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp,
+ MajOp, 1, 1, isH>;
}
}
-let addrMode = BaseImmOffset, isMEMri = "true" in {
+let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
let accessSize = ByteAccess in
- defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
- let accessSize = HalfWordAccess in
- defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>;
- let accessSize = WordAccess in
- defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+ let accessSize = WordAccess, opExtentAlign = 2 in
+ defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>;
- let accessSize = DoubleWordAccess, isNVStorable = 0 in
- defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel;
+ let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in
+ defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+ u6_3Ext, 0b110>;
+
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext,
+ u6_1Ext, 0b011, 1>;
}
def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
- (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>;
+ (S2_storerb_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
- (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>;
+ (S2_storerh_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
- (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>;
+ (S2_storeri_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
- (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>;
-
-
-//===----------------------------------------------------------------------===//
-// multiclass for the store instructions with base+immediate offset
-// addressing mode
-//===----------------------------------------------------------------------===//
-multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4",
- []>;
-}
-
-multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
- bit PredNot> {
- let isPredicatedFalse = PredNot, isPredicated = 1 in {
- defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
-
- // Predicate new
- let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
- defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
- }
-}
+ (S2_storerd_io AddrFI:$addr, 0, (i64 DoubleRegs:$src1))>;
-let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
-multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
- bits<5> PredImmBits> {
-
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1 in
- def NAME : STInst2<(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3",
- []>;
-
- let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
- defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>;
- defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>;
- }
- }
-}
-
-let addrMode = BaseImmOffset, InputType = "reg" in {
- let accessSize = ByteAccess in
- defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext,
- u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel;
-
- let accessSize = HalfWordAccess in
- defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext,
- u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel;
-
- let accessSize = WordAccess in
- defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext,
- u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel;
-
- let accessSize = DoubleWordAccess, isNVStorable = 0 in
- defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
- u6_3Ext, 14, 9>, AddrModeRel;
-}
let AddedComplexity = 10 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
s11_0ExtPred:$offset)),
- (STrib_indexed IntRegs:$src2, s11_0ImmPred:$offset,
+ (S2_storerb_io IntRegs:$src2, s11_0ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
s11_1ExtPred:$offset)),
- (STrih_indexed IntRegs:$src2, s11_1ImmPred:$offset,
+ (S2_storerh_io IntRegs:$src2, s11_1ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
s11_2ExtPred:$offset)),
- (STriw_indexed IntRegs:$src2, s11_2ImmPred:$offset,
+ (S2_storeri_io IntRegs:$src2, s11_2ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
s11_3ExtPred:$offset)),
- (STrid_indexed IntRegs:$src2, s11_3ImmPred:$offset,
+ (S2_storerd_io IntRegs:$src2, s11_3ImmPred:$offset,
(i64 DoubleRegs:$src1))>;
}
// memh(Rx++#s4:1)=Rt.H
-// Store word.
// Store predicate.
-let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
-def STriw_pred : STInst2<(outs),
- (ins MEMri:$addr, PredRegs:$src1),
- "Error; should not emit",
- []>;
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_pred : STInst<(outs),
+ (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1),
+ ".error \"should not emit\"", []>;
+
+// S2_allocframe: Allocate stack frame.
+let Defs = [R29, R30], Uses = [R29, R31, R30],
+ hasSideEffects = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+def S2_allocframe: ST0Inst <
+ (outs), (ins u11_3Imm:$u11_3),
+ "allocframe(#$u11_3)" > {
+ bits<14> u11_3;
+
+ let IClass = 0b1010;
+ let Inst{27-16} = 0b000010011101;
+ let Inst{13-11} = 0b000;
+ let Inst{10-0} = u11_3{13-3};
+ }
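+
+// Sketch of the frame convention: allocframe(#n) stores the LR:FP pair at
+// SP-8, points FP (r30) at the saved pair, and lowers SP (r29) by 8+n.
+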
-// Allocate stack frame.
-let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
- def ALLOCFRAME : STInst2<(outs),
- (ins i32imm:$amt),
- "allocframe(#$amt)",
- []>;
+// S2_storer[bhwdf]_pci: Store byte/half/word/double with circular addressing.
+// These have new-value counterparts (e.g. S2_storerb_pci -> S2_storerbnew_pci).
+let Uses = [CS], isNVStorable = 1 in
+class T_store_pci <string mnemonic, RegisterClass RC,
+ Operand Imm, bits<4>MajOp,
+ MemAccessSize AlignSize, string RegSrc = "Rt">
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt),
+ #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"",
+ [] ,
+ "$Rz = $_dst_" > {
+ bits<5> Rz;
+ bits<7> offset;
+ bits<1> Mu;
+ bits<5> Rt;
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{6-3} =
+ !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3},
+ !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
+ !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
+ /* ByteAccess */ offset{3-0})));
+ let Inst{1} = 0b0;
+ }
+
+let isCodeGenOnly = 0 in {
+def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
+ ByteAccess>;
+def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
+ HalfWordAccess>;
+def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
+ HalfWordAccess, "Rt.h">;
+def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
+ WordAccess>;
+def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
+ DoubleWordAccess>;
}
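+
+// Illustrative syntax (arbitrary registers): a circular store with immediate
+// offset, e.g. for S2_storeri_pci:
+//   memw(r0 ++ #4:circ(m1)) = r1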
+
+//===----------------------------------------------------------------------===//
+// Circular stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS], isNVStorable = 1, isCodeGenOnly = 0 in
+class T_store_pcr <string mnemonic, RegisterClass RC, bits<4> MajOp,
+ MemAccessSize AlignSize, string RegSrc = "Rt">
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt),
+ #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"",
+ [],
+ "$Rz = $_dst_" > {
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<5> Rt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{1} = 0b1;
+ }
+
+let isCodeGenOnly = 0 in {
+def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
+def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
+def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
+def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
+ HalfWordAccess, "Rt.h">;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_store_pbr<string mnemonic, RegisterClass RC,
+ MemAccessSize addrSize, bits<3> majOp,
+ bit isHalf = 0>
+ : STInst
+ <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src),
+ #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""),
+ [], "$Rz = $_dst_" > {
+
+ let accessSize = addrSize;
+
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<5> src;
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = majOp;
+ let Inst{7} = 0b0;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = src;
+ }
+
+let isNVStorable = 1, isCodeGenOnly = 0 in {
+ let BaseOpcode = "S2_storerb_pbr" in
+ def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
+ 0b000>, NewValueRel;
+ let BaseOpcode = "S2_storerh_pbr" in
+ def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess,
+ 0b010>, NewValueRel;
+ let BaseOpcode = "S2_storeri_pbr" in
+ def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
+ 0b100>, NewValueRel;
+}
+let isCodeGenOnly = 0 in {
+def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
+def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
+}
+
//===----------------------------------------------------------------------===//
// ST -
//===----------------------------------------------------------------------===//
@@ -1637,69 +3122,423 @@ def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
[(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
-// Sign extend word to doubleword.
-def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxtw($src1)",
- [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>;
//===----------------------------------------------------------------------===//
// STYPE/ALU -
//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
+ RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
+ : SInst <(outs RCOut:$dst), (ins RCIn:$src),
+ "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""),
+ [], "", S_2op_tc_1_SLOT23 > {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = src;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp>
+ : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
+ : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>;
+
+let hasNewValue = 1 in
+class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
+ : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
+
+// Sign extend word to doubleword
+let isCodeGenOnly = 0 in
+def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
+
+def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+
+// Swizzle the bytes of a word
+let isCodeGenOnly = 0 in
+def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
+
+// Saturate
+let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
+ def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
+ def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
+ def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
+ def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
+}
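+// The saturating instructions set the sticky overflow flag when the
+// result is clamped, which is why USR_OVF is modeled as an implicit def.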
+
+let Itinerary = S_2op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+ // Absolute value word
+ def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
+
+ let Defs = [USR_OVF] in
+ def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>;
+
+ // Negate with saturation
+ let Defs = [USR_OVF] in
+ def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
+}
+
+def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)),
+ (i32 (sub 0, (i32 IntRegs:$src))),
+ (i32 IntRegs:$src))),
+ (A2_abs IntRegs:$src)>;
+
+let AddedComplexity = 50 in
+def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)),
+ (i32 IntRegs:$src)),
+ (sra (i32 IntRegs:$src), (i32 31)))),
+ (A2_abs IntRegs:$src)>;
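+// Both patterns above recognize branch-free absolute-value idioms:
+// select(x < 0, -x, x) and (x + (x >> 31)) ^ (x >> 31) each compute |x|
+// for 32-bit two's complement inputs, so both fold to a single A2_abs.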
+
+class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
+ RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit isRnd, list<dag> pattern = []>
+ : SInst <(outs RCOut:$dst),
+ (ins RCIn:$src, u5Imm:$u5),
+ "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "")
+ #!if(isRnd, ":rnd", ""),
+ pattern, "", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src;
+ bits<5> u5;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = u5;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
+ : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp,
+ isSat, isRnd, pattern>;
+
+class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
+ : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0,
+ [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
+ (u5ImmPred:$u5)))]>;
+
+// Arithmetic/logical shift right/left by immediate
+let Itinerary = S_2op_tc_1_SLOT23, isCodeGenOnly = 0 in {
+ def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
+ def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
+ def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
+}
+
+// Shift left by immediate with saturation
+let Defs = [USR_OVF], isCodeGenOnly = 0 in
+def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
+
+// Arithmetic shift right by immediate with rounding
+let isCodeGenOnly = 0 in
+def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
+
+def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
+ (i32 1))),
+ (i32 1))),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>;
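+// The pattern above matches ((Rs >> #u5) + 1) >> 1, i.e. an arithmetic
+// right shift rounded to nearest, and folds it into the single :rnd form.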
+
+class T_S2op_3<string opc, bits<2> MajOp, bits<3> minOp, bits<1> sat = 0>
+ : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
+ "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
+ bits<5> Rss;
+ bits<5> Rdd;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = minOp;
+ let Inst{4-0} = Rdd;
+}
+
+let isCodeGenOnly = 0 in {
+def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
+def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
+def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
+}
+
+// Interleave/deinterleave
+let isCodeGenOnly = 0 in {
+def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
+def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
+}
+
//===----------------------------------------------------------------------===//
// STYPE/BIT +
//===----------------------------------------------------------------------===//
-// clrbit.
-def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = clrbit($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
- (not
- (shl 1, u5ImmPred:$src2))))]>;
+// Bit count (count leading/trailing zeros, ones, and sign bits)
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32,
+ dag Out, dag Inp>
+ : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ let IClass = 0b1000;
+ let Inst{27} = 0b1;
+ let Inst{26} = Is32;
+ let Inst{25-24} = 0b00;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
-def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = clrbit($src1, #$src2)",
- []>;
+class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp>
+ : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1,
+ (outs IntRegs:$Rd), (ins IntRegs:$Rs)>;
+
+class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
+ : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
+ (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
+
+let isCodeGenOnly = 0 in {
+def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
+def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
+def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
+def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>;
+def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>;
+def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
+def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
+def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
+def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
+}
-// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31).
-def : Pat <(and (i32 IntRegs:$src1), 2147483647),
- (CLRBIT_31 (i32 IntRegs:$src1), 31)>;
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
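+// cl0/ct0 map directly onto the generic ctlz/cttz nodes; the cl1/ct1
+// (count leading/trailing ones) forms are selected by matching the count
+// of a complemented input.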
+
+// Bit set/clear/toggle
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp>
+ : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5),
+ "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> u5;
+ let IClass = 0b1000;
+ let Inst{27-21} = 0b1100110;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = u5;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
-// setbit.
-def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = setbit($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
- (shl 1, u5ImmPred:$src2)))]>;
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
+ : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-22} = 0b011010;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = Rd;
+}
-// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31).
-def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = setbit($src1, #$src2)",
- []>;
+let isCodeGenOnly = 0 in {
+def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
+def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
+def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
+def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
+def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
+def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
+}
-def : Pat <(or (i32 IntRegs:$src1), -2147483648),
- (SETBIT_31 (i32 IntRegs:$src1), 31)>;
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))),
+ (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
+ (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
+ (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
+ (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
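+// These patterns fold the usual single-bit mask idioms (and with
+// ~(1 << n), or with (1 << n), xor with (1 << n)) into one bit
+// instruction, for both immediate and register bit positions.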
+
+// Bit test
+
+let hasSideEffects = 0 in
+class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5),
+ "$Pd = "#MnOp#"($Rs, #$u5)",
+ [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> u5;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0;
+ let Inst{12-8} = u5;
+ let Inst{1-0} = Pd;
+}
-// togglebit.
-def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = setbit($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
- (shl 1, u5ImmPred:$src2)))]>;
+let hasSideEffects = 0 in
+class T_TEST_BIT_REG<string MnOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#MnOp#"($Rs, $Rt)",
+ [], "", S_3op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-22} = 0b011100;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
-// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31).
-def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = togglebit($src1, #$src2)",
- []>;
+let isCodeGenOnly = 0 in {
+def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
+def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+ def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
+ (S2_tstbit_i IntRegs:$Rs, 0)>;
+ def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
+ (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
+}
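+// Truncation to i1 is implemented as a test of bit 0; a 64-bit source
+// only needs its low subregister examined.
+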
+let hasSideEffects = 0 in
+class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6),
+ "$Pd = "#MnOp#"($Rs, #$u6)",
+ [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<6> u6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = u6;
+ let Inst{1-0} = Pd;
+}
+
+let hasSideEffects = 0 in
+class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#MnOp#"($Rs, $Rt)",
+ [], "", S_3op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-24} = 0b0111;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
-def : Pat <(xor (i32 IntRegs:$src1), -2147483648),
- (TOGBIT_31 (i32 IntRegs:$src1), 31)>;
+let isCodeGenOnly = 0 in {
+def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
+def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
+def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)),
+ (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>;
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
+ (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
+}
+
+let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
+ (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
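+// (Rs & Rt) == Rt holds exactly when every bit set in Rt is also set in
+// Rs, which is the condition bitsset computes.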
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PERM +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
// Predicate transfer.
-let neverHasSideEffects = 1 in
-def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
- "$dst = $src1 /* Should almost never emit this. */",
- []>;
+let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
+ "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Ps;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1001;
+ let Inst{22} = 0b1;
+ let Inst{17-16} = Ps;
+ let Inst{4-0} = Rd;
+}
+
+// Transfer general register to predicate.
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
+ "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+ let Inst{27-21} = 0b0101010;
+ let Inst{20-16} = Rs;
+ let Inst{1-0} = Pd;
+}
+
-def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1 /* Should almost never emit this. */",
- [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>;
//===----------------------------------------------------------------------===//
// STYPE/PRED -
//===----------------------------------------------------------------------===//
@@ -1707,88 +3546,59 @@ def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// STYPE/SHIFT +
//===----------------------------------------------------------------------===//
+class S_2OpInstImm<string Mnemonic, bits<3> MajOp, bits<3> MinOp,
+ Operand Imm, list<dag> pattern = [], bit isRnd = 0>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2),
+ "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""),
+ pattern> {
+ bits<5> src1;
+ bits<5> dst;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+}
+
+class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3> MinOp>
+ : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm,
+ [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]> {
+ bits<6> src2;
+ let Inst{13-8} = src2;
+}
+
// Shift by immediate.
-def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = asr($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
- u5ImmPred:$src2))]>;
-
-def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- "$dst = asr($src1, #$src2)",
- [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
- u6ImmPred:$src2))]>;
-
-def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = asl($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
- u5ImmPred:$src2))]>;
-
-def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- "$dst = asl($src1, #$src2)",
- [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
- u6ImmPred:$src2))]>;
-
-def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = lsr($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
- u5ImmPred:$src2))]>;
-
-def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- "$dst = lsr($src1, #$src2)",
- [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
- u6ImmPred:$src2))]>;
-
-// Shift by immediate and add.
-let AddedComplexity = 100 in
-def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- u3Imm:$src3),
- "$dst = addasl($src1, $src2, #$src3)",
- [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u3ImmPred:$src3)))]>;
-
-// Shift by register.
-def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = asl($src1, $src2)",
- [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = asr($src1, $src2)",
- [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def LSL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = lsl($src1, $src2)",
- [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = lsr($src1, $src2)",
- [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- "$dst = asl($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- "$dst = lsl($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- IntRegs:$src2),
- "$dst = asr($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
-
-def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- IntRegs:$src2),
- "$dst = lsr($src1, $src2)",
- [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+let isCodeGenOnly = 0 in {
+def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
+def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
+def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
+}
+
+// Shift left by small amount and add.
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0,
+ isCodeGenOnly = 0 in
+def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3),
+ "$Rd = addasl($Rt, $Rs, #$u3)" ,
+ [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt),
+ (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))],
+ "", S_3op_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+ bits<3> u3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b0100000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = u3;
+ let Inst{4-0} = Rd;
+ }
//===----------------------------------------------------------------------===//
// STYPE/SHIFT -
@@ -1815,38 +3625,191 @@ def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
//===----------------------------------------------------------------------===//
// SYSTEM/USER +
//===----------------------------------------------------------------------===//
-def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
-def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
- [SDNPHasChain]>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
-let hasSideEffects = 1, isSolo = 1 in
+let hasSideEffects = 1, isSoloAX = 1, isCodeGenOnly = 0 in
def BARRIER : SYSInst<(outs), (ins),
"barrier",
- [(HexagonBARRIER)]>;
+ [(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
+ let Inst{31-28} = 0b1010;
+ let Inst{27-21} = 0b1000000;
+}
//===----------------------------------------------------------------------===//
// SYSTEM/SUPER -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// CRUSER - Type.
+//===----------------------------------------------------------------------===//
+// HW loop
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2),
+ #mnemonic#"($offset, #$src2)",
+ [], "" , CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<10> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2{9-5};
+ let Inst{12-8} = offset{8-4};
+ let Inst{7-5} = src2{4-2};
+ let Inst{4-3} = offset{3-2};
+ let Inst{1-0} = src2{1-0};
+}
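+// Both the loop count and the PC-relative offset are scattered across
+// non-contiguous fields; offset{1-0} is never encoded since loop targets
+// are 4-byte aligned (opExtentAlign = 2).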
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
+ #mnemonic#"($offset, $src2)",
+ [], "" ,CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<5> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b000000;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2;
+ let Inst{12-8} = offset{8-4};
+ let Inst{4-3} = offset{3-2};
+ }
+
+multiclass LOOP_ri<string mnemonic> {
+ def i : LOOP_iBase<mnemonic, brtarget>;
+ def r : LOOP_rBase<mnemonic, brtarget>;
+}
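+// As a sketch of what the multiclass expands to, "defm J2_loop0 :
+// LOOP_ri<"loop0">" produces J2_loop0i (immediate trip count) and
+// J2_loop0r (register trip count).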
+
+let Defs = [SA0, LC0, USR], isCodeGenOnly = 0 in
+defm J2_loop0 : LOOP_ri<"loop0">;
+
+// Interestingly, only loop0 appears to set usr.lpcfg.
+let Defs = [SA1, LC1], isCodeGenOnly = 0 in
+defm J2_loop1 : LOOP_ri<"loop1">;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC1], Uses = [SA1, LC1] in {
+def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop1",
+ []>;
+}
+
+// Pipelined loop instructions, sp[123]loop0
+let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, isPredicateLate = 1 in
+class SPLOOP_iBase<string SP, bits<2> op>
+ : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10),
+ "p3 = sp"#SP#"loop0($r7_2, #$U10)" > {
+ bits<9> r7_2;
+ bits<10> U10;
+
+ let IClass = 0b0110;
+
+ let Inst{22-21} = op;
+ let Inst{27-23} = 0b10011;
+ let Inst{20-16} = U10{9-5};
+ let Inst{12-8} = r7_2{8-4};
+ let Inst{7-5} = U10{4-2};
+ let Inst{4-3} = r7_2{3-2};
+ let Inst{1-0} = U10{1-0};
+ }
+
+let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, isPredicateLate = 1 in
+class SPLOOP_rBase<string SP, bits<2> op>
+ : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs),
+ "p3 = sp"#SP#"loop0($r7_2, $Rs)" > {
+ bits<9> r7_2;
+ bits<5> Rs;
+
+ let IClass = 0b0110;
+
+ let Inst{22-21} = op;
+ let Inst{27-23} = 0b00001;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = r7_2{8-4};
+ let Inst{4-3} = r7_2{3-2};
+ }
+
+multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
+ def i : SPLOOP_iBase<mnemonic, op>;
+ def r : SPLOOP_rBase<mnemonic, op>;
+}
+
+let isCodeGenOnly = 0 in {
+defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
+defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
+defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
+}
+
+// Transfer between control registers and general-purpose registers.
+let hasSideEffects = 0 in
+class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
+ : CRInst <(outs CTRC:$dst), (ins RC:$src),
+ "$dst = $src", [], "", CR_tc_3x_SLOT3> {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b0110;
+
+ let Inst{27-25} = 0b001;
+ let Inst{24} = isDouble;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = src;
+ let Inst{4-0} = dst;
+ }
+let isCodeGenOnly = 0 in
+def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
+def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
+def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
+
+let hasSideEffects = 0 in
+class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
+ : CRInst <(outs RC:$dst), (ins CTRC:$src),
+ "$dst = $src", [], "", CR_tc_3x_SLOT3> {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b0110;
+
+ let Inst{27-26} = 0b10;
+ let Inst{25} = isSingle;
+ let Inst{24-21} = 0b0000;
+ let Inst{20-16} = src;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, opNewValue = 0, isCodeGenOnly = 0 in
+def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
+def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
+def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
+
+// Y4_trace: Send value to ETM trace.
+let isSoloAX = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
+ "trace($Rs)"> {
+ bits<5> Rs;
+
+ let IClass = 0b0110;
+ let Inst{27-21} = 0b0010010;
+ let Inst{20-16} = Rs;
+ }
-// TFRI64 - assembly mapped.
-let isReMaterializable = 1 in
-def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
- "$dst = #$src1",
- [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
-
-// Pseudo instruction to encode a set of conditional transfers.
-// This instruction is used instead of a mux and trades-off codesize
-// for performance. We conduct this transformation optimistically in
-// the hope that these instructions get promoted to dot-new transfers.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1),
- (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))]>;
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
@@ -1877,28 +3840,6 @@ def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
"$dst = add($src1)",
[(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
-//
-// CR - Type.
-//
-let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
-def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
- "loop0($offset, #$src2)",
- []>;
-}
-
-let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
-def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
- "loop0($offset, $src2)",
- []>;
-}
-
-let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
- Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
-}
-
// Support for generating global address.
// Taken from X86InstrInfo.td.
def SDTHexagonCONST32 : SDTypeProfile<1, 1, [
@@ -1909,44 +3850,44 @@ def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
// HI/LO Instructions
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst.l = #LO($global)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst.h = #HI($global)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
"$dst.l = #LO($imm_value)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
"$dst.h = #HI($imm_value)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
"$dst.l = #LO($jt)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
"$dst.h = #HI($jt)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst.l = #LO($label)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in
+let isReMaterializable = 1, isMoveImm = 1 , hasSideEffects = 0 in
def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst.h = #HI($label)",
[]>;
@@ -2044,22 +3985,16 @@ let Defs = [R29, R30, R31], Uses = [R29] in {
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
// Call subroutine.
-let isCall = 1, neverHasSideEffects = 1,
+let isCall = 1, hasSideEffects = 0,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
def CALL : JInst<(outs), (ins calltarget:$dst),
"call $dst", []>;
}
-// Call subroutine from register.
-let isCall = 1, neverHasSideEffects = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLR : JRInst<(outs), (ins IntRegs:$dst),
- "callr $dst",
- []>;
- }
-
+// Call subroutine indirectly.
+let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in
+def J2_callr : JUMPR_MISC_CALLR<0, 1>;
// Indirect tail-call.
let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in
@@ -2068,13 +4003,15 @@ def TCRETURNR : T_JMPr;
// Direct tail-calls.
let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
isTerminator = 1, isCodeGenOnly = 1 in {
- def TCRETURNtg : T_JMP<(ins calltarget:$dst)>;
- def TCRETURNtext : T_JMP<(ins calltarget:$dst)>;
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst",
+ [], "", J_tc_2early_SLOT23>;
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst",
+ [], "", J_tc_2early_SLOT23>;
}
// Map call instruction.
def : Pat<(call (i32 IntRegs:$dst)),
- (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
+ (J2_callr (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
def : Pat<(call tglobaladdr:$dst),
(CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
def : Pat<(call texternalsym:$dst),
@@ -2090,91 +4027,81 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
// Atomic load and store support
// 8 bit atomic load
def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
- (i32 (LDriub ADDRriS11_0:$src1))>;
+ (i32 (L2_loadrub_io AddrFI:$src1, 0))>;
def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
- (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
+ (i32 (L2_loadrub_io (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
// 16 bit atomic load
def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
- (i32 (LDriuh ADDRriS11_1:$src1))>;
+ (i32 (L2_loadruh_io AddrFI:$src1, 0))>;
def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
- (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
+ (i32 (L2_loadruh_io (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
- (i32 (LDriw ADDRriS11_2:$src1))>;
+ (i32 (L2_loadri_io AddrFI:$src1, 0))>;
def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
- (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
+ (i32 (L2_loadri_io (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
// 64 bit atomic load
def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
- (i64 (LDrid ADDRriS11_3:$src1))>;
+ (i64 (L2_loadrd_io AddrFI:$src1, 0))>;
def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
- (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
+ (i64 (L2_loadrd_io (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
- (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
+ (S2_storerb_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
(i32 IntRegs:$src1)),
- (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset,
+ (S2_storerb_io (i32 IntRegs:$src2), s11_0ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
- (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
+ (S2_storerh_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
(add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
- (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
+ (S2_storerh_io (i32 IntRegs:$src2), s11_1ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
- (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
+ (S2_storeri_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
(i32 IntRegs:$src1)),
- (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset,
+ (S2_storeri_io (i32 IntRegs:$src2), s11_2ImmPred:$offset,
(i32 IntRegs:$src1))>;
def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
- (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>;
+ (S2_storerd_io AddrFI:$src2, 0, (i64 DoubleRegs:$src1))>;
def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
(i64 DoubleRegs:$src1)),
- (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset,
+ (S2_storerd_io (i32 IntRegs:$src2), s11_3ImmPred:$offset,
(i64 DoubleRegs:$src1))>;
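
// A general note on the patterns above: these atomic loads and stores
// lower to the ordinary load/store instructions, so the patterns simply
// reuse the _io forms.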
// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
def : Pat <(and (i32 IntRegs:$src1), 65535),
- (ZXTH (i32 IntRegs:$src1))>;
+ (A2_zxth (i32 IntRegs:$src1))>;
// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
def : Pat <(and (i32 IntRegs:$src1), 255),
- (ZXTB (i32 IntRegs:$src1))>;
+ (A2_zxtb (i32 IntRegs:$src1))>;
// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced;
// if it is, it must be mapped to a NOP.
def : Pat <(add (i1 PredRegs:$src1), -1),
- (NOT_p (i1 PredRegs:$src1))>;
-
-// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
-// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
-def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i32 IntRegs:$src3),
- (i32 IntRegs:$src4)),
- (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)),
- (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
- Requires<[HasV2TOnly]>;
+ (C2_not (i1 PredRegs:$src1))>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
@@ -2196,91 +4123,91 @@ def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
- (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
+ (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
- (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+ (i1 (C2_andn (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
let AddedComplexity = 100 in
def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
- (i64 (COMBINE_rr (TFRI 0),
- (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
+ (i64 (A2_combinew (A2_tfrsi 0),
+ (L2_loadrub_io (CONST32_set tglobaladdr:$global), 0)))>,
Requires<[NoV4T]>;
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
+ (i32 (A2_and (i32 (L2_loadrb_io AddrFI:$addr, 0)), (A2_tfrsi 0x1)))>;
-// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
- (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
+ (i64 (A2_sxtw (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
-// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(SXTH(Rss.lo)).
def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
- (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ (i64 (A2_sxtw (i32 (A2_sxth (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
subreg_loreg))))))>;
-// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(SXTB(Rss.lo)).
def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
- (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ (i64 (A2_sxtw (i32 (A2_sxtb (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
subreg_loreg))))))>;
// We want to avoid emitting pnot instructions as much as possible.
-// Map brcond with an unsupported setcc to a JMP_f.
+// Map brcond with an unsupported setcc to a J2_jumpf.
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
bb:$offset)>;
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
bb:$offset),
- (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
+ (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
+ (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
+ (J2_jumpt (i1 PredRegs:$src1), bb:$offset)>;
// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
bb:$offset),
- (JMP_f (CMPGTri (i32 IntRegs:$src1),
+ (J2_jumpf (C2_cmpgti (i32 IntRegs:$src1),
(DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>;
// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
+ (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
+ (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
bb:$offset)>;
def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
bb:$offset)>;
def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
bb:$offset)>;
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes; so emulate with combines.
def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
(i64 DoubleRegs:$src3)),
- (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i64 (A2_combinew (i32 (C2_mux (i1 PredRegs:$src1),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_hireg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
subreg_hireg)))),
- (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i32 (C2_mux (i1 PredRegs:$src1),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_loreg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
@@ -2290,12 +4217,12 @@ def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2),
(i1 PredRegs:$src3)),
- (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
- (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
+ (C2_or (C2_and (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
+ (C2_and (C2_not (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
def : Pat<(i1 (load ADDRriS11_2:$addr)),
- (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+ (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>;
// Map for truncating from 64 immediates to 32 bit immediates.
def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
@@ -2303,263 +4230,263 @@ def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
// Map for truncating from i64 immediates to i1 bit immediates.
def : Pat<(i1 (trunc (i64 DoubleRegs:$src))),
- (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ (i1 (C2_tfrrp (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg))))>;
// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map memw(Rs) = Rdd -> memw(Rs) = Rt
def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+ (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+ (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
- (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)) )>;
+ (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>;
-// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Map Rdd = anyext(Rs) -> Rdd = A2_sxtw(Rs).
// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
// Better way to do this?
def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
- (i64 (SXTW (i32 IntRegs:$src1)))>;
+ (i64 (A2_sxtw (i32 IntRegs:$src1)))>;
// Map cmple -> cmpgt.
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
+ (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+ (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Rss <= Rtt -> !(Rss > Rtt).
def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+ (i1 (C2_not (C2_cmpgtp (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
// Map cmpne -> cmpeq.
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
+ (i1 (C2_not(i1 (C2_cmpeqi (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
// Map cmpne(Rs) -> !cmpeqe(Rs).
// rs != rt -> !(rs == rt).
def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+ (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
// Convert setne back to xor for hexagon since we compute w/ pred registers.
def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
- (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+ (i1 (C2_xor (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
// Map cmpne(Rss) -> !cmpew(Rss).
// rs != rt -> !(rs == rt).
def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1),
+ (i1 (C2_not (i1 (C2_cmpeqp (i64 DoubleRegs:$src1),
(i64 DoubleRegs:$src2)))))>;
// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt).
// rs >= rt -> !(rt > rs).
def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
+ (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+ (i1 (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2),
+ (i1 (C2_not (i1 (C2_cmpgtp (i64 DoubleRegs:$src2),
(i64 DoubleRegs:$src1)))))>;
// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
+ (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
// rs < rt -> rt > rs.
// We can let the assembler map it, or we can do it in the compiler itself.
def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+ (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
// rss < rtt -> (rtt > rss).
def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+ (i1 (C2_cmpgtp (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
// rs < rt -> rt > rs.
// We can let the assembler map it, or we can do it in the compiler itself.
def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
+ (i1 (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
// rs < rt -> rt > rs.
def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+ (i1 (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
- (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+ (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
- (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+ (i1 (C2_cmpgtui (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
// Generate cmpgtu(Rs, #u9)
def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
- (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>;
+ (i1 (C2_cmpgtui (i32 IntRegs:$src1), u9ExtPred:$src2))>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
+ (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
+ (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
// Map from (Rs <= Rt) -> !(Rs > Rt).
def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+ (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1).
// Map from (Rs <= Rt) -> !(Rs > Rt).
def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+ (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
// Sign extends.
// i1 -> i32
def : Pat <(i32 (sext (i1 PredRegs:$src1))),
- (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>;
+ (i32 (C2_muxii (i1 PredRegs:$src1), -1, 0))>;
// i1 -> i64
def : Pat <(i64 (sext (i1 PredRegs:$src1))),
- (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>;
+ (i64 (A2_combinew (A2_tfrsi -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>;
// Convert sign-extended load back to load and sign extend.
// i8 -> i64
def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
- (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+ (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
// Convert any-extended load back to load and sign extend.
// i8 -> i64
def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
- (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+ (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
// Convert sign-extended load back to load and sign extend.
// i16 -> i64
def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
- (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+ (i64 (A2_sxtw (L2_loadrh_io AddrFI:$src1, 0)))>;
// Convert sign-extended load back to load and sign extend.
// i32 -> i64
def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
- (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>;
// Zero extends.
// i1 -> i32
def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+ (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
// i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
Requires<[NoV4T]>;
// i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
- (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i1 -> i64
def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
- (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
- (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
let AddedComplexity = 100 in
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[NoV4T]>;
let AddedComplexity = 10 in
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
- (i32 (LDriw ADDRriS11_0:$src1))>;
+ (i32 (L2_loadri_io AddrFI:$src1, 0))>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+ (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
- (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
+ (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+// Map from Rss = Pd to Rdd = A2_sxtw (mux(Pd, #1, #0))
def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
- (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
+ (i64 (A2_sxtw (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))))>;
let AddedComplexity = 100 in
@@ -2567,20 +4494,20 @@ def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 (i32 (add IntRegs:$src2,
s11_2ExtPred:$offset2)))))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (LDriw_indexed IntRegs:$src2,
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (L2_loadri_io IntRegs:$src2,
s11_2ExtPred:$offset2)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (LDriw ADDRriS11_2:$srcLow)))>;
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (L2_loadri_io AddrFI:$srcLow, 0)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
IntRegs:$srcLow))>;
let AddedComplexity = 100 in
@@ -2588,26 +4515,26 @@ def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 (i32 (add IntRegs:$src2,
s11_2ExtPred:$offset2)))))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (LDriw_indexed IntRegs:$src2,
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (L2_loadri_io IntRegs:$src2,
s11_2ExtPred:$offset2)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (LDriw ADDRriS11_2:$srcLow)))>;
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (L2_loadri_io AddrFI:$srcLow, 0)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
IntRegs:$srcLow))>;
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
// When there is an offset, we should prefer the pattern below over the pattern above.
@@ -2622,50 +4549,51 @@ def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
// ********************************************
let AddedComplexity = 100 in
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[NoV4T]>;
// anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io AddrFI:$src1, 0)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
- (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+ (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result.
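// The expansion below appears to implement the schoolbook decomposition of a
// 64x64->128 multiply from 32-bit halves, with a = aH:aL and b = bH:bL:
//   a*b = aL*bL + 2^32*(aH*bL + aL*bH) + 2^64*aH*bH
// The upper 64 bits are aH*bH plus the carried-out middle partial products,
// accumulated via M2_dpmpyuu_acc_s0 and aligned by 32 with S2_lsr_i_p.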
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
- (MPYU64_acc
+ (M2_dpmpyuu_acc_s0
(i64
- (COMBINE_rr
- (TFRI 0),
+ (A2_combinew
+ (A2_tfrsi 0),
(i32
(EXTRACT_SUBREG
(i64
- (LSRd_ri
+ (S2_lsr_i_p
(i64
- (MPYU64_acc
+ (M2_dpmpyuu_acc_s0
(i64
- (MPYU64_acc
+ (M2_dpmpyuu_acc_s0
(i64
- (COMBINE_rr (TFRI 0),
+ (A2_combinew (A2_tfrsi 0),
(i32
(EXTRACT_SUBREG
(i64
- (LSRd_ri
+ (S2_lsr_i_p
(i64
- (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ (M2_dpmpyuu_s0
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
subreg_loreg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_loreg)))), 32)),
@@ -2681,25 +4609,26 @@ def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
// Multiply 64-bit signed and use upper result.
def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
- (MPY64_acc
+ (M2_dpmpyss_acc_s0
(i64
- (COMBINE_rr (TFRI 0),
+ (A2_combinew (A2_tfrsi 0),
(i32
(EXTRACT_SUBREG
(i64
- (LSRd_ri
+ (S2_lsr_i_p
(i64
- (MPY64_acc
+ (M2_dpmpyss_acc_s0
(i64
- (MPY64_acc
+ (M2_dpmpyss_acc_s0
(i64
- (COMBINE_rr (TFRI 0),
+ (A2_combinew (A2_tfrsi 0),
(i32
(EXTRACT_SUBREG
(i64
- (LSRd_ri
+ (S2_lsr_i_p
(i64
- (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ (M2_dpmpyuu_s0
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
subreg_loreg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_loreg)))), 32)),
@@ -2744,83 +4673,470 @@ def : Pat<(HexagonWrapperJT tjumptable:$dst),
(i32 (CONST32_set_jt tjumptable:$dst))>;
// XTYPE/SHIFT
-
-// Multi-class for logical operators :
+//
+//===----------------------------------------------------------------------===//
+// Template Class
// Shift by immediate/register and accumulate/logical
-multiclass xtype_imm<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
- def _ri : SInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3),
- !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
- [(set (i32 IntRegs:$dst),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$src2),
- u5ImmPred:$src3)))],
- "$src1 = $dst">;
-
- def d_ri : SInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3),
- !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
- [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))],
- "$src1 = $dst">;
-}
-
-// Multi-class for logical operators :
-// Shift by register and accumulate/logical (32/64 bits)
-multiclass xtype_reg<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
- def _rr : SInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
- [(set (i32 IntRegs:$dst),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">;
+//===----------------------------------------------------------------------===//
+
+// Rx[+-&|]=asr(Rs,#u5)
+// Rx[+-&|^]=lsr(Rs,#u5)
+// Rx[+-&|^]=asl(Rs,#u5)
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5),
+ "$Rx "#opc2#opc1#"($Rs, #$u5)",
+ [(set (i32 IntRegs:$Rx),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))],
+ "$src1 = $Rx", S_2op_tc_2_SLOT23> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> u5;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1110;
+ let Inst{23-22} = majOp{2-1};
+ let Inst{13} = 0b0;
+ let Inst{7} = majOp{0};
+ let Inst{6-5} = minOp;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = u5;
+ }
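+// For instance, with opc1 = "asr" and opc2 = "+= " the class above prints as
+//   Rx += asr(Rs, #u5)
+// i.e. Rx = Rx + (Rs arithmetically shifted right by u5), which is exactly
+// what the (OpNode2 ... (OpNode1 ...)) pattern encodes.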
+
+// Rx[+-&|]=asr(Rs,Rt)
+// Rx[+-&|^]=lsr(Rs,Rt)
+// Rx[+-&|^]=asl(Rs,Rt)
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<2> majOp, bits<2> minOp>
+ : SInst_acc<(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#opc2#opc1#"($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rx),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))],
+ "$src1 = $Rx", S_3op_tc_2_SLOT23 > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{23-22} = majOp;
+ let Inst{7-6} = minOp;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
- def d_rr : SInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
- [(set (i64 DoubleRegs:$dst),
- (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">;
+// Rxx[+-&|]=asr(Rss,#u6)
+// Rxx[+-&|^]=lsr(Rss,#u6)
+// Rxx[+-&|^]=asl(Rss,#u6)
+
+class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6),
+ "$Rxx "#opc2#opc1#"($Rss, #$u6)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))],
+ "$src1 = $Rxx", S_2op_tc_2_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<6> u6;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-22} = majOp{2-1};
+ let Inst{7} = majOp{0};
+ let Inst{6-5} = minOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{13-8} = u6;
+ }
+
+
+// Rxx[+-&|]=asr(Rss,Rt)
+// Rxx[+-&|^]=lsr(Rss,Rt)
+// Rxx[+-&|^]=asl(Rss,Rt)
+// Rxx[+-&|^]=lsl(Rss,Rt)
+
+class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rxx "#opc2#opc1#"($Rss, $Rt)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))],
+ "$src1 = $Rxx", S_3op_tc_2_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = majOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = minOp;
+ let Inst{4-0} = Rxx;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multi-class for the shift instructions with logical/arithmetic operators.
+//===----------------------------------------------------------------------===//
+multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp > {
+ def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1,
+ OpNode2, majOp, minOp >;
+ def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1,
+ OpNode2, majOp, minOp >;
}
-multiclass basic_xtype_imm<string OpcStr, SDNode OpNode> {
-let AddedComplexity = 100 in
- defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>;
- defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>;
- defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>;
- defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>;
+multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>;
+
+ defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>;
+ defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>;
+ defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>;
}
-multiclass basic_xtype_reg<string OpcStr, SDNode OpNode> {
+multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
let AddedComplexity = 100 in
- defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>;
- defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>;
- defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>;
- defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>;
+ defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
}
-multiclass xtype_xor_imm<string OpcStr, SDNode OpNode> {
-let AddedComplexity = 100 in
- defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>;
+let isCodeGenOnly = 0 in {
+defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
+
+defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
+ xtype_xor_imm_acc<"lsr", srl, 0b01>;
+
+defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
+ xtype_xor_imm_acc<"asl", shl, 0b10>;
}
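// Each defm above presumably expands to the usual 32-bit and 64-bit
// accumulating forms, following the naming scheme seen elsewhere in this
// patch, e.g. for S2_asr: S2_asr_i_r_acc, S2_asr_i_r_nac, S2_asr_i_r_and,
// S2_asr_i_r_or, plus the matching S2_asr_i_p_* doubleword variants.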
-defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>,
- xtype_xor_imm<"asl", shl>;
+multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>;
-defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>,
- xtype_xor_imm<"lsr", srl>;
+ def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>;
+ def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>;
+ def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>;
+}
-defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>;
-defm LSL : basic_xtype_reg<"lsl", shl>;
+multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>;
+
+ def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>;
+ def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>;
+ def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>;
+ def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>;
+}
+
+multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
+ defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>;
+ defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
+}
+
+let isCodeGenOnly = 0 in {
+defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
+defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
+defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
+defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
+}
+
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp,
+ bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0>
+ : SInst <(outs RC:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "")
+ #!if(hasShift,":>>1","")
+ #!if(isSat, ":sat", ""),
+ [], "", S_3op_tc_2_SLOT23 > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0001;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = !if (SwapOps, src2, src1);
+ let Inst{12-8} = !if (SwapOps, src1, src2);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
+ bit isSat = 0, bit isRnd = 0, bit hasShift = 0 >
+ : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
+ isSat, isRnd, hasShift>;
+
+let isCodeGenOnly = 0 in
+def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template class used by vector shifts, vector rotates, vector negate,
+// 32-bit and 64-bit shifts, etc.
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in
+class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp,
+ bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] >
+ : SInst <(outs RC:$dst),
+ (ins RC:$src1, IntRegs:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""),
+ pattern, "", S_3op_tc_1_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011);
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1 in
+class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0,
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
+class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>;
+
+
+class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0,
+ [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+
+class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>;
+
+
+// Shift by register
+// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
+
+let isCodeGenOnly = 0 in {
+def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
+def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
+def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
+def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
+}
+
+// Rd=[asr|lsr|asl|lsl](Rs,Rt)
+
+let isCodeGenOnly = 0 in {
+def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
+def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
+def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
+def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
+}
+
+// Shift by register with saturation
+// Rd=asr(Rs,Rt):sat
+// Rd=asl(Rs,Rt):sat
+
+let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
+ def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for 'insert bitfield' instructions
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S3op_insert <string mnemonic, RegisterClass RC>
+ : SInst <(outs RC:$dst),
+ (ins RC:$src1, RC:$src2, DoubleRegs:$src3),
+ "$dst = "#mnemonic#"($src2, $src3)" ,
+ [], "$src1 = $dst", S_3op_tc_1_SLOT23 > {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-26} = 0b10;
+ let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10);
+ let Inst{23} = 0b0;
+ let Inst{20-16} = src2;
+ let Inst{12-8} = src3;
+ let Inst{4-0} = dst;
+ }
+
+let hasSideEffects = 0 in
+class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
+ : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3),
+ "$dst = insert($src1, #$src2, #$src3)",
+ [], "$dst2 = $dst", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+ bits<6> src3;
+ bit bit23;
+ bit bit13;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0);
+ let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23} = bit23;
+ let Inst{22-21} = src3{4-3};
+ let Inst{20-16} = src1;
+ let Inst{13} = bit13;
+ let Inst{12-8} = src2{4-0};
+ let Inst{7-5} = src3{2-0};
+ let Inst{4-0} = dst;
+ }
+
+// Rx=insert(Rs,Rtt)
+// Rx=insert(Rs,#u5,#U5)
+let hasNewValue = 1, isCodeGenOnly = 0 in {
+ def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
+ def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>;
+}
+
+// Rxx=insert(Rss,Rtt)
+// Rxx=insert(Rss,#u6,#U6)
+let isCodeGenOnly = 0 in {
+def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
+def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for 'extract bitfield' instructions
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_S3op_extract <string mnemonic, bits<2> MinOp>
+ : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
+ "$Rd = "#mnemonic#"($Rs, $Rtt)",
+ [], "", S_3op_tc_2_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rtt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rtt;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+let hasSideEffects = 0 in
+class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
+ RegisterClass RC, Operand ImmOp>
+ : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3),
+ "$dst = "#mnemonic#"($src1, #$src2, #$src3)",
+ [], "", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+ bits<6> src3;
+ bit bit23;
+ bit bit13;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5},
+ !if (!eq(mnemonic, "extractu"), 0, 1));
+
+ let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23} = bit23;
+ let Inst{22-21} = src3{4-3};
+ let Inst{20-16} = src1;
+ let Inst{13} = bit13;
+ let Inst{12-8} = src2{4-0};
+ let Inst{7-5} = src3{2-0};
+ let Inst{4-0} = dst;
+ }
+
+// Extract bitfield
+
+// Rdd=extractu(Rss,Rtt)
+// Rdd=extractu(Rss,#u6,#U6)
+let isCodeGenOnly = 0 in {
+def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
+def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>;
+}
+
+// Rd=extractu(Rs,Rtt)
+// Rd=extractu(Rs,#u5,#U5)
+let hasNewValue = 1, isCodeGenOnly = 0 in {
+ def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
+ def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
+}
+
+//===----------------------------------------------------------------------===//
+// :raw form of the tableidx[bdhw] insns
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class tableidxRaw<string OpStr, bits<2>MinOp>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6),
+ "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw",
+ [], "$Rx = $_dst_" > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<4> u4;
+ bits<6> S6;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b0111;
+ let Inst{23-22} = MinOp;
+ let Inst{21} = u4{3};
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = S6;
+ let Inst{7-5} = u4{2-0};
+ let Inst{4-0} = Rx;
+ }
+
+let isCodeGenOnly = 0 in {
+def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
+def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
+def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
+def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
+}
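+
+// The :raw forms encode the signed adjustment #$S6 directly; the usual
+// assembler-level tableidx[bdhw](Rs, #u4, #U5) syntax is presumably mapped
+// onto these raw encodings by the assembler.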
// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
- (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+ (i32 (M2_mpysin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
//===----------------------------------------------------------------------===//
// V3 Instructions +
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 7e75554e7fcd..8e9147600fa6 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -21,13 +21,52 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
// J +
//===----------------------------------------------------------------------===//
// Call subroutine.
-let isCall = 1, neverHasSideEffects = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
- P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLv3 : JInst<(outs), (ins calltarget:$dst),
- "call $dst", []>, Requires<[HasV3T]>;
+let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
+ Defs = VolatileV3.Regs, isPredicable = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 0,
+ isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
+class T_Call<string ExtStr>
+ : JInst<(outs), (ins calltarget:$dst),
+ "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ bits<24> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b101;
+ let Inst{24-16,13-1} = dst{23-2};
+ let Inst{0} = 0b0;
+}
+
+let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
+ Defs = VolatileV3.Regs, isPredicated = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 1,
+ isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
+class T_CallPred<bit IfTrue, string ExtStr>
+ : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst),
+ CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ let isPredicatedFalse = !if(IfTrue,0,1);
+ bits<2> Pu;
+ bits<17> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-24} = 0b1101;
+ let Inst{23-22,20-16,13,7-1} = dst{16-2};
+ let Inst{21} = !if(IfTrue,0,1);
+ let Inst{11} = 0b0;
+ let Inst{9-8} = Pu;
+}
+
+multiclass T_Calls<string ExtStr> {
+ def NAME : T_Call<ExtStr>;
+ def t : T_CallPred<1, ExtStr>;
+ def f : T_CallPred<0, ExtStr>;
}
+let isCodeGenOnly = 0 in
+defm J2_call: T_Calls<"">, PredRel;
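+
+// T_Calls expands here to the direct call and its two predicated forms:
+// J2_call, J2_callt and J2_callf, related through PredRel.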
+
//===----------------------------------------------------------------------===//
// J -
//===----------------------------------------------------------------------===//
@@ -37,7 +76,7 @@ let isCall = 1, neverHasSideEffects = 1,
// JR +
//===----------------------------------------------------------------------===//
// Call subroutine from register.
-let isCall = 1, neverHasSideEffects = 1,
+let isCall = 1, hasSideEffects = 0,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst),
@@ -53,27 +92,69 @@ let isCall = 1, neverHasSideEffects = 1,
// ALU64/ALU +
//===----------------------------------------------------------------------===//
-let AddedComplexity = 200 in
-def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = max($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>,
-Requires<[HasV3T]>;
-
-let AddedComplexity = 200 in
-def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = min($src2, $src1)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1))),
- (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2))))]>,
-Requires<[HasV3T]>;
+
+let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23,
+ validSubTargets = HasV3SubT, isCodeGenOnly = 0 in
+def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
+
+class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
+ : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
+
+let isCodeGenOnly = 0 in {
+def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
+def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
+}
+
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
+ [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
+ (i64 DoubleRegs:$Rt))))],
+ "", ALU64_tc_1_SLOT23>;
+
+
+let hasSideEffects = 0 in
+class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
+ : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs),
+ "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
+ #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00111;
+ let Inst{22-21} = !if(isMax, 0b10, 0b01);
+ let Inst{20-16} = !if(isMax, Rt, Rs);
+ let Inst{12-8} = !if(isMax, Rs, Rt);
+ let Inst{7} = 0b1;
+ let Inst{6} = !if(isMax, 0b0, 0b1);
+ let Inst{5} = isUnsigned;
+ let Inst{4-0} = Rd;
+}
+
+let isCodeGenOnly = 0 in {
+def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
+def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
+def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
+def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
+}
+
+multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
+ defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
+}
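+
+// Assuming T_MinMax_pats follows the usual select-based min/max matching,
+// each defm above covers both operand orders of the comparison, e.g.:
+//   (select (i1 (setgt Rss, Rtt)), Rss, Rtt) -> A2_maxp
+//   (select (i1 (setgt Rss, Rtt)), Rtt, Rss) -> A2_minp   (via SwapInst)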
//===----------------------------------------------------------------------===//
// ALU64/ALU -
@@ -100,8 +181,8 @@ Requires<[HasV3T]>;
// Map call instruction
def : Pat<(call (i32 IntRegs:$dst)),
- (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
+ (J2_call (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
def : Pat<(call tglobaladdr:$dst),
- (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+ (J2_call tglobaladdr:$dst)>, Requires<[HasV3T]>;
def : Pat<(call texternalsym:$dst),
- (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
+ (J2_call texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index db5b7eaa68f8..08bfd676fed8 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,15 +11,32 @@
//
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
-class T_Immext<dag ins> :
- EXTENDERInst<(outs), ins, "immext(#$imm)", []>,
- Requires<[HasV4T]>;
+let hasSideEffects = 0 in
+class T_Immext<Operand ImmType>
+ : EXTENDERInst<(outs), (ins ImmType:$imm),
+ "immext(#$imm)", []> {
+ bits<32> imm;
+ let IClass = 0b0000;
+
+ let Inst{27-16} = imm{31-20};
+ let Inst{13-0} = imm{19-6};
+ }
+
+def A4_ext : T_Immext<u26_6Imm>;
+let isCodeGenOnly = 1 in {
+ let isBranch = 1 in
+ def A4_ext_b : T_Immext<brtarget>;
+ let isCall = 1 in
+ def A4_ext_c : T_Immext<calltarget>;
+ def A4_ext_g : T_Immext<globaladdress>;
+}
-def IMMEXT_b : T_Immext<(ins brtarget:$imm)>;
-def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
-def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
-def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
+def BITPOS32 : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-31] that will be set.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm);
+}]>;
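+
+// BITPOS32 is intended for single-bit mask immediates, e.g. a mask of 0x80
+// is transformed into bit position 7; XformMskToBitPosU5Imm is the C++
+// helper that performs the conversion.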
// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
@@ -95,63 +112,152 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
//===----------------------------------------------------------------------===//
// ALU32 +
//===----------------------------------------------------------------------===//
-// Generate frame index addresses.
-let neverHasSideEffects = 1, isReMaterializable = 1,
-isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
-def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s32Imm:$offset),
- "$dst = add($src1, ##$offset)",
- []>,
- Requires<[HasV4T]>;
-// Rd=cmp.eq(Rs,#s8)
-let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
-isExtentSigned = 1, opExtentBits = 8 in
-def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s8Ext:$s8),
- "$Rd = cmp.eq($Rs, #$s8)",
- [(set (i32 IntRegs:$Rd),
- (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
- s8ExtPred:$s8)))))]>,
- Requires<[HasV4T]>;
-
-// Preserve the TSTBIT generation
-def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
- (i32 IntRegs:$src1))), 0)))),
- (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- 1, 0))>;
-
-// Interfered with tstbit generation, above pattern preserves, see : tstbit.ll
-// Rd=cmp.ne(Rs,#s8)
-let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
-isExtentSigned = 1, opExtentBits = 8 in
-def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s8Ext:$s8),
- "$Rd = !cmp.eq($Rs, #$s8)",
- [(set (i32 IntRegs:$Rd),
- (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
- s8ExtPred:$s8)))))]>,
- Requires<[HasV4T]>;
-
-// Rd=cmp.eq(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = cmp.eq($Rs, $Rt)",
- [(set (i32 IntRegs:$Rd),
- (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
- IntRegs:$Rt)))))]>,
- Requires<[HasV4T]>;
+class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev>
+ : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> {
+ let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
+}
+
+let BaseOpcode = "andn_rr", CextOpcode = "andn", isCodeGenOnly = 0 in
+def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
+let BaseOpcode = "orn_rr", CextOpcode = "orn", isCodeGenOnly = 0 in
+def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
+
+let CextOpcode = "rcmp.eq", isCodeGenOnly = 0 in
+def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
+let CextOpcode = "!rcmp.eq", isCodeGenOnly = 0 in
+def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
+
+let isCodeGenOnly = 0 in {
+def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
+def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
+def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
+}
+
+// Pats for instruction selection.
+
+// A class to embed the usual comparison patfrags within a zext to i32.
+// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
+// names, or else the frag's "body" won't match the operands.
+class CmpInReg<PatFrag Op>
+ : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
+
+def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
+def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
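+
+// For example, CmpInReg<seteq> should be equivalent to writing
+//   PatFrag<(ops node:$lhs, node:$rhs),
+//           (i32 (zext (i1 (seteq node:$lhs, node:$rhs))))>
+// since Op.Fragment splices the operator's own body into the new frag.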
+
+class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
+ ImmRegRel {
+ let validSubTargets = HasV4SubT;
+ let InputType = "reg";
+ let CextOpcode = mnemonic;
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+ let Inst{27-21} = 0b0111110;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = Pd;
+}
+
+let isCodeGenOnly = 0 in {
+def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
+def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
+def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
+def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
+def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
+def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
+}
+
+class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
+ Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
+ : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
+ "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
+ ImmRegRel {
+ let validSubTargets = HasV4SubT;
+ let InputType = "imm";
+ let CextOpcode = mnemonic;
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+ let isExtendable = IsImmExt;
+ let opExtendable = !if (IsImmExt, 2, 0);
+ let isExtentSigned = IsImmSigned;
+ let opExtentBits = ImmBits;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<8> Imm;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b1101;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-5} = Imm;
+ let Inst{4} = 0b0;
+ let Inst{3} = IsHalf;
+ let Inst{1-0} = Pd;
+}
+
+let isCodeGenOnly = 0 in {
+def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>;
+def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>;
+def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>;
+def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>;
+def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>;
+def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>;
+}
+class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
+ : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
+ ImmRegRel {
+ let validSubTargets = HasV4SubT;
+ let InputType = "imm";
+ let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
+ let isExtendable = 1;
+ let opExtendable = 2;
+ let isExtentSigned = 1;
+ let opExtentBits = 8;
+ let hasNewValue = 1;
+
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{22} = 0b1;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+}
+
+let isCodeGenOnly = 0 in {
+def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
+def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
+}
+
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>;
+
+// Preserve the S2_tstbit_r generation
+def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
-// Rd=cmp.ne(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = !cmp.eq($Rs, $Rt)",
- [(set (i32 IntRegs:$Rd),
- (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
- IntRegs:$Rt)))))]>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// ALU32 -
@@ -162,24 +268,31 @@ def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
// ALU32/PERM +
//===----------------------------------------------------------------------===//
-// Combine
-// Rdd=combine(Rs, #s8)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
- neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s8Ext:$src2),
- "$dst = combine($src1, #$src2)",
- []>,
- Requires<[HasV4T]>;
+// Combine a word and an immediate into a register pair.
+let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1,
+ opExtentBits = 8 in
+class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
+ : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
-// Rdd=combine(#s8, Rs)
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
- neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
- (ins s8Ext:$src1, IntRegs:$src2),
- "$dst = combine(#$src1, $src2)",
- []>,
- Requires<[HasV4T]>;
+let opExtendable = 2, isCodeGenOnly = 0 in
+def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rdd = combine($Rs, #$s8)">;
+
+let opExtendable = 1, isCodeGenOnly = 0 in
+def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
+ "$Rdd = combine(#$s8, $Rs)">;
def HexagonWrapperCombineRI_V4 :
SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
@@ -187,23 +300,31 @@ def HexagonWrapperCombineIR_V4 :
SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
- (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
+ (A4_combineri IntRegs:$r, s8ExtPred:$i)>,
Requires<[HasV4T]>;
def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
- (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
+ (A4_combineir s8ExtPred:$i, IntRegs:$r)>,
Requires<[HasV4T]>;
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
- neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, u6Ext:$src2),
- "$dst = combine(#$src1, #$src2)",
- []>,
- Requires<[HasV4T]>;
+// A4_combineii: Set two small immediates.
+let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
+def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
+ "$Rdd = combine(#$s8, #$U6)"> {
+ bits<5> Rdd;
+ bits<8> s8;
+ bits<6> U6;
+
+ let IClass = 0b0111;
+ let Inst{27-23} = 0b11001;
+ let Inst{20-16} = U6{5-1};
+ let Inst{13} = U6{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
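+
+// Example assembly: Rdd = combine(#-2, #60). Note that only the second
+// (unsigned) immediate is constant-extendable here (opExtendable = 2).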
//===----------------------------------------------------------------------===//
-// ALU32/PERM +
+// ALU32/PERM -
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -212,7 +333,7 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
//===----------------------------------------------------------------------===//
// Template class for load instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+let isExtended = 1, opExtendable = 2, hasSideEffects = 0,
validSubTargets = HasV4SubT, addrMode = AbsoluteSet in
class T_LD_abs_set<string mnemonic, RegisterClass RC>:
LDInst2<(outs RC:$dst1, IntRegs:$dst2),
@@ -228,112 +349,152 @@ def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>;
def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>;
def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
+//===----------------------------------------------------------------------===//
+// Template classes for the non-predicated load instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>:
+ LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2),
+ "$dst = "#mnemonic#"($src1 + $src2<<#$u2)",
+ [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<2> u2;
-// multiclass for load instructions with base + register offset
-// addressing mode
-multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
- []>, Requires<[HasV4T]>;
-}
+ let IClass = 0b0011;
-multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-0} = dst;
}
-}
-let neverHasSideEffects = 1 in
-multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated load instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isNot, bit isPredNew>:
+ LDInst <(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)",
+ [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<5> src3;
+ bits<2> u2;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+
+ let IClass = 0b0011;
+
+ let Inst{27-26} = 0b00;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{12-8} = src3;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = src1;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// multiclass for load instructions with base + register offset
+// addressing mode
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = BaseRegOffset in
+multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp > {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl,
+ InputType = "reg" in {
let isPredicable = 1 in
- def NAME#_V4 : LDInst2<(outs RC:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
- []>, Requires<[HasV4T]>;
-
- let isPredicated = 1 in {
- defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0 >;
- defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
- }
+ def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>;
+
+ // Predicated
+ def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>;
+ def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>;
+
+ // Predicated new
+ def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>;
+ def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>;
}
}
-let addrMode = BaseRegOffset in {
- let accessSize = ByteAccess in {
- defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>,
- AddrModeRel;
- defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>,
- AddrModeRel;
- }
- let accessSize = HalfWordAccess in {
- defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
- defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>,
- AddrModeRel;
- }
- let accessSize = WordAccess in
- defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+let hasNewValue = 1, accessSize = ByteAccess, isCodeGenOnly = 0 in {
+ defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
+ defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
+}
- let accessSize = DoubleWordAccess in
- defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>,
- AddrModeRel;
+let hasNewValue = 1, accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+ defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
+ defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
}
+let hasNewValue = 1, accessSize = WordAccess, isCodeGenOnly = 0 in
+defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
+
+let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
+
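+// Each defm above yields the base instruction plus four predicated forms,
+// e.g. for loadrb: L4_loadrb_rr, L4_ploadrbt_rr, L4_ploadrbf_rr,
+// L4_ploadrbtnew_rr and L4_ploadrbfnew_rr; the L4_loadr*_rr names are the
+// ones the selection patterns below refer to.
+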
// Selection patterns for load instructions with base + register offset and a
// non-zero immediate value. The immediate value is used to left-shift the
// second register operand.
let AddedComplexity = 40 in {
def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDrib_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadrb_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDriub_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadrub_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDriub_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadrub_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDrih_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadrh_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadruh_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadruh_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i32 (load (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDriw_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadri_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
def : Pat <(i64 (load (add IntRegs:$src1,
(shl IntRegs:$src2, u2ImmPred:$offset)))),
- (LDrid_indexed_shl_V4 IntRegs:$src1,
+ (L4_loadrd_rr IntRegs:$src1,
IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
}
@@ -343,114 +504,114 @@ def : Pat <(i64 (load (add IntRegs:$src1,
// zero immediate value.
let AddedComplexity = 10 in {
def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
- (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadrd_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadrb_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadrh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
- (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
+ (L4_loadri_rr IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
}
// zext i1->i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ (i64 (A4_combineir 0, (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
Requires<[HasV4T]>;
// zext i32->i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
+ (i64 (A4_combineir 0, (i32 IntRegs:$src1)))>,
Requires<[HasV4T]>;
// zext i8->i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
- (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[HasV4T]>;
// zext i1->i64
def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
- (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[HasV4T]>;
// zext i16->i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadruh_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
- (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadruh_io IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[HasV4T]>;
// anyext i16->i64
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadrh_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
- (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadrh_io IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[HasV4T]>;
// zext i32->i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 100 in
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[HasV4T]>;
// anyext i32->i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
Requires<[HasV4T]>;
let AddedComplexity = 100 in
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[HasV4T]>;
@@ -482,124 +643,209 @@ def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>;
def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>;
//===----------------------------------------------------------------------===//
-// multiclass for store instructions with base + register offset addressing
-// mode
+// Template classes for the non-predicated store instructions with
+// base + register offset addressing mode
//===----------------------------------------------------------------------===//
-multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- RC:$src5),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-}
+let isPredicable = 1 in
+class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
+ : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+ mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
+ [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel {
-multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<5> Rt;
+
+ let IClass = 0b0011;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-0} = Rt;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isNot, bit isPredNew, bit isH>
+ : STInst <(outs),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+
+ !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
+ [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<5> Rt;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+
+ let IClass = 0b0011;
+
+ let Inst{27-26} = 0b01;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = Pv;
+ let Inst{4-0} = Rt;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the new-value store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isNewValue = 1, opNewValue = 3 in
+class T_store_new_rr <string mnemonic, bits<2> MajOp> :
+ NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new",
+ [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel {
+
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<3> Nt;
+
+ let IClass = 0b0011;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-3} = MajOp;
+ let Inst{2-0} = Nt;
}
-}
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated new-value store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, isNewValue = 1, opNewValue = 4 in
+class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew>
+ : NVInst<(outs),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new",
+ [], "", V4LDST_tc_st_SLOT0>, AddrModeRel {
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<3> Nt;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+
+ let IClass = 0b0011;
+ let Inst{27-26} = 0b01;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = 0b101;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = Pv;
+ let Inst{4-3} = MajOp;
+ let Inst{2-0} = Nt;
+ }
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + register offset addressing
+// mode
+//===----------------------------------------------------------------------===//
let isNVStorable = 1 in
-multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp, bit isH = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- let isPredicable = 1 in
- def NAME#_V4 : STInst2<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
- mnemonic#"($src1+$src2<<#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
+ def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>;
- let isPredicated = 1 in {
- defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0 >;
- defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>;
- }
+ // Predicated
+ def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>;
+ def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>;
+
+ // Predicated new
+ def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>;
+ def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>;
}
}
+//===----------------------------------------------------------------------===//
// multiclass for new-value store instructions with base + register offset
// addressing mode.
-multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- RC:$src5),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-}
-
-multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
- }
-}
-
+//===----------------------------------------------------------------------===//
let mayStore = 1, isNVStore = 1 in
-multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
+multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
+ bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- let isPredicable = 1 in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
- mnemonic#"($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+ def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>;
- let isPredicated = 1 in {
- defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0 >;
- defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>;
- }
+ // Predicated
+ def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>;
+ def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>;
+
+ // Predicated new
+ def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>;
}
}
-let addrMode = BaseRegOffset, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in {
+let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0,
+ isCodeGenOnly = 0 in {
let accessSize = ByteAccess in
- defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>,
- ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+ defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
+ ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
let accessSize = HalfWordAccess in
- defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>,
- ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+ defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>,
+ ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>;
let accessSize = WordAccess in
- defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>,
- ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+ defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>,
+ ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>;
let isNVStorable = 0, accessSize = DoubleWordAccess in
- defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel;
+ defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>;
+
+ let isNVStorable = 0, accessSize = HalfWordAccess in
+ defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
}
let Predicates = [HasV4T], AddedComplexity = 10 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src4),
(add IntRegs:$src1, (shl IntRegs:$src2,
u2ImmPred:$src3))),
- (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ (S4_storerb_rr IntRegs:$src1, IntRegs:$src2,
u2ImmPred:$src3, IntRegs:$src4)>;
def : Pat<(truncstorei16 (i32 IntRegs:$src4),
(add IntRegs:$src1, (shl IntRegs:$src2,
u2ImmPred:$src3))),
- (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ (S4_storerh_rr IntRegs:$src1, IntRegs:$src2,
u2ImmPred:$src3, IntRegs:$src4)>;
def : Pat<(store (i32 IntRegs:$src4),
(add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ (S4_storeri_rr IntRegs:$src1, IntRegs:$src2,
u2ImmPred:$src3, IntRegs:$src4)>;
def : Pat<(store (i64 DoubleRegs:$src4),
(add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ (S4_storerd_rr IntRegs:$src1, IntRegs:$src2,
u2ImmPred:$src3, DoubleRegs:$src4)>;
}
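+// As a hedged source-level sketch (not part of this patch), the patterns
+// above fire for DAGs of the form store(val, add(base, shl(index, #u2))),
+// e.g. the array store in:
+//   void f(int *a, int i, int x) { a[i] = x; }
+// which lowers to S4_storeri_rr with a #2 shift (guarded by HasV4T above).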
@@ -668,74 +914,123 @@ defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
// TODO: needs to be implemented.
//===----------------------------------------------------------------------===//
+// Template class
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+ opExtendable = 2 in
+class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
+ : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8),
+ mnemonic#"($Rs+#$offset)=#$S8",
+ [], "", V4LDST_tc_st_SLOT01>,
+ ImmRegRel, PredNewRel {
+ bits<5> Rs;
+ bits<8> S8;
+ bits<8> offset;
+ bits<6> offsetBits;
+
+ string OffsetOpStr = !cast<string>(OffsetOp);
+ let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+ !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+ /* u6_0Imm */ offset{5-0}));
+
+ let IClass = 0b0011;
+
+ let Inst{27-25} = 0b110;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-7} = offsetBits;
+ let Inst{13} = S8{7};
+ let Inst{6-0} = S8{6-0};
+ }
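+// Note on offsetBits (a worked example; values assumed): the assembly offset
+// is a byte offset, but the encoding stores it pre-scaled by the access
+// size. For the "memw" instance OffsetOp is u6_2Imm, so #40 is encoded as
+// offset{7-2} = 40 >> 2 = 10.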
+
+let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
+ opExtendable = 3 in
+class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+ bit isPredNot, bit isPredNew >
+ : STInst <(outs ),
+ (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6),
+ !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+#$offset)=#$S6",
+ [], "", V4LDST_tc_st_SLOT01>,
+ ImmRegRel, PredNewRel {
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<6> S6;
+ bits<8> offset;
+ bits<6> offsetBits;
+
+ string OffsetOpStr = !cast<string>(OffsetOp);
+ let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+ !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+ /* u6_0Imm */ offset{5-0}));
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b0011;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24} = isPredNew;
+ let Inst{23} = isPredNot;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = S6{5};
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = Pv;
+ let Inst{4-0} = S6{4-0};
+ }
+
+
+//===----------------------------------------------------------------------===//
// multiclass for store instructions with base + immediate offset
// addressing mode and immediate stored value.
// mem[bhw](Rs+#u6:[012])=#S8
// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
//===----------------------------------------------------------------------===//
-multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-}
-multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
- }
+multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+ bit PredNot> {
+ def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
+ // Predicate new
+ def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
}
-let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
-multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
+ bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
- let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
- def NAME#_V4 : STInst2<(outs),
- (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
- mnemonic#"($src1+#$src2) = #$src3",
- []>,
- Requires<[HasV4T]>;
+ def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
- let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
- defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
- defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1 >;
- }
+ defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
+ defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
}
}
-let addrMode = BaseImmOffset, InputType = "imm",
-validSubTargets = HasV4SubT in {
+let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset,
+ InputType = "imm", isCodeGenOnly = 0 in {
let accessSize = ByteAccess in
- defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+ defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
let accessSize = HalfWordAccess in
- defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+ defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
let accessSize = WordAccess in
- defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
+ defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
}
let Predicates = [HasV4T], AddedComplexity = 10 in {
def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
- (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+ (S4_storeirb_io IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
u6_1ImmPred:$src2)),
- (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
+ (S4_storeirh_io IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
- (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+ (S4_storeiri_io IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
}
let AddedComplexity = 6 in
def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ (S4_storeirb_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
// memb(Rx++#s4:0:circ(Mu))=Rt
@@ -751,7 +1046,7 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// memh(Rs+#s11:1)=Rt.H
let AddedComplexity = 6 in
def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ (S4_storeirh_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
// memh(Rs+Ru<<#u2)=Rt.H
@@ -782,7 +1077,7 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// TODO: Needs to be implemented.
// Store predicate:
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def STriw_pred_V4 : STInst2<(outs),
(ins MEMri:$addr, PredRegs:$src1),
"Error; should not emit",
@@ -791,7 +1086,7 @@ def STriw_pred_V4 : STInst2<(outs),
let AddedComplexity = 6 in
def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ (S4_storeiri_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
// memw(Rx++#s4:2)=Rt
@@ -809,170 +1104,249 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
// NV/ST +
//===----------------------------------------------------------------------===//
-// multiclass for new-value store instructions with base + immediate offset.
-//
-multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
- Operand predImmOp, bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-}
+let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
+class T_store_io_nv <string mnemonic, RegisterClass RC,
+ Operand ImmOp, bits<2>MajOp>
+ : NVInst_V4 <(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3.new",
+ [],"",ST_tc_st_SLOT0> {
+ bits<5> src1;
+ bits<13> src2; // Actual address offset
+ bits<3> src3;
+ bits<11> offsetBits; // Represents offset encoding
+
+ let opExtentBits = !if (!eq(mnemonic, "memb"), 11,
+ !if (!eq(mnemonic, "memh"), 12,
+ !if (!eq(mnemonic, "memw"), 13, 0)));
+
+ let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
+ !if (!eq(mnemonic, "memh"), 1,
+ !if (!eq(mnemonic, "memw"), 2, 0)));
+
+ let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0},
+ !if (!eq(mnemonic, "memh"), src2{11-1},
+ !if (!eq(mnemonic, "memw"), src2{12-2}, 0)));
+
+ let IClass = 0b1010;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = 0b1101;
+ let Inst{20-16} = src1;
+ let Inst{13} = offsetBits{8};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7-0} = offsetBits{7-0};
+ }
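+// The !if chains above keep the constant-extender metadata consistent with
+// the encoding: e.g. "memh" gets opExtentBits = 12 and opExtentAlign = 1
+// because the 11 encoded bits (src2{11-1}) represent a halfword-aligned
+// 12-bit byte offset, so (illustrative) #26 encodes as 26 >> 1 = 13.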
-multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
- bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+let opExtendable = 2, opNewValue = 3, isPredicated = 1 in
+class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp,
+ bits<2>MajOp, bit PredNot, bit isPredNew>
+ : NVInst_V4 <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4),
+ !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4.new",
+ [],"",V2LDST_tc_st_SLOT0> {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src4;
+ bits<6> offsetBits; // Represents offset encoding
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = PredNot;
+ let opExtentBits = !if (!eq(mnemonic, "memb"), 6,
+ !if (!eq(mnemonic, "memh"), 7,
+ !if (!eq(mnemonic, "memw"), 8, 0)));
+
+ let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
+ !if (!eq(mnemonic, "memh"), 1,
+ !if (!eq(mnemonic, "memw"), 2, 0)));
+
+ let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0},
+ !if (!eq(mnemonic, "memh"), src3{6-1},
+ !if (!eq(mnemonic, "memw"), src3{7-2}, 0)));
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b0;
+ let Inst{26} = PredNot;
+ let Inst{25} = isPredNew;
+ let Inst{24-21} = 0b0101;
+ let Inst{20-16} = src2;
+ let Inst{13} = offsetBits{5};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src4;
+ let Inst{7-3} = offsetBits{4-0};
+ let Inst{2} = 0b0;
+ let Inst{1-0} = src1;
}
-}
-let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in
+// multiclass for new-value store instructions with base + immediate offset.
+//
+let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0,
+ isExtendable = 1 in
multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
- bits<5> PredImmBits> {
+ Operand ImmOp, Operand predImmOp, bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1 in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
-
- let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
- isPredicated = 1 in {
- defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>;
- defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>;
- }
+ def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>;
+ // Predicated
+ def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>;
+ def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>;
+ // Predicated new
+ def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
+ MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
+ MajOp, 1, 1>;
}
}
-let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
+let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
let accessSize = ByteAccess in
- defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
- u6_0Ext, 11, 6>, AddrModeRel;
+ defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 0b00>, AddrModeRel;
- let accessSize = HalfWordAccess in
- defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
- u6_1Ext, 12, 7>, AddrModeRel;
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 0b01>, AddrModeRel;
- let accessSize = WordAccess in
- defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
- u6_2Ext, 13, 8>, AddrModeRel;
+ let accessSize = WordAccess, opExtentAlign = 2 in
+ defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 0b10>, AddrModeRel;
}
-// multiclass for new-value store instructions with base + immediate offset.
-// and MEMri operand.
-multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-}
-
-multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated post-increment .new stores
+// mem[bhw](Rx++#s4:[012])=Nt.new
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
+ addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 3 in
+class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [], "$src1 = $_dst_">,
+ AddrModeRel {
+ bits<5> src1;
+ bits<3> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0}));
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src2;
+ let Inst{7} = 0b0;
+ let Inst{6-3} = offsetBits;
+ let Inst{1} = 0b0;
+ }
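+// Illustrative use (registers and offset assumed): the "memw" instance
+// prints as "memw(r5++#8) = r6.new"; the "$src1 = $_dst_" constraint models
+// the post-incremented base, and the s4_2Imm offset covers -32..28 bytes in
+// steps of 4.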
- // Predicate new
- defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>;
+//===----------------------------------------------------------------------===//
+// Template class for predicated post-increment .new stores
+// if([!]Pv[.new]) mem[bhw](Rx++#s4:[012])=Nt.new
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
+ addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 4 in
+class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
+ bits<2> MajOp, bit isPredNot, bit isPredNew >
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2,
+ ImmOp:$offset, IntRegs:$src3),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
+ [], "$src2 = $_dst_">,
+ AddrModeRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<3> src3;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0}));
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7} = isPredNew;
+ let Inst{6-3} = offsetBits;
+ let Inst{2} = isPredNot;
+ let Inst{1-0} = src1;
}
-}
-let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in
-multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC,
- bits<5> ImmBits, bits<5> PredImmBits> {
+multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp,
+ bits<2> MajOp, bit PredNot> {
+ def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>;
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
- isPredicable = 1 in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins MEMri:$addr, RC:$src),
- mnemonic#"($addr) = $src.new",
- []>,
- Requires<[HasV4T]>;
+ // Predicate new
+ def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>;
+}
- let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
- neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>;
- defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>;
- }
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
+ bits<2> MajOp> {
+ let BaseOpcode = "POST_"#BaseOp in {
+ def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>;
+
+ // Predicated
+ defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>;
+ defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>;
}
}
-let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT,
-mayStore = 1 in {
- let accessSize = ByteAccess in
- defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+let accessSize = ByteAccess, isCodeGenOnly = 0 in
+defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
- let accessSize = HalfWordAccess in
- defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
- let accessSize = WordAccess in
- defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
-}
+let accessSize = WordAccess, isCodeGenOnly = 0 in
+defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
//===----------------------------------------------------------------------===//
-// Post increment store
-// mem[bhwd](Rx++#s4:[0123])=Nt.new
+// Template class for post-increment .new stores with register offset
//===----------------------------------------------------------------------===//
-
-multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
- bit isNot, bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-}
-
-multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
- Operand ImmOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
- // Predicate new
- let Predicates = [HasV4T], validSubTargets = HasV4SubT in
- defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>;
- }
-}
-
-let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in
-multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp> {
-
- let BaseOpcode = "POST_"#BaseOp in {
- let isPredicable = 1 in
- def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
- mnemonic#"($src1++#$offset) = $src2.new",
- [],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in {
- defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >;
- defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >;
- }
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3),
+                 mnemonic#"($src1++$src2) = $src3.new",
+ [], "$src1 = $_dst_"> {
+ bits<5> src1;
+ bits<1> src2;
+ bits<3> src3;
+ let accessSize = AccessSz;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1101101;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7} = 0b0;
}
-}
-let addrMode = PostInc, validSubTargets = HasV4SubT in {
-defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
-defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
-defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+let isCodeGenOnly = 0 in {
+def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
}
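+// Illustrative use (register choices assumed): "memw(r5++m0) = r6.new"
+// post-increments r5 by modifier register m0; ModRegs needs only the single
+// src2 bit (Inst{13}) to select m0 or m1.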
// memb(Rx++#s4:0:circ(Mu))=Nt.new
@@ -1002,7 +1376,8 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
// operands.
//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
bit isNegCond, bit isTak>
: NVInst_V4<(outs),
@@ -1010,8 +1385,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
"if ("#!if(isNegCond, "!","")#mnemonic#
"($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
"$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
- #!if(isTak, "t","nt")#" $offset",
- []>, Requires<[HasV4T]> {
+ #!if(isTak, "t","nt")#" $offset", []> {
bits<5> src1;
bits<5> src2;
@@ -1020,8 +1394,8 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
bits<11> offset;
let isTaken = isTak;
- let isBrTaken = !if(isTaken, "true", "false");
let isPredicatedFalse = isNegCond;
+ let opNewValue{0} = NvOpNum;
let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
@@ -1064,7 +1438,8 @@ multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+ Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
+ isCodeGenOnly = 0 in {
defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
@@ -1077,18 +1452,18 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
// with a register and an unsigned immediate (U5) operand.
//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
bit isTak>
: NVInst_V4<(outs),
(ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
- #!if(isTak, "t","nt")#" $offset",
- []>, Requires<[HasV4T]> {
+ #!if(isTak, "t","nt")#" $offset", []> {
let isTaken = isTak;
let isPredicatedFalse = isNegCond;
- let isBrTaken = !if(isTaken, "true", "false");
bits<3> src1;
bits<5> src2;
@@ -1124,7 +1499,8 @@ multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+ Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
+ isCodeGenOnly = 0 in {
defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
@@ -1135,19 +1511,19 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
// with a register and a hardcoded 0/-1 immediate value.
//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
bit isNegCond, bit isTak>
: NVInst_V4<(outs),
(ins IntRegs:$src1, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic
#"($src1.new, #"#ImmVal#")) jump:"
- #!if(isTak, "t","nt")#" $offset",
- []>, Requires<[HasV4T]> {
+ #!if(isTak, "t","nt")#" $offset", []> {
let isTaken = isTak;
let isPredicatedFalse = isNegCond;
- let isBrTaken = !if(isTaken, "true", "false");
bits<3> src1;
bits<11> offset;
@@ -1172,8 +1548,8 @@ multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
string ImmVal> {
let BaseOpcode = BaseOp#_NVJ_ConstImm in {
- defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond
- defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond
+ defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+ defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
}
}
@@ -1182,266 +1558,328 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
- Defs = [PC], neverHasSideEffects = 1 in {
+ Defs = [PC], hasSideEffects = 0, isCodeGenOnly = 0 in {
defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
}
+// J4_hintjumpr: Hint for an indirect jump.
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def J4_hintjumpr: JRInst <
+ (outs),
+ (ins IntRegs:$Rs),
+ "hintjr($Rs)"> {
+ bits<5> Rs;
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010101;
+ let Inst{20-16} = Rs;
+ }
+
//===----------------------------------------------------------------------===//
-// XTYPE/ALU +
+// NV/J -
//===----------------------------------------------------------------------===//
-// Add and accumulate.
-// Rd=add(Rs,add(Ru,#s6))
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6,
-validSubTargets = HasV4SubT in
-def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3),
- "$dst = add($src1, add($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
- s6_16ExtPred:$src3)))]>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
-// Rd=add(Rs,sub(#s6,Ru))
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
-validSubTargets = HasV4SubT in
-def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
- "$dst = add($src1, sub(#$src2, $src3))",
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
- (i32 IntRegs:$src3))))]>,
- Requires<[HasV4T]>;
+// PC-relative add
+let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
+ isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0,
+ Uses = [PC], validSubTargets = HasV4SubT in
+def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6),
+ "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
+ bits<5> Rd;
+ bits<6> u6;
+
+ let IClass = 0b0110;
+ let Inst{27-16} = 0b101001001001;
+ let Inst{12-7} = u6;
+ let Inst{4-0} = Rd;
+ }
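+// Illustrative use (not from this patch): "r0 = add(pc, #4)" yields a
+// PC-relative address; since isExtentSigned = 0 and the operand is u6Ext,
+// larger displacements are reachable through a constant extender.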
-// Generates the same instruction as ADDr_SUBri_V4 but matches different
-// pattern.
-// Rd=add(Rs,sub(#s6,Ru))
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
-validSubTargets = HasV4SubT in
-def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
- "$dst = add($src1, sub(#$src2, $src3))",
- [(set (i32 IntRegs:$dst),
- (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
- (i32 IntRegs:$src3)))]>,
- Requires<[HasV4T]>;
-// Add or subtract doublewords with carry.
-//TODO:
-// Rdd=add(Rss,Rtt,Px):carry
-//TODO:
-// Rdd=sub(Rss,Rtt,Px):carry
+let hasSideEffects = 0 in
+class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
+ : CRInst<(outs PredRegs:$Pd),
+ (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu),
+ "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " #
+ !if (IsNeg,"!","") # "$Pu))",
+ [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+ bits<2> Pt;
+ bits<2> Pu;
+
+ let IClass = 0b0110;
+ let Inst{27-24} = 0b1011;
+ let Inst{23} = IsNeg;
+ let Inst{22-21} = OpBits;
+ let Inst{20} = 0b1;
+ let Inst{17-16} = Ps;
+ let Inst{13} = 0b0;
+ let Inst{9-8} = Pt;
+ let Inst{7-6} = Pu;
+ let Inst{1-0} = Pd;
+}
+let isCodeGenOnly = 0 in {
+def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
+def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
+def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
+def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>;
+def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
+def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
+def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
+def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
+}
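+// Illustrative mapping (predicate registers assumed):
+// "p0 = and(p1, or(p2, !p3))" is C4_and_orn, i.e. MnOp1 = "and",
+// MnOp2 = "or", IsNeg = 1 in T_LOGICAL_3OP above.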
-// Logical doublewords.
-// Rdd=and(Rtt,~Rss)
-let validSubTargets = HasV4SubT in
-def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = and($src1, ~$src2)",
- [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
- (not (i64 DoubleRegs:$src2))))]>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
-// Rdd=or(Rtt,~Rss)
-let validSubTargets = HasV4SubT in
-def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = or($src1, ~$src2)",
- [(set (i64 DoubleRegs:$dst),
- (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical with-not instructions.
+let validSubTargets = HasV4SubT, isCodeGenOnly = 0 in {
+ def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
+ def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
+}
-// Logical-logical doublewords.
-// Rxx^=xor(Rss,Rtt)
-let validSubTargets = HasV4SubT in
-def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- "$dst ^= xor($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101111;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
+ opExtendable = 3, isCodeGenOnly = 0 in
+def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
+ "$Rd = add($Rs, add($Ru, #$s6))" ,
+ [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
+ (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))],
+ "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<6> s6;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b10110;
+ let Inst{22-21} = s6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = s6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = s6{2-0};
+ let Inst{4-0} = Ru;
+ }
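+// Note on the encoding above (worked example; value assumed): the signed s6
+// immediate is split across non-contiguous fields, Inst{22-21} = s6{5-4},
+// Inst{13} = s6{3} and Inst{7-5} = s6{2-0}; e.g. #-20 = 0b101100 encodes as
+// 0b10 / 0b1 / 0b100.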
+let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 6, opExtendable = 2, isCodeGenOnly = 0 in
+def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru),
+ "$Rd = add($Rs, sub(#$s6, $Ru))",
+ [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<6> s6;
+ bits<5> Ru;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b10111;
+ let Inst{22-21} = s6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = s6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = s6{2-0};
+ let Inst{4-0} = Ru;
+ }
+
+// Extract bitfield
+// Rdd=extract(Rss,#u6,#U6)
+// Rdd=extract(Rss,Rtt)
+// Rd=extract(Rs,Rtt)
+// Rd=extract(Rs,#u5,#U5)
+
+let isCodeGenOnly = 0 in {
+def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
+def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>;
+}
-// Logical-logical words.
-// Rx=or(Ru,and(Rx,#s10))
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
-validSubTargets = HasV4SubT in
-def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
- "$dst = or($src1, and($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ExtPred:$src3)))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
+let hasNewValue = 1, isCodeGenOnly = 0 in {
+ def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
+ def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>;
+}
-// Rx[&|^]=and(Rs,Rt)
-// Rx&=and(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst &= and($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+ def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
+ def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
+}
-// Rx|=and(Rs,Rt)
-let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in
-def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst |= and($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>, ImmRegRel;
+// Logical xor with xor accumulation.
+// Rxx^=xor(Rss,Rtt)
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def M4_xor_xacc
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx ^= xor($Rss, $Rtt)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss),
+ (i64 DoubleRegs:$Rtt))))],
+ "$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-23} = 0b10101;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{4-0} = Rxx;
+ }
+
+// Split bitfield
+let isCodeGenOnly = 0 in
+def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
-// Rx^=and(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst ^= and($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Arithmetic/Convergent round
+let isCodeGenOnly = 0 in
+def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
-// Rx[&|^]=and(Rs,~Rt)
-// Rx&=and(Rs,~Rt)
-let validSubTargets = HasV4SubT in
-def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst &= and($src2, ~$src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (not (i32 IntRegs:$src3)))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let isCodeGenOnly = 0 in
+def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
-// Rx|=and(Rs,~Rt)
-let validSubTargets = HasV4SubT in
-def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst |= and($src2, ~$src3)",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (not (i32 IntRegs:$src3)))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let Defs = [USR_OVF], isCodeGenOnly = 0 in
+def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
-// Rx^=and(Rs,~Rt)
-let validSubTargets = HasV4SubT in
-def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst ^= and($src2, ~$src3)",
- [(set (i32 IntRegs:$dst),
- (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- (not (i32 IntRegs:$src3)))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Logical-logical words.
+// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
+ opExtendable = 3, isCodeGenOnly = 0 in
+def S4_or_andix:
+ ALU64Inst<(outs IntRegs:$Rx),
+ (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
+ "$Rx = or($Ru, and($_src_, #$s10))" ,
+ [(set (i32 IntRegs:$Rx),
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] ,
+ "$_src_ = $Rx", ALU64_tc_2_SLOT23> {
+ bits<5> Rx;
+ bits<5> Ru;
+ bits<10> s10;
+
+ let IClass = 0b1101;
+
+ let Inst{27-22} = 0b101001;
+ let Inst{20-16} = Rx;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Ru;
+ }
-// Rx[&|^]=or(Rs,Rt)
-// Rx&=or(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst &= or($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Miscellaneous ALU64 instructions.
+//
+let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0011111;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b111;
+ let Inst{4-0} = Rd;
+}
-// Rx|=or(Rs,Rt)
-let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
-def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst |= or($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>, ImmRegRel;
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0100;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
-// Rx^=or(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst ^= or($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let isCodeGenOnly = 0 in {
+// Rx[&|]=xor(Rs,Rt)
+def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
+def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
-// Rx[&|^]=xor(Rs,Rt)
-// Rx&=xor(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst &= xor($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Rx[&|^]=or(Rs,Rt)
+def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>;
-// Rx|=xor(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst |= xor($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let CextOpcode = "ORr_ORr" in
+def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>;
+def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>;
-// Rx^=xor(Rs,Rt)
-let validSubTargets = HasV4SubT in
-def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
- "$dst ^= xor($src2, $src3)",
- [(set (i32 IntRegs:$dst),
- (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Rx[&|^]=and(Rs,Rt)
+def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>;
-// Rx|=and(Rs,#s10)
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
-validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
-def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
- "$dst |= and($src2, #$src3)",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ExtPred:$src3)))],
- "$src1 = $dst">,
- Requires<[HasV4T]>, ImmRegRel;
+let CextOpcode = "ORr_ANDr" in
+def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>;
+def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
-// Rx|=or(Rs,#s10)
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
-validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
-def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
- "$dst |= or($src2, #$src3)",
- [(set (i32 IntRegs:$dst),
- (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ExtPred:$src3)))],
- "$src1 = $dst">,
- Requires<[HasV4T]>, ImmRegRel;
+// Rx[&|^]=and(Rs,~Rt)
+def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
+def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
+def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
+}
+
+// Compound or-or and or-and
+let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 10, opExtendable = 3 in
+class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
+ : MInst_acc <(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
+ "$Rx |= "#mnemonic#"($Rs, #$s10)",
+ [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
+ (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))],
+ "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<10> s10;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rx;
+ }
+let CextOpcode = "ORr_ANDr", isCodeGenOnly = 0 in
+def S4_or_andi : T_CompOR <"and", 0b00, and>;
+
+let CextOpcode = "ORr_ORr", isCodeGenOnly = 0 in
+def S4_or_ori : T_CompOR <"or", 0b10, or>;
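+
+// Illustrative use (values assumed): "r0 |= and(r1, #255)" selects
+// S4_or_andi through the (or Rx, (and Rs, #s10)) pattern above; because the
+// #s10 operand is extendable, masks outside the signed 10-bit range still
+// match via a constant extender.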
// Modulo wrap
// Rd=modwrap(Rs,Rt)
@@ -1480,79 +1918,229 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
// XTYPE/ALU -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// XTYPE/BIT +
+//===----------------------------------------------------------------------===//
+
+// Bit reverse
+let isCodeGenOnly = 0 in
+def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
+
+// Bit count
+let isCodeGenOnly = 0 in {
+def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
+def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
+def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
+}
+
+def: Pat<(i32 (trunc (cttz (i64 DoubleRegs:$Rss)))),
+ (S2_ct0p (i64 DoubleRegs:$Rss))>;
+def: Pat<(i32 (trunc (cttz (not (i64 DoubleRegs:$Rss))))),
+ (S2_ct1p (i64 DoubleRegs:$Rss))>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
+ "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ bits<6> s6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = s6;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
+ "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ bits<6> s6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = 0b011;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = s6;
+ let Inst{7-5} = 0b010;
+ let Inst{4-0} = Rd;
+}
+
+
+// Bit test/set/clear
+let isCodeGenOnly = 0 in {
+def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
+def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>;
+ def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
+}
+
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
+let AddedComplexity = 100 in
+def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
+
+let AddedComplexity = 100 in
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
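+
+// Worked example of the BITPOS32 transform (mask value assumed): Set5ImmPred
+// matches a mask with a single set bit, so "(x & 0x20) == 0" selects
+// S4_ntstbit_i(x, #5) instead of a bitsclr-style compare.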
+
+let isCodeGenOnly = 0 in {
+def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
+def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
+def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
+}
+
+// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
+// represented as a compare against "value & 0xFF", which is an exact match
+// for cmpb (same for cmph). The patterns below do not contain any additional
+// complexity that would make them preferable, and if they were actually used
+// instead of cmpb/cmph, they would result in a compare against register that
+// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
+def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)),
+ (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
+ (C4_nbitsclr I32:$Rs, I32:$Rt)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
+ (C4_nbitsset I32:$Rs, I32:$Rt)>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/BIT -
+//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// XTYPE/MPY +
//===----------------------------------------------------------------------===//
-// Multiply and user lower result.
-// Rd=add(#u6,mpyi(Rs,#U6))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
-validSubTargets = HasV4SubT in
-def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
- "$dst = add(#$src1, mpyi($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- u6ExtPred:$src1))]>,
- Requires<[HasV4T]>;
+// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed.
+
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1,
+ isCodeGenOnly = 0 in
+def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
+ (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6),
+ "$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
+ [(set (i32 IntRegs:$Rd),
+ (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
+ u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ bits<5> Rd;
+ bits<6> u6;
+ bits<5> Rs;
+ bits<6> U6;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23} = U6{5};
+ let Inst{22-21} = u6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = u6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = u6{2-0};
+ let Inst{4-0} = U6{4-0};
+ }
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
+ isExtendable = 1, opExtentBits = 6, opExtendable = 1, isCodeGenOnly = 0 in
+def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
+ (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
+ [(set (i32 IntRegs:$Rd),
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))],
+ "", ALU64_tc_3x_SLOT23>, ImmRegRel {
+ bits<5> Rd;
+ bits<6> u6;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01110;
+ let Inst{22-21} = u6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = u6{3};
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = u6{2-0};
+ let Inst{4-0} = Rd;
+ }
+
+let hasNewValue = 1 in
+class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
+ : ALU64Inst <(outs IntRegs:$dst), ins,
+ "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
+ "#$src2, $src3))"),
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))],
+ "", ALU64_tc_3x_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<8> src2;
+ bits<5> src3;
+
+ let IClass = 0b1101;
+
+ bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2});
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23} = MajOp;
+ let Inst{22-21} = ImmValue{5-4};
+ let Inst{20-16} = src3;
+ let Inst{13} = ImmValue{3};
+ let Inst{12-8} = dst;
+ let Inst{7-5} = ImmValue{2-0};
+ let Inst{4-0} = src1;
+ }
+
+let isCodeGenOnly = 0 in
+def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
+ (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
+
+let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
+ CextOpcode = "ADD_MPY", InputType = "imm", isCodeGenOnly = 0 in
+def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred,
+ (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
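+
+// The MajOp bit selects both the printed operand order and the immediate
+// slice: M4_mpyri_addr_u2 stores a pre-scaled u6_2 immediate (src2{7-2}), so
+// (illustrative) "r0 = add(r1, mpyi(#8, r2))" encodes ImmValue = 8 >> 2 = 2,
+// while M4_mpyri_addr keeps the raw low bits (src2{5-0}).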
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, CextOpcode = "ADD_MPY", InputType = "reg",
+ hasNewValue = 1, isCodeGenOnly = 0 in
+def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
+ (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
+ "$Rx = add($Ru, mpyi($_src_, $Rs))",
+ [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru),
+ (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))],
+ "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
+ bits<5> Rx;
+ bits<5> Ru;
+ bits<5> Rs;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b0011000;
+ let Inst{12-8} = Rx;
+ let Inst{4-0} = Ru;
+ let Inst{20-16} = Rs;
+ }
// Rd=add(##,mpyi(Rs,#U6))
def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
(HexagonCONST32 tglobaladdr:$src1)),
- (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+ (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2,
u6ImmPred:$src3))>;
-// Rd=add(#u6,mpyi(Rs,Rt))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
-validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
-def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst = add(#$src1, mpyi($src2, $src3))",
- [(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- u6ExtPred:$src1))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
// Rd=add(##,mpyi(Rs,Rt))
def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
(HexagonCONST32 tglobaladdr:$src1)),
- (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+ (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2,
IntRegs:$src3))>;
-// Rd=add(Ru,mpyi(#u6:2,Rs))
-let validSubTargets = HasV4SubT in
-def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- "$dst = add($src1, mpyi(#$src2, $src3))",
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
- u6_2ImmPred:$src2)))]>,
- Requires<[HasV4T]>;
-
-// Rd=add(Ru,mpyi(Rs,#u6))
-let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
-validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
-def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
- "$dst = add($src1, mpyi($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u6ExtPred:$src3)))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-// Rx=add(Ru,mpyi(Rx,Rs))
-let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
-def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst = add($src1, mpyi($src2, $src3))",
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src2 = $dst">,
- Requires<[HasV4T]>, ImmRegRel;
-
-
// Polynomial multiply words
// Rdd=pmpyw(Rs,Rt)
// Rxx^=pmpyw(Rs,Rt)
@@ -1590,159 +2178,115 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT +
//===----------------------------------------------------------------------===//
-
-// Shift by immediate and accumulate.
-// Rx=add(#u8,asl(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = add(#$src1, asl($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-// Rx=add(#u8,lsr(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = add(#$src1, lsr($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-// Rx=sub(#u8,asl(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = sub(#$src1, asl($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-// Rx=sub(#u8,lsr(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = sub(#$src1, lsr($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-
-//Shift by immediate and logical.
-//Rx=and(#u8,asl(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = and(#$src1, asl($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-//Rx=and(#u8,lsr(Rx,#U5))
+// Shift by immediate and accumulate/logical.
+// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5))
+// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5))
+// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
+// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-validSubTargets = HasV4SubT in
-def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = and(#$src1, lsr($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-//Rx=or(#u8,asl(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-AddedComplexity = 30, validSubTargets = HasV4SubT in
-def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = or(#$src1, asl($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
+ hasNewValue = 1, opNewValue = 0, validSubTargets = HasV4SubT in
+class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
+ bit asl_lsr, bits<2> MajOp, InstrItinClass Itin>
+ : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
+ "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
+ [(set (i32 IntRegs:$Rd),
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))],
+ "$Rd = $Rx", Itin> {
+
+ bits<5> Rd;
+ bits<8> u8;
+ bits<5> Rx;
+ bits<5> U5;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b1110;
+ let Inst{23-21} = u8{7-5};
+ let Inst{20-16} = Rd;
+ let Inst{13} = u8{4};
+ let Inst{12-8} = U5;
+ let Inst{7-5} = u8{3-1};
+ let Inst{4} = asl_lsr;
+ let Inst{3} = u8{0};
+ let Inst{2-1} = MajOp;
+}
-//Rx=or(#u8,lsr(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
-AddedComplexity = 30, validSubTargets = HasV4SubT in
-def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
- "$dst = or(#$src1, lsr($src2, #$src3))",
- [(set (i32 IntRegs:$dst),
- (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ExtPred:$src1))],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
+multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp,
+ InstrItinClass Itin> {
+ def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>;
+ def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>;
+}
+let AddedComplexity = 200, isCodeGenOnly = 0 in {
+ defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>;
+ defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>;
+}
-//Shift by register.
-//Rd=lsl(#s6,Rt)
-let validSubTargets = HasV4SubT in {
-def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
- "$dst = lsl(#$src1, $src2)",
- [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
- (i32 IntRegs:$src2)))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 30, isCodeGenOnly = 0 in
+defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>;
+let isCodeGenOnly = 0 in
+defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>;
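// Each defm above stamps T_S4_ShiftOperate out twice, once per shift
// direction, so S4_addi yields S4_addi_asl_ri and S4_addi_lsr_ri. A
// standalone sketch of the expansion and of the pasted assembly string
// (record fields invented for illustration):
class ShOp<string MnOp, string MnSh, bit asl_lsr> {
  string AsmString = "$Rd = " # MnOp # "(#$u8, " # MnSh # "($Rx, #$U5))";
  bit IsLsr = asl_lsr;
}
multiclass ShOps<string mnemonic> {
  def _asl_ri : ShOp<mnemonic, "asl", 0>;
  def _lsr_ri : ShOp<mnemonic, "lsr", 1>;
}
defm S4_addi : ShOps<"add">;  // produces S4_addi_asl_ri and S4_addi_lsr_ri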
-//Shift by register and logical.
-//Rxx^=asl(Rss,Rt)
-def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- "$dst ^= asl($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
-//Rxx^=asr(Rss,Rt)
-def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- "$dst ^= asr($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Rd=[cround|round](Rs,Rt)
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+ def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
+ def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
+}
-//Rxx^=lsl(Rss,Rt)
-def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- "$dst ^= lsl($src2, $src3)",
- [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
- (shl (i64 DoubleRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+// Rd=round(Rs,Rt):sat
+let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23,
+ isCodeGenOnly = 0 in
+def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
+
+// Rdd=[add|sub](Rss,Rtt,Px):carry
+let isPredicateLate = 1, hasSideEffects = 0 in
+class T_S3op_carry <string mnemonic, bits<3> MajOp>
+ : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry",
+ [], "$Px = $Pu", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<2> Pu;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rdd;
+ }
-//Rxx^=lsr(Rss,Rt)
-def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- "$dst ^= lsr($src2, $src3)",
- [(set (i64 DoubleRegs:$dst),
- (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2),
- (i32 IntRegs:$src3))))],
- "$src1 = $dst">,
- Requires<[HasV4T]>;
+let isCodeGenOnly = 0 in {
+def A4_addp_c : T_S3op_carry < "add", 0b110 >;
+def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
}
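// T_S3op_carry above has two outputs; the constraint string "$Px = $Pu" ties
// the carry-out predicate to the carry-in operand so both occupy the same
// register. A standalone sketch of that shape, with stub register classes
// and field names invented for illustration:
class RegClass;
def DoubleRegs : RegClass;
def PredRegs   : RegClass;
def outs;
def ins;
class TwoOut<dag o, dag i, string asm, string cstr> {
  dag OutOperandList = o;
  dag InOperandList  = i;
  string AsmString   = asm;
  string Constraints = cstr;   // "$Px = $Pu" ties output to input
}
def addp_c_shape
  : TwoOut<(outs DoubleRegs:$Rdd, PredRegs:$Px),
           (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
           "$Rdd = add($Rss, $Rtt, $Pu):carry", "$Px = $Pu">;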
+// Shift an immediate left by a register amount.
+let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
+ "$Rd = lsl(#$s6, $Rt)" ,
+ [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6,
+ (i32 IntRegs:$Rt)))],
+ "", S_3op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<6> s6;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b011010;
+ let Inst{20-16} = s6{5-1};
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = 0b11;
+ let Inst{4-0} = Rd;
+ let Inst{5} = s6{0};
+ }
+
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT -
//===----------------------------------------------------------------------===//
@@ -1830,7 +2374,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
(ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
opc#"($base+#$offset)"#memOp#"$delta",
[]>,
- Requires<[HasV4T, UseMEMOP]> {
+ Requires<[UseMEMOP]> {
bits<5> base;
bits<5> delta;
@@ -1841,6 +2385,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
!if (!eq(opcBits, 0b01), offset{6-1},
!if (!eq(opcBits, 0b10), offset{7-2},0)));
+ let opExtentAlign = opcBits;
let IClass = 0b0011;
let Inst{27-24} = 0b1110;
let Inst{22-21} = opcBits;
@@ -1861,7 +2406,7 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
opc#"($base+#$offset)"#memOp#"#$delta"
#!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
[]>,
- Requires<[HasV4T, UseMEMOP]> {
+ Requires<[UseMEMOP]> {
bits<5> base;
bits<5> delta;
@@ -1872,6 +2417,7 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
!if (!eq(opcBits, 0b01), offset{6-1},
!if (!eq(opcBits, 0b10), offset{7-2},0)));
+ let opExtentAlign = opcBits;
let IClass = 0b0011;
let Inst{27-24} = 0b1111;
let Inst{22-21} = opcBits;
@@ -1884,36 +2430,36 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
// multiclass to define MemOp instructions with register operand.
multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
- def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
- def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
- def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
- def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+ def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
}
// multiclass to define MemOp instructions with immediate Operand.
multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
- def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
- def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
- def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
- def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+ def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>;
+ def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>;
}
multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
- defm r : MemOp_rr <opc, opcBits, ImmOp>;
- defm i : MemOp_ri <opc, opcBits, ImmOp>;
+ defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>;
+ defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>;
}
// Define MemOp instructions.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
-validSubTargets =HasV4SubT in {
- let opExtentBits = 6, accessSize = ByteAccess in
- defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+ validSubTargets =HasV4SubT in {
+ let opExtentBits = 6, accessSize = ByteAccess, isCodeGenOnly = 0 in
+ defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
- let opExtentBits = 7, accessSize = HalfWordAccess in
- defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+ let opExtentBits = 7, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+ defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
- let opExtentBits = 8, accessSize = WordAccess in
- defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+ let opExtentBits = 8, accessSize = WordAccess, isCodeGenOnly = 0 in
+ defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
}
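// The memop record names come from two levels of NAME pasting: the outer
// defm supplies e.g. memopb_io, MemOp_base prefixes an underscore, and the
// inner multiclasses prefix the operation, giving L4_add_memopb_io,
// L4_iadd_memopb_io and so on. A standalone sketch (record body invented):
class MemOpRec<string op> {
  string Op = op;
}
multiclass MemOpKinds {
  def L4_add#NAME : MemOpRec<"add">;
  def L4_sub#NAME : MemOpRec<"sub">;
}
multiclass MemOpSize {
  defm _#NAME : MemOpKinds;
}
defm memopb_io : MemOpSize;  // L4_add_memopb_io, L4_sub_memopb_io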
//===----------------------------------------------------------------------===//
@@ -1946,10 +2492,10 @@ multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
- MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+ L4_iadd_memoph_io, L4_isub_memoph_io>;
// Byte
defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
- MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+ L4_iadd_memopb_io, L4_isub_memopb_io>;
}
let Predicates = [HasV4T, UseMEMOP] in {
@@ -1958,8 +2504,8 @@ let Predicates = [HasV4T, UseMEMOP] in {
defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
- MemOPw_SUBi_V4>;
+ defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io,
+ L4_isub_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -1987,10 +2533,10 @@ multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
- ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+ ADDRriU6_1, MEMOPIMM_HALF, L4_isub_memoph_io>;
// Byte
defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
- ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+ ADDRriU6_0, MEMOPIMM_BYTE, L4_isub_memopb_io>;
}
let Predicates = [HasV4T, UseMEMOP] in {
@@ -2000,7 +2546,7 @@ let Predicates = [HasV4T, UseMEMOP] in {
// Word
defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
- ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+ ADDRriU6_2, MEMOPIMM, L4_isub_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -2031,16 +2577,16 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Byte - clrbit
defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
- ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+ ADDRriU6_0, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
// Byte - setbit
defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
- ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+ ADDRriU6_0, SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
// Half Word - clrbit
defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
- ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+ ADDRriU6_1, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
// Half Word - setbit
defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
- ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+ ADDRriU6_1, SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
}
let Predicates = [HasV4T, UseMEMOP] in {
@@ -2053,9 +2599,9 @@ let Predicates = [HasV4T, UseMEMOP] in {
// memw(Rs+#0) = [clrbit|setbit](#U5)
// memw(Rs+#u6:2) = [clrbit|setbit](#U5)
defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
- CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ CLRMEMIMM, L4_iand_memopw_io, and>;
defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
- SETMEMIMM, MemOPw_SETBITi_V4, or>;
+ SETMEMIMM, L4_ior_memopw_io, or>;
}
//===----------------------------------------------------------------------===//
@@ -2096,12 +2642,12 @@ multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
- MemOPh_ADDr_V4, MemOPh_SUBr_V4,
- MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ L4_add_memoph_io, L4_sub_memoph_io,
+ L4_and_memoph_io, L4_or_memoph_io>;
// Byte
defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
- MemOPb_ADDr_V4, MemOPb_SUBr_V4,
- MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+ L4_add_memopb_io, L4_sub_memopb_io,
+ L4_and_memopb_io, L4_or_memopb_io>;
}
// Define 'def Pats' for MemOps with register addend.
@@ -2111,8 +2657,8 @@ let Predicates = [HasV4T, UseMEMOP] in {
defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
- MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, L4_add_memopw_io,
+ L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io >;
}
//===----------------------------------------------------------------------===//
@@ -2130,6 +2676,42 @@ let Predicates = [HasV4T, UseMEMOP] in {
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
+let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0,
+ validSubTargets = HasV4SubT in
+class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp,
+ list<dag> Pattern>
+ : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2),
+ "$dst = !cmp."#OpName#"($src1, #$src2)",
+ Pattern,
+ "", ALU32_2op_tc_2early_SLOT0123> {
+ bits<2> dst;
+ bits<5> src1;
+ bits<10> src2;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = op;
+ let Inst{20-16} = src1;
+ let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9});
+ let Inst{13-5} = src2{8-0};
+ let Inst{4-2} = 0b100;
+ let Inst{1-0} = dst;
+}
+
+let opExtentBits = 10, isExtentSigned = 1 in {
+def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>;
+
+def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>;
+
+}
+let opExtentBits = 9 in
+def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>;
+
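// In CMP_NOT_REG_IMM, Inst{21} carries the top immediate bit; the unsigned
// gtu form has only a 9-bit immediate, so the bit is forced to zero by
// string-comparing the mnemonic. A standalone sketch of that !eq dispatch
// (invented names):
class Bit21<string OpName, bits<10> src2> {
  bit Value = !if(!eq(OpName, "gtu"), 0b0, src2{9});
}
def b21_gt  : Bit21<"gt",  0b1000000000>;  // Value = 1 (sign bit of s10)
def b21_gtu : Bit21<"gtu", 0b1000000000>;  // Value = 0 (u9 has no bit 9)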
+
+
// p=!cmp.eq(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
@@ -2139,15 +2721,6 @@ def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
(setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
Requires<[HasV4T]>;
-// p=!cmp.eq(r1,#s10)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = !cmp.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
// p=!cmp.gt(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
@@ -2157,14 +2730,6 @@ def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
(not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
Requires<[HasV4T]>;
-// p=!cmp.gt(r1,#s10)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = !cmp.gt($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
- Requires<[HasV4T]>;
// p=!cmp.gtu(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
@@ -2175,15 +2740,6 @@ def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
(not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
Requires<[HasV4T]>;
-// p=!cmp.gtu(r1,#u9)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u9Ext:$src2),
- "$dst = !cmp.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
- Requires<[HasV4T]>;
-
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
@@ -2194,7 +2750,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
bb:$offset),
- (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ (J2_jumpf (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
bb:$offset)>,
Requires<[HasV4T]>;
@@ -2331,7 +2887,7 @@ def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
// if (!Pd.new) Rd=#0
// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
(i32 IntRegs:$Rs))),
1, 0))>,
Requires<[HasV4T]>;
@@ -2344,7 +2900,7 @@ def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (!Pd.new) Rd=#0
// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
(i32 IntRegs:$Rs))),
1, 0))>,
Requires<[HasV4T]>;
@@ -2356,7 +2912,7 @@ def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (Pd.new) Rd=#1
// if (!Pd.new) Rd=#0
def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
(i32 IntRegs:$Rt))),
1, 0))>,
Requires<[HasV4T]>;
@@ -2378,7 +2934,7 @@ def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (!Pd.new) Rd=#1
// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
(i32 IntRegs:$Rs))),
0, 1))>,
Requires<[HasV4T]>;
@@ -2391,7 +2947,7 @@ def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (!Pd.new) Rd=#1
// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
(i32 IntRegs:$Rs))),
0, 1))>,
Requires<[HasV4T]>;
@@ -2403,7 +2959,7 @@ def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (Pd.new) Rd=#0
// if (!Pd.new) Rd=#1
def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
(i32 IntRegs:$Rt))),
0, 1))>,
Requires<[HasV4T]>;
@@ -2415,7 +2971,7 @@ def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// if (Pd.new) Rd=#0
// if (!Pd.new) Rd=#1
def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs),
(i32 IntRegs:$Rt))),
0, 1))>,
Requires<[HasV4T]>;
@@ -2439,7 +2995,7 @@ def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
let AddedComplexity = 139 in
def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
u7StrictPosImmPred:$src2)))),
- (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (i32 (C2_muxii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
(DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
0, 1))>,
Requires<[HasV4T]>;
@@ -2566,17 +3122,61 @@ def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
// XTYPE/PRED -
//===----------------------------------------------------------------------===//
-//Deallocate frame and return.
-// dealloc_return
-let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_V4 : LD0Inst<(outs), (ins),
- "dealloc_return",
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Multiclass for DeallocReturn
+//===----------------------------------------------------------------------===//
+class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak>
+ : LD0Inst<(outs), (ins PredRegs:$src),
+ !if(isNot, "if (!$src", "if ($src")#
+ !if(isPredNew, ".new) ", ") ")#mnemonic#
+ !if(isPredNew, #!if(isTak,":t", ":nt"),""),
+ [], "", LD_tc_3or4stall_SLOT0> {
+
+ bits<2> src;
+ let BaseOpcode = "L4_RETURN";
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+ let isTaken = isTak;
+ let IClass = 0b1001;
+
+ let Inst{27-16} = 0b011000011110;
+
+ let Inst{13} = isNot;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{10} = 0b0;
+ let Inst{9-8} = src;
+ let Inst{4-0} = 0b11110;
+ }
+
+// Produce all predicated forms: p, !p, p.new and !p.new, with :t/:nt hints
+// on the .new forms.
+multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> {
+ let isPredicated = 1 in {
+ def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>;
+ def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>;
+ def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>;
+ }
+}
+
+multiclass LD_MISC_L4_RETURN<string mnemonic> {
+ let isBarrier = 1, isPredicable = 1 in
+ def NAME : LD0Inst <(outs), (ins), mnemonic, [], "",
+ LD_tc_3or4stall_SLOT0> {
+ let BaseOpcode = "L4_RETURN";
+ let IClass = 0b1001;
+ let Inst{27-16} = 0b011000011110;
+ let Inst{13-10} = 0b0000;
+ let Inst{4-0} = 0b11110;
+ }
+ defm t : L4_RETURN_PRED<mnemonic, 0 >;
+ defm f : L4_RETURN_PRED<mnemonic, 1 >;
}
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0,
+ validSubTargets = HasV4SubT, isCodeGenOnly = 0 in
+defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
+
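// Every dealloc_return variant above derives its assembly string from nested
// !if pastes over the three predicate bits. A standalone sketch of the
// string construction (class name invented):
class RetAsm<bit isNot, bit isPredNew, bit isTak> {
  string AsmString = !if(isNot, "if (!$src", "if ($src")
                   # !if(isPredNew, ".new) ", ") ")
                   # "dealloc_return"
                   # !if(isPredNew, !if(isTak, ":t", ":nt"), "");
}
def ret_tnew_pt : RetAsm<0, 1, 1>;  // "if ($src.new) dealloc_return:t"
def ret_f       : RetAsm<1, 0, 0>;  // "if (!$src) dealloc_return"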
// Restore registers and dealloc return function call.
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
Defs = [R29, R30, R31, PC] in {
@@ -2609,454 +3209,579 @@ let isCall = 1, isBarrier = 1,
Requires<[HasV4T]>;
}
-// if (Ps) dealloc_return
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cPt_V4 : LD0Inst<(outs),
- (ins PredRegs:$src1),
- "if ($src1) dealloc_return",
- []>,
- Requires<[HasV4T]>;
-}
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated store instructions with
+// GP-relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in
+class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<2>MajOp, Operand AddrOp, bit isAbs, bit isHalf>
+ : STInst<(outs), (ins AddrOp:$addr, RC:$src),
+ mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""),
+ [], "", V2LDST_tc_st_SLOT01> {
+ bits<19> addr;
+ bits<5> src;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+ let IClass = 0b0100;
+ let Inst{27} = 1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24} = 0b0;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = isHalf;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13} = offsetBits{8};
+ let Inst{12-8} = src;
+ let Inst{7-0} = offsetBits{7-0};
+ }
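// T_StoreAbsGP selects which slice of the 19-bit address becomes offsetBits
// by matching the name of the immediate operand: u16_3Imm addresses are
// 8-byte aligned, so the three zero low bits are dropped, down to the
// unscaled u16_0Imm. A standalone sketch of the !cast<string> dispatch,
// with stub operand defs invented for illustration:
class ImmOperand;
def u16_0Imm : ImmOperand;
def u16_3Imm : ImmOperand;
class PickOffset<ImmOperand ImmOp, bits<19> addr> {
  string ImmOpStr = !cast<string>(ImmOp);   // yields the def's name
  bits<16> offsetBits = !if(!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
                            /* u16_0Imm */ addr{15-0});
}
def off_doubleword : PickOffset<u16_3Imm, 0b1011001110001100000>;
def off_byte       : PickOffset<u16_0Imm, 0b0001011001110001100>;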
-// if (!Ps) dealloc_return
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1, isPredicatedFalse = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cNotPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
- "if (!$src1) dealloc_return",
- []>,
- Requires<[HasV4T]>;
-}
+//===----------------------------------------------------------------------===//
+// Template class for predicated store instructions with
+// GP-relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicated = 1, isNVStorable = 1, opExtentBits = 6,
+ opExtendable = 1 in
+class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
+ bit isHalf, bit isNot, bit isNew>
+ : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
+ ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
+ [], "", ST_tc_st_SLOT01>, AddrModeRel {
+ bits<2> src1;
+ bits<6> absaddr;
+ bits<5> src2;
+
+ let isPredicatedNew = isNew;
+ let isPredicatedFalse = isNot;
+
+ let IClass = 0b1010;
-// if (Ps.new) dealloc_return:nt
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
- "if ($src1.new) dealloc_return:nt",
- []>,
- Requires<[HasV4T]>;
-}
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = isHalf;
+ let Inst{17-16} = absaddr{5-4};
+ let Inst{13} = isNew;
+ let Inst{12-8} = src2;
+ let Inst{7} = 0b1;
+ let Inst{6-3} = absaddr{3-0};
+ let Inst{2} = isNot;
+ let Inst{1-0} = src1;
+ }
-// if (!Ps.new) dealloc_return:nt
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1, isPredicatedFalse = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cNotdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
- "if (!$src1.new) dealloc_return:nt",
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<2> MajOp, bit isHalf>
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>,
+ AddrModeRel {
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
}
-// if (Ps.new) dealloc_return:t
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
- "if ($src1.new) dealloc_return:t",
- []>,
- Requires<[HasV4T]>;
-}
+//===----------------------------------------------------------------------===//
+// Multiclass for store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
-// if (!Ps.new) dealloc_return:nt
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1,
- isPredicated = 1, isPredicatedFalse = 1 in {
-let validSubTargets = HasV4SubT in
- def DEALLOC_RET_cNotdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1),
- "if (!$src1.new) dealloc_return:t",
- []>,
- Requires<[HasV4T]>;
+ // Predicated
+ def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>;
+ def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>;
+
+ // .new Predicated
+ def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>;
+ def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>;
+ }
}
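// ST_Abs emits one predicable def plus four predicated variants per store,
// splicing NAME into the middle of each record name; with NAME = storerb
// that gives S2_storerbabs, S4_pstorerbt_abs, S4_pstorerbf_abs,
// S4_pstorerbtnew_abs and S4_pstorerbfnew_abs. A standalone sketch of the
// infix pasting (record body invented):
class StVariant<bit isNot, bit isNew> {
  bit IsFalse = isNot;
  bit IsNew   = isNew;
}
multiclass StAbsNames {
  def S2_#NAME#abs       : StVariant<0, 0>;
  def S4_p#NAME#t_abs    : StVariant<0, 0>;
  def S4_p#NAME#f_abs    : StVariant<1, 0>;
  def S4_p#NAME#tnew_abs : StVariant<0, 1>;
  def S4_p#NAME#fnew_abs : StVariant<1, 1>;
}
defm storerb : StAbsNames;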
-// Load/Store with absolute addressing mode
-// memw(#u6)=Rt
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated new-value store instructions with
+// GP-relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
+ isNewValue = 1, opNewValue = 1 in
+class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
+ : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src),
+ mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
+ [], "", V2LDST_tc_st_SLOT0> {
+ bits<19> addr;
+ bits<3> src;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+ let IClass = 0b0100;
+
+ let Inst{27} = 1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24-21} = 0b0101;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13} = offsetBits{8};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src;
+ let Inst{7-0} = offsetBits{7-0};
+ }
-multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Template class for predicated new-value store instructions with
+// absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1,
+ isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in
+class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
+ : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
+ ") ")#mnemonic#"(#$absaddr) = $src2.new",
+ [], "", ST_tc_st_SLOT0>, AddrModeRel {
+ bits<2> src1;
+ bits<6> absaddr;
+ bits<3> src2;
+
+ let isPredicatedNew = isNew;
+ let isPredicatedFalse = isNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = 0b101;
+ let Inst{17-16} = absaddr{5-4};
+ let Inst{13} = isNew;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src2;
+ let Inst{7} = 0b1;
+ let Inst{6-3} = absaddr{3-0};
+ let Inst{2} = isNot;
+ let Inst{1-0} = src1;
}
-multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
- }
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated new-value store instructions with
+// absolute addressing.
+//===----------------------------------------------------------------------===//
+class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
+ : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 1>, AddrModeRel {
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
}
-let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in
-multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+//===----------------------------------------------------------------------===//
+// Multiclass for new-value store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
+ bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 0, isPredicable = 1 in
- def NAME#_V4 : STInst2<(outs),
- (ins u0AlwaysExt:$absaddr, RC:$src),
- mnemonic#"(##$absaddr) = $src",
- []>,
- Requires<[HasV4T]>;
+ def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
- let opExtendable = 1, isPredicated = 1 in {
- defm Pt : ST_Abs_Pred<mnemonic, RC, 0>;
- defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>;
- }
+ // Predicated
+ def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>;
+ def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>;
+
+ // .new Predicated
+ def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>;
}
}
-multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"(##$absaddr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-}
+//===----------------------------------------------------------------------===//
+// Stores with absolute addressing
+//===----------------------------------------------------------------------===//
+let accessSize = ByteAccess, isCodeGenOnly = 0 in
+defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
+ ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
-multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
- }
-}
+let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
+ ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
-let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in
-multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 0, isPredicable = 1 in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins u0AlwaysExt:$absaddr, RC:$src),
- mnemonic#"(##$absaddr) = $src.new",
- []>,
- Requires<[HasV4T]>;
+let accessSize = WordAccess, isCodeGenOnly = 0 in
+defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
+ ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
- let opExtendable = 1, isPredicated = 1 in {
- defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
- defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
- }
+let isNVStorable = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
+
+let isNVStorable = 0, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
+
+//===----------------------------------------------------------------------===//
+// GP-relative stores.
+// mem[bhwd](#global)=Rt
+// Once predicated, these instructions map to absolute addressing mode.
+// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
+//===----------------------------------------------------------------------===//
+
+let validSubTargets = HasV4SubT in
+class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
+  // Set BaseOpcode to match the absolute-addressing instructions so that
+  // non-predicated GP-relative instructions can be related to their
+  // predicated absolute forms.
+ let BaseOpcode = BaseOp#_abs;
+ }
+
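// T_StoreGP reuses the absolute encoding but rewrites BaseOpcode so the
// relation maps (AddrModeRel, NewValueRel) can pair each GP-relative def
// with its predicated absolute forms. A standalone sketch of the
// BaseOp#_abs paste; a bare identifier on the right of # is taken as a
// string (def name invented):
class WithBase<string BaseOp> {
  string BaseOpcode = BaseOp#_abs;
}
def storerb_link : WithBase<"STrib">;  // BaseOpcode = "STrib_abs"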
+let validSubTargets = HasV4SubT in
+multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
+ bits<2> MajOp, bit isHalf = 0> {
+  // Set BaseOpcode to match the absolute-addressing instructions so that
+  // non-predicated GP-relative instructions can be related to their
+  // predicated absolute forms.
+ let BaseOpcode = BaseOp#_abs in {
+ def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
+ globaladdress, 0, isHalf>;
+ // New-value store
+ def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ;
}
}
-let addrMode = Absolute in {
- let accessSize = ByteAccess in
- defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
- ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+let accessSize = ByteAccess in
+defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel;
- let accessSize = HalfWordAccess in
- defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
- ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+let accessSize = HalfWordAccess in
+defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel;
- let accessSize = WordAccess in
- defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
- ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+let accessSize = WordAccess in
+defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel;
- let accessSize = DoubleWordAccess, isNVStorable = 0 in
- defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
-}
+let isNVStorable = 0, accessSize = DoubleWordAccess in
+def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs,
+ u16_3Imm, 0b11>, PredNewRel;
+
+let isNVStorable = 0, accessSize = HalfWordAccess in
+def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
+ u16_1Imm, 0b01, 1>, PredNewRel;
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
- (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+ (S2_storerbabs tglobaladdr: $absaddr, IntRegs: $src1)>;
def : Pat<(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
- (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+ (S2_storerhabs tglobaladdr: $absaddr, IntRegs: $src1)>;
def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
- (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+ (S2_storeriabs tglobaladdr: $absaddr, IntRegs: $src1)>;
def : Pat<(store (i64 DoubleRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
- (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
-}
-
-//===----------------------------------------------------------------------===//
-// multiclass for store instructions with GP-relative addressing mode.
-// mem[bhwd](#global)=Rt
-// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
-//===----------------------------------------------------------------------===//
-let mayStore = 1, isNVStorable = 1 in
-multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
- let BaseOpcode = BaseOp, isPredicable = 1 in
- def NAME#_V4 : STInst2<(outs),
- (ins globaladdress:$global, RC:$src),
- mnemonic#"(#$global) = $src",
- []>;
-
- // When GP-relative instructions are predicated, their addressing mode is
- // changed to absolute and they are always constant extended.
- let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
- isPredicated = 1 in {
- defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
- defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
- }
-}
-
-let mayStore = 1, isNVStore = 1 in
-multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
- let BaseOpcode = BaseOp, isPredicable = 1 in
- def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins u0AlwaysExt:$global, RC:$src),
- mnemonic#"(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
- // When GP-relative instructions are predicated, their addressing mode is
- // changed to absolute and they are always constant extended.
- let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
- isPredicated = 1 in {
- defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
- defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
- }
-}
-
-let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in {
- let isNVStorable = 0 in
- defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel;
-
- defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
- ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel;
- defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
- ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel;
- defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
- ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel;
+ (S2_storerdabs tglobaladdr: $absaddr, DoubleRegs: $src1)>;
}
// 64 bit atomic store
def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
(i64 DoubleRegs:$src1)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
Requires<[HasV4T]>;
// Map from store(globaladdress) -> memd(#foo)
let AddedComplexity = 100 in
def : Pat <(store (i64 DoubleRegs:$src1),
(HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
+ (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
// 8 bit atomic store
def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
(i32 IntRegs:$src1)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
// Map from store(globaladdress) -> memb(#foo)
let AddedComplexity = 100 in
def : Pat<(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
// to "r0 = 1; memw(#foo) = r0"
let AddedComplexity = 100 in
def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
(i32 IntRegs:$src1)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
// Map from store(globaladdress) -> memh(#foo)
let AddedComplexity = 100 in
def : Pat<(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
// 32 bit atomic store
def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
(i32 IntRegs:$src1)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
// Map from store(globaladdress) -> memw(#foo)
let AddedComplexity = 100 in
def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+ (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
//===----------------------------------------------------------------------===//
-// Multiclass for the load instructions with absolute addressing mode.
+// Template class for non-predicated load instructions with
+// absolute addressing mode.
//===----------------------------------------------------------------------===//
-multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
- bit isPredNew> {
- let isPredicatedNew = isPredNew in
- def NAME : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, u0AlwaysExt:$absaddr),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
-}
+let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT in
+class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp, Operand AddrOp, bit isAbs>
+ : LDInst <(outs RC:$dst), (ins AddrOp:$addr),
+ "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)",
+ [], "", V2LDST_tc_ld_SLOT01> {
+ bits<5> dst;
+ bits<19> addr;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24} = 0b1;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
+
+class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp>
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel {
-multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
- // Predicate new
- defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
}
+//===----------------------------------------------------------------------===//
+// Template class for predicated load instructions with
+// absolute addressing mode.
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasNewValue = 1, opExtentBits = 6, opExtendable = 2 in
+class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isPredNot, bit isPredNew>
+ : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<6> absaddr;
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b1001;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = absaddr{5-1};
+ let Inst{13} = 0b1;
+ let Inst{12} = isPredNew;
+ let Inst{11} = isPredNot;
+ let Inst{10-9} = src1;
+ let Inst{8} = absaddr{0};
+ let Inst{7} = 0b1;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit PredNot> {
+ def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>;
+ // Predicate new
+ def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>;
}
-let isExtended = 1, neverHasSideEffects = 1 in
-multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+let addrMode = Absolute, isExtended = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, bits<3> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 1, isPredicable = 1 in
- def NAME#_V4 : LDInst2<(outs RC:$dst),
- (ins u0AlwaysExt:$absaddr),
- "$dst = "#mnemonic#"(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicable = 1 in
+ def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
- let opExtendable = 2, isPredicated = 1 in {
- defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
- defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
- }
+ // Predicated
+ defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>;
+ defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>;
}
}
-let addrMode = Absolute in {
- let accessSize = ByteAccess in {
- defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
- defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
- }
- let accessSize = HalfWordAccess in {
- defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
- defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+let accessSize = ByteAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+ defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
+ defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+ defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
+ defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1, isCodeGenOnly = 0 in
+defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
+
+let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
+
+//===----------------------------------------------------------------------===//
+// Template class for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// Once predicated, these instructions map to absolute addressing mode.
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+
+class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp>
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel {
+ let BaseOpcode = BaseOp#_abs;
}
- let accessSize = WordAccess in
- defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
- let accessSize = DoubleWordAccess in
- defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
+ def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
+ def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
}
+let accessSize = WordAccess, hasNewValue = 1 in
+def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
+
+let accessSize = DoubleWordAccess in
+def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
+
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
- (LDriw_abs_V4 tglobaladdr: $absaddr)>;
+ (L4_loadri_abs tglobaladdr: $absaddr)>;
def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (LDrib_abs_V4 tglobaladdr:$absaddr)>;
+ (L4_loadrb_abs tglobaladdr:$absaddr)>;
def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (LDriub_abs_V4 tglobaladdr:$absaddr)>;
+ (L4_loadrub_abs tglobaladdr:$absaddr)>;
def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (LDrih_abs_V4 tglobaladdr:$absaddr)>;
+ (L4_loadrh_abs tglobaladdr:$absaddr)>;
def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
-}
-
-//===----------------------------------------------------------------------===//
-// multiclass for load instructions with GP-relative addressing mode.
-// Rx=mem[bhwd](##global)
-// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
-//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
- let BaseOpcode = BaseOp in {
- let isPredicable = 1 in
- def NAME#_V4 : LDInst2<(outs RC:$dst),
- (ins globaladdress:$global),
- "$dst = "#mnemonic#"(#$global)",
- []>;
-
- let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
- defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
- defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
- }
- }
+ (L4_loadruh_abs tglobaladdr:$absaddr)>;
}
-defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel;
-defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel;
-defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel;
-defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel;
-defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel;
-defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel;
-
def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+ (i64 (L2_loadrdgp tglobaladdr:$global))>;
def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrigp tglobaladdr:$global))>;
def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadruhgp tglobaladdr:$global))>;
def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrubgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memw(#foo + 0)
let AddedComplexity = 100 in
def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+ (i64 (L2_loadrdgp tglobaladdr:$global))>;
// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
let AddedComplexity = 100 in
def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+ (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
// When the Interprocedural Global Variable optimizer realizes that a certain
// global variable takes only two constant values, it shrinks the global to
// a boolean. Catch those loads here in the following 3 patterns.
let AddedComplexity = 100 in
def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrbgp tglobaladdr:$global))>;
let AddedComplexity = 100 in
def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrbgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memb(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrbgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memb(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrbgp tglobaladdr:$global))>;
let AddedComplexity = 100 in
def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrubgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memub(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrubgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memh(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrhgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memh(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrhgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memuh(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadruhgp tglobaladdr:$global))>;
// Map from load(globaladdress) -> memw(#foo)
let AddedComplexity = 100 in
def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+ (i32 (L2_loadrigp tglobaladdr:$global))>;
// Transfer global address into a register
@@ -3073,7 +3798,7 @@ def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
Requires<[HasV4T]>;
let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in
+hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s16Ext:$src2),
"if($src1) $dst = #$src2",
@@ -3081,7 +3806,7 @@ def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
Requires<[HasV4T]>;
let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in
+hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s16Ext:$src2),
"if(!$src1) $dst = #$src2",
@@ -3089,7 +3814,7 @@ def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
Requires<[HasV4T]>;
let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in
+hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s16Ext:$src2),
"if($src1.new) $dst = #$src2",
@@ -3097,7 +3822,7 @@ def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
Requires<[HasV4T]>;
let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in
+hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s16Ext:$src2),
"if(!$src1.new) $dst = #$src2",
@@ -3157,30 +3882,30 @@ def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+          (S2_storerbabs u0AlwaysExtPred:$src2, IntRegs:$src1)>;
def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+          (S2_storerhabs u0AlwaysExtPred:$src2, IntRegs:$src1)>;
def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+          (S2_storeriabs u0AlwaysExtPred:$src2, IntRegs:$src1)>;
}
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(i32 (load u0AlwaysExtPred:$src)),
- (LDriw_abs_V4 u0AlwaysExtPred:$src)>;
+ (L4_loadri_abs u0AlwaysExtPred:$src)>;
def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
- (LDrib_abs_V4 u0AlwaysExtPred:$src)>;
+ (L4_loadrb_abs u0AlwaysExtPred:$src)>;
def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
- (LDriub_abs_V4 u0AlwaysExtPred:$src)>;
+ (L4_loadrub_abs u0AlwaysExtPred:$src)>;
def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
- (LDrih_abs_V4 u0AlwaysExtPred:$src)>;
+ (L4_loadrh_abs u0AlwaysExtPred:$src)>;
def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
- (LDriuh_abs_V4 u0AlwaysExtPred:$src)>;
+ (L4_loadruh_abs u0AlwaysExtPred:$src)>;
}
// Indexed store word - global address.
@@ -3194,11 +3919,11 @@ def STriw_offset_ext_V4 : STInst<(outs),
Requires<[HasV4T]>;
def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))),
- (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
+ (i64 (A4_combineir (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
Requires<[HasV4T]>;
def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
- (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
+ (i64 (A4_combineir (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
Requires<[HasV4T]>;
@@ -3207,49 +3932,49 @@ def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
// zextloadi8.
let Predicates = [HasV4T], AddedComplexity = 120 in {
def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>;
+ (i64 (A4_combineir 0, (L4_loadrb_abs tglobaladdr:$addr)))>;
def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>;
+ (i64 (A4_combineir 0, (L4_loadrub_abs tglobaladdr:$addr)))>;
def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>;
+ (i64 (A2_sxtw (L4_loadrb_abs tglobaladdr:$addr)))>;
def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+ (i64 (A4_combineir 0, (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>;
+ (i64 (A4_combineir 0, (L4_loadrub_abs FoldGlobalAddr:$addr)))>;
def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)),
- (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>;
+ (i64 (A2_sxtw (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
}
// i16 -> i64 loads
// We need a complexity of 120 here to override preceding handling of
// zextloadi16.
let AddedComplexity = 120 in {
def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadrh_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadruh_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A2_sxtw (L4_loadrh_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadruh_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
- (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A2_sxtw (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
}
// i32->i64 loads
@@ -3257,27 +3982,27 @@ def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
// zextloadi32.
let AddedComplexity = 120 in {
def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>,
+ (i64 (A2_sxtw (L4_loadri_abs tglobaladdr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)),
- (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)),
- (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>,
+ (i64 (A2_sxtw (L4_loadri_abs FoldGlobalAddr:$addr)))>,
Requires<[HasV4T]>;
}
@@ -3294,104 +4019,457 @@ def STrih_offset_ext_V4 : STInst<(outs),
let AddedComplexity = 100 in
def : Pat<(store (i64 DoubleRegs:$src1),
FoldGlobalAddrGP:$addr),
- (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
Requires<[HasV4T]>;
def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
(i64 DoubleRegs:$src1)),
- (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
Requires<[HasV4T]>;
// Map from store(globaladdress + x) -> memb(#foo + x)
let AddedComplexity = 100 in
def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
// Map from store(globaladdress + x) -> memh(#foo + x)
let AddedComplexity = 100 in
def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
// Map from store(globaladdress + x) -> memw(#foo + x)
let AddedComplexity = 100 in
def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memd(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
- (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
- (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memb(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memb(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memh(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memh(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memuh(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
- (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memub(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
- (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
// Map from load(globaladdress + x) -> memw(#foo + x)
let AddedComplexity = 100 in
def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
- (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
- (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// :raw form of boundscheck:hi:lo insns
+//===----------------------------------------------------------------------===//
+
+// A4_boundscheck_lo: Detect if a register is within bounds.
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def A4_boundscheck_lo: ALU64Inst <
+ (outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = boundscheck($Rss, $Rtt):raw:lo"> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = 0b1;
+ let Inst{7-5} = 0b100;
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// A4_boundscheck_hi: Detect if a register is within bounds.
+let hasSideEffects = 0, isCodeGenOnly = 0 in
+def A4_boundscheck_hi: ALU64Inst <
+ (outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = boundscheck($Rss, $Rtt):raw:hi"> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = 0b1;
+ let Inst{7-5} = 0b101;
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+let hasSideEffects = 0 in
+def A4_boundscheck : MInst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
+           "$Pd = boundscheck($Rs, $Rtt)">;
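+// Note: unlike the :raw:lo/:raw:hi defs above, this composite form carries no
+// encoding bits here; presumably it is expanded into that raw pair later.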
+
+// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
+let isPredicateLate = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rs, IntRegs:$Rt),
+ "$Pd = tlbmatch($Rs, $Rt)",
+ [], "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-23} = 0b00100;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b011;
+ let Inst{1-0} = Pd;
+ }
+
+// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
+// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
+// We don't really want either one here.
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
+def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
+ [SDNPHasChain]>;
+
+// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
+// really do a load.
+let hasSideEffects = 1, mayLoad = 0, isCodeGenOnly = 0 in
+def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
+ "dcfetch($Rs + #$u11_3)",
+ [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
+ "", LD_tc_ld_SLOT0> {
+ bits<5> Rs;
+ bits<14> u11_3;
+
+ let IClass = 0b1001;
+ let Inst{27-21} = 0b0100000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{10-0} = u11_3{13-3};
+}
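+// The offset is encoded as u11_3{13-3}: the low three bits are dropped, so
+// #$u11_3 must be a multiple of 8, e.g. "dcfetch(r0 + #8)".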
+
+//===----------------------------------------------------------------------===//
+// Compound instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
+ opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
+ isTerminator = 1, validSubTargets = HasV4SubT in
+class CJInst_tstbit_R0<string px, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
+ ""#px#" = tstbit($Rs, #0); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND> {
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{24-23} = 0b11;
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{9-8} = 0b11;
+ let Inst{7-1} = r9_2{8-2};
+}
+
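+// For example, J4_tstbit0_fp0_jump_nt below prints as
+// "p0 = tstbit($Rs, #0); if (!p0.new) jump:nt $r9_2".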
+let Defs = [PC, P0], Uses = [P0], isCodeGenOnly = 0 in {
+ def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
+ def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
+ def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
+ def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
+}
+
+let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in {
+ def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
+ def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
+ def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
+ def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">;
+}
+
+
+let isBranch = 1, hasSideEffects = 0,
+ isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
+ isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
+ opExtendable = 2, isTerminator = 1, validSubTargets = HasV4SubT in
+class CJInst_RR<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs, $Rt); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND> {
+ bits<4> Rs;
+ bits<4> Rt;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-23} = !if (!eq(op, "eq"), 0b01000,
+ !if (!eq(op, "gt"), 0b01001,
+ !if (!eq(op, "gtu"), 0b01010, 0)));
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ // px: Predicate reg 0/1
+ let Inst{12} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{11-8} = Rt;
+ let Inst{7-1} = r9_2{8-2};
+}
+
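+// For example, J4_cmpgt_fp1_jump_t prints as
+// "p1 = cmp.gt($Rs, $Rt); if (!p1.new) jump:t $r9_2".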
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_RR<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_RR<string op>{
+ defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>;
+ defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
+}
+// TypeCJ instructions: compare register to register and jump.
+let isCodeGenOnly = 0 in {
+defm eq : T_pnp_CJInst_RR<"eq">;
+defm gt : T_pnp_CJInst_RR<"gt">;
+defm gtu : T_pnp_CJInst_RR<"gtu">;
+}
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2, isTerminator = 1,
+ validSubTargets = HasV4SubT in
+class CJInst_RU5<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs, #$U5); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND> {
+ bits<4> Rs;
+ bits<5> U5;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ // px: Predicate reg 0/1
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{24-23} = !if (!eq(op, "eq"), 0b00,
+ !if (!eq(op, "gt"), 0b01,
+ !if (!eq(op, "gtu"), 0b10, 0)));
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{12-8} = U5;
+ let Inst{7-1} = r9_2{8-2};
+}
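+// For example, J4_cmpeqi_tp0_jump_nt prints as
+// "p0 = cmp.eq($Rs, #$U5); if (p0.new) jump:nt $r9_2".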
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_RU5<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_RU5<string op>{
+ defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>;
+ defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
+}
+// TypeCJ instructions: compare register to #U5 immediate and jump.
+let isCodeGenOnly = 0 in {
+defm eq : T_pnp_CJInst_RU5<"eq">;
+defm gt : T_pnp_CJInst_RU5<"gt">;
+defm gtu : T_pnp_CJInst_RU5<"gtu">;
+}
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
+ isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
+ isTerminator = 1, validSubTargets = HasV4SubT in
+class CJInst_Rn1<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs,#-1); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND> {
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+
+ let Inst{24-23} = 0b11;
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{9-8} = !if (!eq(op, "eq"), 0b00,
+ !if (!eq(op, "gt"), 0b01, 0));
+ let Inst{7-1} = r9_2{8-2};
+}
+
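+// For example, J4_cmpgtn1_fp1_jump_t prints as
+// "p1 = cmp.gt($Rs,#-1); if (!p1.new) jump:t $r9_2".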
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_Rn1<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_Rn1<string op>{
+ defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>;
+ defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
+}
+// TypeCJ instructions: compare register to #-1 and jump.
+let isCodeGenOnly = 0 in {
+defm eq : T_pnp_CJInst_Rn1<"eq">;
+defm gt : T_pnp_CJInst_Rn1<"gt">;
+}
+
+// J4_jumpseti: Direct unconditional jump and set register to immediate.
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
+ isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
+ isCodeGenOnly = 0 in
+def J4_jumpseti: CJInst <
+ (outs IntRegs:$Rd),
+ (ins u6Imm:$U6, brtarget:$r9_2),
+ "$Rd = #$U6 ; jump $r9_2"> {
+ bits<4> Rd;
+ bits<6> U6;
+ bits<11> r9_2;
+
+ let IClass = 0b0001;
+ let Inst{27-24} = 0b0110;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rd;
+ let Inst{13-8} = U6;
+ let Inst{7-1} = r9_2{8-2};
+ }
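+// For example, "r3 = #7 ; jump target" performs both the transfer and the
+// jump as a single compound instruction.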
+
+// J4_jumpsetr: Direct unconditional jump and transfer register.
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
+ isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
+ isCodeGenOnly = 0 in
+def J4_jumpsetr: CJInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, brtarget:$r9_2),
+ "$Rd = $Rs ; jump $r9_2"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ let IClass = 0b0001;
+ let Inst{27-24} = 0b0111;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rs;
+ let Inst{7-1} = r9_2{8-2};
+ }
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 9da607455811..5674aa3ccd83 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -1,3 +1,42 @@
+//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V5 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 0 in
+def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
+ [(set I64:$dst,
+ (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)),
+ (i32 1)))], 1>,
+ Requires<[HasV5T]> {
+ bits<6> src2;
+ let Inst{13-8} = src2;
+}
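+// The pattern above recognizes "asr, add 1, asr by 1" and folds the sequence
+// into the rounding ":rnd" form of the shift.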
+
+let isCodeGenOnly = 0 in
+def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
+ Requires<[HasV5T]> {
+ let Inst{13,7,4} = 0b111;
+}
+
+let isCodeGenOnly = 0 in
+def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
+ Requires<[HasV5T]> {
+ let Inst{20,13,7,4} = 0b1111;
+}
+
def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
SDTCisVT<0, f32>,
SDTCisPtrTy<1>]>;
@@ -37,7 +76,7 @@ def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
Requires<[HasV5T]>;
let isExtended = 1, opExtendable = 2, isPredicated = 1,
-neverHasSideEffects = 1, validSubTargets = HasV5SubT in
+hasSideEffects = 0, validSubTargets = HasV5SubT in
def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if ($src1) $dst = #$src2",
@@ -45,13 +84,405 @@ def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
Requires<[HasV5T]>;
let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
-neverHasSideEffects = 1, validSubTargets = HasV5SubT in
+hasSideEffects = 0, validSubTargets = HasV5SubT in
def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if (!$src1) $dst =#$src2",
[]>,
Requires<[HasV5T]>;
+def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i64>]>;
+
+def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
+
+let hasNewValue = 1, validSubTargets = HasV5SubT, isCodeGenOnly = 0 in
+def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
+ "$Rd = popcount($Rss)",
+ [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1000011;
+ let Inst{7-5} = 0b011;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rss;
+ }
+
+let isFP = 1, hasNewValue = 1, opNewValue = 0 in
+class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : MInst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#mnemonic#"($Rs, $Rt)", [],
+ "" , M_tc_3or4x_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+let isCommutable = 1, isCodeGenOnly = 0 in {
+ def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
+ def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
+}
+
+let isCodeGenOnly = 0 in
+def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
+
+let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+ def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
+ def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
+}
+
+let isCodeGenOnly = 0 in {
+def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
+def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
+}
+
+// F2_sfrecipa: Reciprocal approximation for division.
+let isPredicateLate = 1, isFP = 1,
+hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+def F2_sfrecipa: MInst <
+ (outs IntRegs:$Rd, PredRegs:$Pe),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd, $Pe = sfrecipa($Rs, $Rt)">,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<2> Pe;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+ let Inst{27-21} = 0b1011111;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6-5} = Pe;
+ let Inst{4-0} = Rd;
+ }
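+// The late predicate $Pe qualifies the approximation; sfrecipa is typically
+// used to seed a Newton-Raphson refinement when expanding fp division.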
+
+// Floating-point compare: base template for the F2_dfcmp* and F2_sfcmp*
+// defs below.
+let isCompare = 1, isFP = 1 in
+class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
+ list<dag> pattern = [] >
+ : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)", pattern,
+ "" , ALU64_tc_2early_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<2> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1101;
+
+ let Inst{27-21} = 0b0010111;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = dst;
+ }
+
+class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
+ : T_fcmp <mnemonic, DoubleRegs, MinOp,
+ [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> {
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0010111;
+}
+
+class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
+ : T_fcmp <mnemonic, IntRegs, MinOp,
+ [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> {
+ let IClass = 0b1100;
+ let Inst{27-21} = 0b0111111;
+}
+
+let isCodeGenOnly = 0 in {
+def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
+def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
+def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
+def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>;
+
+def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
+def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
+def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
+def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
+}
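+// There is no "ne" compare form; setone/setune are matched further down in
+// this file as C2_not of the corresponding equality compare.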
+
+// F2 convert template classes:
+let isFP = 1 in
+class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
+ "$Rdd = "#mnemonic#"($Rss)"#chop,
+ [(set RCOut:$Rdd, (Op RCIn:$Rss))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0000111;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+let isFP = 1 in
+class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
+ "$Rdd = "#mnemonic#"($Rs)"#chop,
+ [(set RCOut:$Rdd, (Op RCIn:$Rs))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0100100;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+let isFP = 1, hasNewValue = 1 in
+class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
+ "$Rd = "#mnemonic#"($Rss)"#chop,
+ [(set RCOut:$Rd, (Op RCIn:$Rss))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = 0b001;
+ let Inst{4-0} = Rd;
+ }
+
+let isFP = 1, hasNewValue = 1 in
+class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)"#chop,
+ [(set RCOut:$Rd, (Op RCIn:$Rs))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+// Convert single precision to double precision and vice-versa.
+let isCodeGenOnly = 0 in {
+def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
+ fextend, F64, F32>;
+
+def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000,
+ fround, F32, F64>;
+
+// Convert Integer to Floating Point.
+def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010,
+ sint_to_fp, F32, I64>;
+def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001,
+ uint_to_fp, F32, I64>;
+def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000,
+ uint_to_fp, F32, I32>;
+def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000,
+ sint_to_fp, F32, I32>;
+def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011,
+ sint_to_fp, F64, I64>;
+def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010,
+ uint_to_fp, F64, I64>;
+def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001,
+ uint_to_fp, F64, I32>;
+def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010,
+ sint_to_fp, F64, I32>;
+
+// Convert Floating Point to Integer: chopped (truncating) forms, the default.
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101,
+ fp_to_uint, I32, F64, ":chop">;
+def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111,
+ fp_to_sint, I32, F64, ":chop">;
+def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
+ fp_to_uint, I32, F32, ":chop">;
+def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
+ fp_to_sint, I32, F32, ":chop">;
+def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110,
+ fp_to_sint, I64, F64, ":chop">;
+def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111,
+ fp_to_uint, I64, F64, ":chop">;
+def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110,
+ fp_to_sint, I64, F32, ":chop">;
+def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101,
+ fp_to_uint, I64, F32, ":chop">;
+
+// Convert Floating Point to Integer: non-chopped.
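+// These depend on the current rounding mode, so they are only selected when
+// the subtarget is known to round to nearest (IEEERndNearV5T).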
+let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
+ def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000,
+ fp_to_sint, I64, F64>;
+ def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001,
+ fp_to_uint, I64, F64>;
+ def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011,
+ fp_to_uint, I64, F32>;
+ def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100,
+ fp_to_sint, I64, F32>;
+ def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011,
+ fp_to_uint, I32, F64>;
+ def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100,
+ fp_to_sint, I32, F64>;
+ def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000,
+ fp_to_uint, I32, F32>;
+ def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
+ fp_to_sint, I32, F32>;
+}
+}
+
+// Fix up radicand.
+let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = sffixupr($Rs)",
+ [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rd;
+ }
+
+// Floating-point fused multiply-add/subtract (F2_sffma, F2_sffms, and the
+// :lib variants below).
+let isFP = 1, hasNewValue = 1 in
+class T_sfmpy_acc <bit isSub, bit isLib>
+ : MInst<(outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
+ [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6} = isLib;
+ let Inst{5} = isSub;
+ let Inst{4-0} = Rx;
+ }
+
+let isCodeGenOnly = 0 in {
+def F2_sffma: T_sfmpy_acc <0, 0>;
+def F2_sffms: T_sfmpy_acc <1, 0>;
+def F2_sffma_lib: T_sfmpy_acc <0, 1>;
+def F2_sffms_lib: T_sfmpy_acc <1, 1>;
+}
+
+// Floating-point fused multiply add w/ additional scaling (2**pu).
+let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+def F2_sffma_sc: MInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
+ "$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
+ [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<2> Pu;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111011;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rx;
+ }
+
+// Classify floating-point value
+let isFP = 1, isCodeGenOnly = 0 in
+ def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
+
+let isFP = 1, isCodeGenOnly = 0 in
+def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
+ "$Pd = dfclass($Rss, #$u5)",
+ [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> u5;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b1100100;
+ let Inst{20-16} = Rss;
+ let Inst{12-10} = 0b000;
+ let Inst{9-5} = u5;
+ let Inst{4-3} = 0b10;
+ let Inst{1-0} = Pd;
+ }
+
+// Instructions to create a floating-point constant.
+let hasNewValue = 1, opNewValue = 0 in
+class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
+ : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
+ "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
+ [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> {
+ bits<5> dst;
+ bits<10> src;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = RegType;
+ let Inst{23} = 0b0;
+ let Inst{22} = isNeg;
+ let Inst{21} = src{9};
+ let Inst{13-5} = src{8-0};
+ let Inst{4-0} = dst;
+ }
+
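+// For example, F2_sfimm_n prints as "$dst = sfmake(#$src):neg".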
+let isCodeGenOnly = 0 in {
+def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
+def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
+def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
+def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
+}
+
// Convert single precision to double precision and vice-versa.
def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2df($src)",
@@ -290,36 +721,36 @@ def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))),
// ne.
def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1,
+ (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1,
(f64 (CONST64_Float_Real fpimm:$src2)))))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
Requires<[HasV5T]>;
def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1,
+ (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1,
(f64 (CONST64_Float_Real fpimm:$src2)))))>,
Requires<[HasV5T]>;
@@ -458,30 +889,21 @@ def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
- (i32 (TFR IntRegs:$src))>,
+ (i32 (A2_tfr IntRegs:$src))>,
Requires<[HasV5T]>;
def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
- (f32 (TFR IntRegs:$src))>,
+ (f32 (A2_tfr IntRegs:$src))>,
Requires<[HasV5T]>;
def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
- (i64 (TFR64 DoubleRegs:$src))>,
+ (i64 (A2_tfrp DoubleRegs:$src))>,
Requires<[HasV5T]>;
def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
- (f64 (TFR64 DoubleRegs:$src))>,
+ (f64 (A2_tfrp DoubleRegs:$src))>,
Requires<[HasV5T]>;
-// Floating point fused multiply-add.
-def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- "$dst += dfmpy($src2, $src3)",
- [(set (f64 DoubleRegs:$dst),
- (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))],
- "$src1 = $dst">,
- Requires<[HasV5T]>;
-
def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += sfmpy($src2, $src3)",
@@ -492,15 +914,6 @@ def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
// Floating point max/min.
-let AddedComplexity = 100 in
-def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = dfmax($src1, $src2)",
- [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2)))]>,
- Requires<[HasV5T]>;
let AddedComplexity = 100 in
def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
@@ -513,16 +926,6 @@ def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
Requires<[HasV5T]>;
let AddedComplexity = 100 in
-def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = dfmin($src1, $src2)",
- [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100 in
def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = sfmin($src1, $src2)",
@@ -615,19 +1018,19 @@ def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
Requires<[HasV5T]>;
def : Pat <(fabs (f32 IntRegs:$src1)),
- (CLRBIT_31 (f32 IntRegs:$src1), 31)>,
+ (S2_clrbit_i (f32 IntRegs:$src1), 31)>,
Requires<[HasV5T]>;
def : Pat <(fneg (f32 IntRegs:$src1)),
- (TOGBIT_31 (f32 IntRegs:$src1), 31)>,
+ (S2_togglebit_i (f32 IntRegs:$src1), 31)>,
Requires<[HasV5T]>;
/*
def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
Requires<[HasV5T]>;
def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
Requires<[HasV5T]>;
*/
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 99f59d5ea669..7d3f9d92cbd4 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1843,13 +1843,18 @@ class si_MInst_didi<string opc, Intrinsic IntID>
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
[(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class T_RI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID (i32 IntRegs:$Rs), imm:$It),
+ (MI IntRegs:$Rs, imm:$It)>;
+
//
// LDInst classes.
//
-let mayLoad = 1, neverHasSideEffects = 1 in
+let mayLoad = 1, hasSideEffects = 0 in
class di_LDInstPI_diu4<string opc, Intrinsic IntID>
: LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
- (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset),
+ (ins IntRegs:$src1, IntRegs:$src2, CtrRegs:$src3, s4Imm:$offset),
"$dst2 = memd($src1++#$offset:circ($src3))",
[],
"$src1 = $dst">;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index 2788101d5a66..df89378603a4 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -13,13 +13,13 @@
//
def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(i64
- (COMBINE_rr
+ (A2_combinew
(HEXAGON_M2_maci
(HEXAGON_M2_maci
(i32
(EXTRACT_SUBREG
(i64
- (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
subreg_loreg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_loreg)))),
@@ -31,7 +31,8 @@ def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(i32
(EXTRACT_SUBREG
(i64
- (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (M2_dpmpyuu_s0
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_loreg)))), subreg_loreg))))>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index dd28ebb57231..77b148b9f2bd 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -217,12 +217,13 @@ def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
// ALU32 / PRED / Conditional Sign Extend.
// ALU32 / PRED / Conditional Zero Extend.
// ALU32 / PRED / Compare.
-def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
-def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
-def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
-def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>;
+def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>;
+def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>;
// ALU32 / PRED / Compare to General Register.
def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 5e4346d40213..3f5d6d84a108 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -42,7 +42,6 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
- MCI.setDesc(MI->getDesc());
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index d799bdbd5f19..cb18df6ed198 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonMACHINEFUNCTIONINFO_H
-#define HexagonMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 6fcaa2057404..97c626fdf7af 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -145,7 +145,7 @@ void VLIWMachineScheduler::schedule() {
<< "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
<< " " << BB->getName()
<< " in_func " << BB->getParent()->getFunction()->getName()
- << " at loop depth " << MLI.getLoopDepth(BB)
+ << " at loop depth " << MLI->getLoopDepth(BB)
<< " \n");
buildDAGWithRegPressure();
@@ -208,8 +208,12 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
const TargetMachine &TM = DAG->MF.getTarget();
delete Top.HazardRec;
delete Bot.HazardRec;
- Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
- Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Top.HazardRec =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ Bot.HazardRec =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
delete Top.ResourceModel;
delete Bot.ResourceModel;
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 8c4108609606..1e023c32bb8c 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONASMPRINTER_H
-#define HEXAGONASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -56,7 +56,9 @@ class VLIWResourceModel {
public:
VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
SchedModel(SM), TotalPackets(0) {
- ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, nullptr);
+ ResourcesModel =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState(
+ *TM.getSubtargetImpl());
// This hard requirement could be relaxed,
// but for now do not let it proceed.
@@ -99,7 +101,7 @@ public:
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
- virtual void schedule() override;
+ void schedule() override;
/// Perform platform-specific DAG postprocessing.
void postprocessDAG();
};
@@ -207,15 +209,15 @@ public:
: DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"),
Bot(BotQID, "BotQ") {}
- virtual void initialize(ScheduleDAGMI *dag) override;
+ void initialize(ScheduleDAGMI *dag) override;
- virtual SUnit *pickNode(bool &IsTopNode) override;
+ SUnit *pickNode(bool &IsTopNode) override;
- virtual void schedNode(SUnit *SU, bool IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
- virtual void releaseTopNode(SUnit *SU) override;
+ void releaseTopNode(SUnit *SU) override;
- virtual void releaseBottomNode(SUnit *SU) override;
+ void releaseBottomNode(SUnit *SU) override;
unsigned ReportPackets() {
return Top.ResourceModel->getTotalPackets() +
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index b7c03a72779f..7edba92e7e0d 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -176,7 +176,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
return false;
// if call in path, bail out.
- if (MII->getOpcode() == Hexagon::CALLv3)
+ if (MII->getOpcode() == Hexagon::J2_call)
return false;
// if NVJ is running prior to RA, do the following checks.
@@ -199,8 +199,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::TFR_condset_rr ||
- MII->getOpcode() == Hexagon::TFR_condset_ii ||
+ if (MII->getOpcode() == Hexagon::TFR_condset_ii ||
MII->getOpcode() == Hexagon::TFR_condset_ri ||
MII->getOpcode() == Hexagon::TFR_condset_ir ||
MII->getOpcode() == Hexagon::LDriw_pred ||
@@ -228,8 +227,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
int64_t v = MI->getOperand(2).getImm();
if (!(isUInt<5>(v) ||
- ((MI->getOpcode() == Hexagon::CMPEQri ||
- MI->getOpcode() == Hexagon::CMPGTri) &&
+ ((MI->getOpcode() == Hexagon::C2_cmpeqi ||
+ MI->getOpcode() == Hexagon::C2_cmpgti) &&
(v == -1))))
return false;
}
@@ -299,11 +298,11 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
taken = true;
switch (MI->getOpcode()) {
- case Hexagon::CMPEQrr:
+ case Hexagon::C2_cmpeq:
return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
: Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
- case Hexagon::CMPEQri: {
+ case Hexagon::C2_cmpeqi: {
if (reg >= 0)
return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
: Hexagon::CMPEQri_t_Jumpnv_nt_V4;
@@ -312,7 +311,7 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
: Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPGTrr: {
+ case Hexagon::C2_cmpgt: {
if (secondRegNewified)
return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
: Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
@@ -321,7 +320,7 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
: Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPGTri: {
+ case Hexagon::C2_cmpgti: {
if (reg >= 0)
return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
: Hexagon::CMPGTri_t_Jumpnv_nt_V4;
@@ -330,7 +329,7 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
: Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPGTUrr: {
+ case Hexagon::C2_cmpgtu: {
if (secondRegNewified)
return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
: Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
@@ -339,7 +338,7 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
: Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPGTUri:
+ case Hexagon::C2_cmpgtui:
return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
: Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
@@ -362,9 +361,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif
- QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
- QRI =
- static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
if (!QRI->Subtarget.hasV4TOps() ||
@@ -413,12 +412,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
if (!foundJump &&
- (MI->getOpcode() == Hexagon::JMP_t ||
- MI->getOpcode() == Hexagon::JMP_f ||
- MI->getOpcode() == Hexagon::JMP_tnew_t ||
- MI->getOpcode() == Hexagon::JMP_tnew_nt ||
- MI->getOpcode() == Hexagon::JMP_fnew_t ||
- MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
+ (MI->getOpcode() == Hexagon::J2_jumpt ||
+ MI->getOpcode() == Hexagon::J2_jumpf ||
+ MI->getOpcode() == Hexagon::J2_jumptnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumptnew ||
+ MI->getOpcode() == Hexagon::J2_jumpfnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumpfnew)) {
// This is where you would insert the compare and
// the instruction that feeds the compare.
jmpPos = MII;
@@ -454,9 +453,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
jmpTarget = MI->getOperand(1).getMBB();
foundJump = true;
- if (MI->getOpcode() == Hexagon::JMP_f ||
- MI->getOpcode() == Hexagon::JMP_fnew_t ||
- MI->getOpcode() == Hexagon::JMP_fnew_nt) {
+ if (MI->getOpcode() == Hexagon::J2_jumpf ||
+ MI->getOpcode() == Hexagon::J2_jumpfnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumpfnew) {
invertPredicate = true;
}
continue;
@@ -545,7 +544,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if (isSecondOpReg) {
// In case of CMPLT, or CMPLTU, or EQ with the second register
// to newify, swap the operands.
- if (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+ if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
feederReg == (unsigned) cmpOp2) {
unsigned tmp = cmpReg1;
bool tmpIsKill = MO1IsKill;
@@ -612,8 +611,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
.addReg(cmpOp2, getKillRegState(MO2IsKill))
.addMBB(jmpTarget);
- else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri ||
- cmpInstr->getOpcode() == Hexagon::CMPGTri) &&
+ else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
+ cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
cmpOp2 == -1 )
// Corresponding new-value compare jump instructions don't have the
// operand for -1 immediate value.
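The renames above move this pass to Hexagon's architecture-manual opcode scheme (C2_* for compares, J2_* for jumps and calls). getNewValueJumpOpcode() itself is a straight mapping from a compare opcode plus a branch-probability "taken" hint to the _t or _nt new-value-jump form; a sketch of that idiom with invented enumerators:

enum Opcode {
  CmpEq, CmpGt,
  NVJ_CmpEq_t, NVJ_CmpEq_nt,
  NVJ_CmpGt_t, NVJ_CmpGt_nt,
  InvalidOp
};

// Taken branches get the "_t" (predicted-taken) form, the rest "_nt".
Opcode selectNewValueJump(Opcode Cmp, bool Taken) {
  switch (Cmp) {
  case CmpEq: return Taken ? NVJ_CmpEq_t : NVJ_CmpEq_nt;
  case CmpGt: return Taken ? NVJ_CmpGt_t : NVJ_CmpGt_nt;
  default:    return InvalidOp;
  }
}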
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index c79d78f21080..5a6de0ae2746 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -39,6 +39,7 @@ let PrintMethod = "printImmOperand" in {
def u16_0Imm : Operand<i32>;
def u16_1Imm : Operand<i32>;
def u16_2Imm : Operand<i32>;
+ def u16_3Imm : Operand<i32>;
def u11_3Imm : Operand<i32>;
def u10Imm : Operand<i32>;
def u9Imm : Operand<i32>;
@@ -258,6 +259,13 @@ def u16_s8ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<16,8>(v);
}]>;
+def u11_3ImmPred : PatLeaf<(i32 imm), [{
+ // True if the immediate fits in a 14-bit unsigned field, and the lowest
+ // three bits are 0.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<11,3>(v);
+}]>;
+
def u9ImmPred : PatLeaf<(i32 imm), [{
// u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
// field.
@@ -800,6 +808,12 @@ def u6_3ExtPred : PatLeaf<(i32 imm), [{
}
}]>;
+
+// This complex pattern exists only to create a machine instruction operand
+// of type "frame index". There doesn't seem to be a way to do that directly
+// in the patterns.
+def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
+
// Addressing modes.
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
@@ -856,3 +870,12 @@ def symbolHi32 : Operand<i32> {
def symbolLo32 : Operand<i32> {
let PrintMethod = "printSymbolLo";
}
+
+// Return true for a 32-to-64-bit sign-extended load.
+def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD)
+ return false;
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
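The new u11_3ImmPred relies on isShiftedUInt<11,3>. Spelled out without the LLVM helper (a sketch, not the library code): the value must be a non-negative 14-bit quantity whose low three bits are zero, i.e. an 11-bit count scaled by 8.

#include <cstdint>

bool isShiftedUInt_11_3(int64_t V) {
  return V >= 0 && (V & 7) == 0 && (V >> 3) < (INT64_C(1) << 11);
}
// e.g. 8 and 16376 (2047 * 8) pass; 12 fails (low bits set),
// 16384 fails (needs 15 bits).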
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 48b61590853a..e9b2ef6b3911 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -111,10 +111,8 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
false, false)
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
- QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
- getInstrInfo());
- QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
- getRegisterInfo());
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
MRI = &MF.getRegInfo();
DenseMap<unsigned, unsigned> PeepholeMap;
@@ -135,7 +133,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MII;
// Look for sign extends:
// %vreg170<def> = SXTW %vreg166
- if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) {
+ if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) {
assert (MI->getNumOperands() == 2);
MachineOperand &Dst = MI->getOperand(0);
MachineOperand &Src = MI->getOperand(1);
@@ -154,7 +152,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169)
// %vreg170:DoubleRegs, %vreg169:IntRegs
if (!DisableOptExtTo64 &&
- MI->getOpcode () == Hexagon::COMBINE_Ir_V4) {
+ MI->getOpcode () == Hexagon::A4_combineir) {
assert (MI->getNumOperands() == 3);
MachineOperand &Dst = MI->getOperand(0);
MachineOperand &Src1 = MI->getOperand(1);
@@ -171,7 +169,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
// and convert into
// %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
- if (MI->getOpcode() == Hexagon::LSRd_ri) {
+ if (MI->getOpcode() == Hexagon::S2_lsr_i_p) {
assert(MI->getNumOperands() == 3);
MachineOperand &Dst = MI->getOperand(0);
MachineOperand &Src1 = MI->getOperand(1);
@@ -186,7 +184,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Look for P=NOT(P).
if (!DisablePNotP &&
- (MI->getOpcode() == Hexagon::NOT_p)) {
+ (MI->getOpcode() == Hexagon::C2_not)) {
assert (MI->getNumOperands() == 2);
MachineOperand &Dst = MI->getOperand(0);
MachineOperand &Src = MI->getOperand(1);
@@ -271,10 +269,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices.
switch (Op) {
- case Hexagon::TFR_condset_rr:
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
case Hexagon::TFR_condset_ii:
- case Hexagon::MUX_ii:
- case Hexagon::MUX_rr:
NewOp = Op;
break;
case Hexagon::TFR_condset_ri:
@@ -283,11 +280,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
case Hexagon::TFR_condset_ir:
NewOp = Hexagon::TFR_condset_ri;
break;
- case Hexagon::MUX_ri:
- NewOp = Hexagon::MUX_ir;
+ case Hexagon::C2_muxri:
+ NewOp = Hexagon::C2_muxir;
break;
- case Hexagon::MUX_ir:
- NewOp = Hexagon::MUX_ri;
+ case Hexagon::C2_muxir:
+ NewOp = Hexagon::C2_muxri;
break;
}
if (NewOp) {
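The C2_mux* swap at the end of this file implements the P = not(P) peephole: rather than materializing the negated predicate, the pass exchanges the mux arms (and the ri/ir flavor with them). The underlying identity, sketched with plain values rather than registers:

int mux(bool P, int T, int F) { return P ? T : F; }

// mux(!p, t, f) == mux(p, f, t): the operand exchange the peephole applies
// by rewriting C2_muxri as C2_muxir and vice versa.
int muxWithInvertedPred(bool P, int T, int F) { return mux(P, F, T); }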
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index fb466d3b5908..a64c9df9a047 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -128,12 +128,12 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp.
MachineFunction &MF = *MI.getParent()->getParent();
const HexagonInstrInfo &TII =
- *static_cast<const HexagonInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned FrameReg = getFrameRegister(MF);
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!TFI->hasFP(MF)) {
// We will not reserve space on the stack for the lr and fp registers.
Offset -= 2 * Hexagon_WordSize;
@@ -159,15 +159,15 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
//
// r0 = add(r30, #10000)
// r0 = memw(r0)
- if ( (MI.getOpcode() == Hexagon::LDriw) ||
- (MI.getOpcode() == Hexagon::LDrid) ||
- (MI.getOpcode() == Hexagon::LDrih) ||
- (MI.getOpcode() == Hexagon::LDriuh) ||
- (MI.getOpcode() == Hexagon::LDrib) ||
- (MI.getOpcode() == Hexagon::LDriub) ||
+ if ( (MI.getOpcode() == Hexagon::L2_loadri_io) ||
+ (MI.getOpcode() == Hexagon::L2_loadrd_io) ||
+ (MI.getOpcode() == Hexagon::L2_loadrh_io) ||
+ (MI.getOpcode() == Hexagon::L2_loadruh_io) ||
+ (MI.getOpcode() == Hexagon::L2_loadrb_io) ||
+ (MI.getOpcode() == Hexagon::L2_loadrub_io) ||
(MI.getOpcode() == Hexagon::LDriw_f) ||
(MI.getOpcode() == Hexagon::LDrid_f)) {
- unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ unsigned dstReg = (MI.getOpcode() == Hexagon::L2_loadrd_io) ?
getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
MI.getOperand(0).getReg();
@@ -176,7 +176,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
dstReg).addReg(FrameReg).addReg(dstReg);
} else {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
@@ -186,11 +186,10 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
- (MI.getOpcode() == Hexagon::STriw) ||
- (MI.getOpcode() == Hexagon::STrid) ||
- (MI.getOpcode() == Hexagon::STrih) ||
- (MI.getOpcode() == Hexagon::STrib) ||
+ } else if ((MI.getOpcode() == Hexagon::S2_storeri_io) ||
+ (MI.getOpcode() == Hexagon::S2_storerd_io) ||
+ (MI.getOpcode() == Hexagon::S2_storerh_io) ||
+ (MI.getOpcode() == Hexagon::S2_storerb_io) ||
(MI.getOpcode() == Hexagon::STrid_f) ||
(MI.getOpcode() == Hexagon::STriw_f)) {
// For stores, we need a reserved register. Change
@@ -205,7 +204,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
resReg).addReg(FrameReg).addReg(resReg);
} else {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
@@ -237,7 +236,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg).
addReg(ResReg);
MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
true);
@@ -256,7 +255,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
dstReg).addReg(FrameReg).addReg(dstReg);
// Can we delete MI here? It is now r2 = add(r2, #0).
MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
@@ -278,7 +277,7 @@ unsigned HexagonRegisterInfo::getRARegister() const {
unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
return Hexagon::R30;
}
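When a frame offset does not fit a load/store's signed immediate field, eliminateFrameIndex() emits the three-instruction sequence seen in each branch above. Schematically (register names illustrative), with a stand-in for the range check that isValidOffset() performs; the 11-bit bound below is an assumption for the sketch:

//   rT = CONST32(#offset)     ; materialize the full offset
//   rT = add(r30, rT)         ; A2_add: rT = fp + offset
//   rD = memw(rT + #0)        ; original access rewritten to offset 0
#include <cstdint>

bool needsScratchSequence(int64_t Offset, unsigned ImmBits = 11) {
  int64_t Lim = INT64_C(1) << (ImmBits - 1); // signed immediate range
  return Offset < -Lim || Offset >= Lim;
}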
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 648b4af9e957..a83b5026467a 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonREGISTERINFO_H
-#define HexagonREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index 8ea1b7e75db7..decd94722da1 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -13,90 +13,78 @@
let Namespace = "Hexagon" in {
- class HexagonReg<string n> : Register<n> {
+ class HexagonReg<bits<5> num, string n, list<string> alt = [],
+ list<Register> alias = []> : Register<n> {
field bits<5> Num;
+ let Aliases = alias;
+ let HWEncoding{4-0} = num;
}
- class HexagonDoubleReg<string n, list<Register> subregs> :
+ class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
+ list<string> alt = []> :
RegisterWithSubRegs<n, subregs> {
field bits<5> Num;
+
+ let AltNames = alt;
+ let HWEncoding{4-0} = num;
}
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers.
- class Ri<bits<5> num, string n> : HexagonReg<n> {
+ class Ri<bits<5> num, string n, list<string> alt = []> : HexagonReg<num, n, alt> {
let Num = num;
}
// Rf - 32-bit floating-point registers.
- class Rf<bits<5> num, string n> : HexagonReg<n> {
+ class Rf<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rd - 64-bit registers.
class Rd<bits<5> num, string n, list<Register> subregs> :
- HexagonDoubleReg<n, subregs> {
+ HexagonDoubleReg<num, n, subregs> {
let Num = num;
let SubRegs = subregs;
}
// Rp - predicate registers
- class Rp<bits<5> num, string n> : HexagonReg<n> {
+ class Rp<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rc - control registers
- class Rc<bits<5> num, string n> : HexagonReg<n> {
+ class Rc<bits<5> num, string n,
+ list<string> alt = [], list<Register> alias = []> :
+ HexagonReg<num, n, alt, alias> {
+ let Num = num;
+ }
+
+ // Rcc - 64-bit control registers.
+ class Rcc<bits<5> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ HexagonDoubleReg<num, n, subregs, alt> {
let Num = num;
+ let SubRegs = subregs;
}
- // Rj - aliased integer registers
- class Rj<string n, Ri R>: HexagonReg<n> {
- let Num = R.Num;
- let Aliases = [R];
+ // Mx - address modifier registers
+ class Mx<bits<1> num, string n> : HexagonReg<{0b0000, num}, n> {
+ let Num = !cast<bits<5>>(num);
}
def subreg_loreg : SubRegIndex<32>;
def subreg_hireg : SubRegIndex<32, 32>;
+ def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
- def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
- def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
- def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
- def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
- def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
- def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
- def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
- def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
- def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
- def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
- def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
- def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
- def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
- def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
- def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
- def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
- def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
- def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
- def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
- def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
- def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
- def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
- def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
- def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
- def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
- def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
- def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
- def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
- def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
- def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
- def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
- def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
-
- def SP : Rj<"sp", R29>, DwarfRegNum<[29]>;
- def FP : Rj<"fp", R30>, DwarfRegNum<[30]>;
- def LR : Rj<"lr", R31>, DwarfRegNum<[31]>;
+ foreach i = 0-28 in {
+ def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
+ }
+
+ def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
// Aliases of the R* registers used to hold 64-bit int values (doubles).
let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
@@ -124,20 +112,52 @@ let Namespace = "Hexagon" in {
def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
- // Control registers.
- def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
- def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
-
- def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
- def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+ // Modifier registers.
+ // C6 and C7 can also be M0 and M1, but register names must be unique, even
+ // if belonging to different register classes.
+ def M0 : Mx<0, "m0">, DwarfRegNum<[72]>;
+ def M1 : Mx<1, "m1">, DwarfRegNum<[73]>;
- def M0 : Rc<6, "m0">, DwarfRegNum<[71]>;
- def M1 : Rc<7, "m1">, DwarfRegNum<[72]>;
+ // Fake register to represent USR.OVF bit. Arithmetic/saturating instruc-
+ // tions modify this bit, and multiple such instructions are allowed in the
+ // same packet. We need to ignore output dependencies on this bit, but not
+ // on the entire USR.
+ def USR_OVF : Rc<?, "usr.ovf">;
- def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct?
- def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct?
+ // Control registers.
+ def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
+ def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
+ def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
+ DwarfRegNum<[71]>;
+ def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>;
+ def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>;
+
+ def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> {
+ let SubRegIndices = [subreg_overflow];
+ let SubRegs = [USR_OVF];
+ }
+ def PC : Rc<9, "pc">, DwarfRegNum<[75]>;
+ def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>;
+ def GP : Rc<11, "gp">, DwarfRegNum<[77]>;
+ def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>;
+ def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>;
+ def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>;
+ def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>;
}
+ // Control register pairs.
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
+ def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
+ def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>;
+ def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
+ def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
+ def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
+ }
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -159,9 +179,29 @@ def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
let Size = 32;
}
-def CRRegs : RegisterClass<"Hexagon", [i32], 32,
- (add (sequence "LC%u", 0, 1),
- (sequence "SA%u", 0, 1),
- (sequence "M%u", 0, 1), PC, GP)> {
- let Size = 32;
+let Size = 32 in
+def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
+
+let Size = 32, isAllocatable = 0 in
+def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add LC0, SA0, LC1, SA1,
+ P3_0,
+ M0, M1, C6, C7, CS0, CS1, UPCL, UPCH,
+ USR, USR_OVF, UGP, GP, PC)>;
+
+let Size = 64, isAllocatable = 0 in
+def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
+ (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>;
+
+def VolatileV3 {
+ list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7,
+ R28, R31,
+ P0, P1, P2, P3,
+ M0, M1,
+ LC0, LC1, SA0, SA1, USR, USR_OVF];
}
+
+def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a),
+[{
+ return isPositiveHalfWord(N);
+}]>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index b40b30320cf1..8ac2e43f9294 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonSELECTIONDAGINFO_H
-#define HexagonSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 247207f992dc..8fdd493a75dc 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -68,12 +68,13 @@ char HexagonSplitConst32AndConst64::ID = 0;
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
const HexagonTargetObjectFile &TLOF =
- (const HexagonTargetObjectFile &)
- QTM.getTargetLowering()->getObjFileLowering();
+ (const HexagonTargetObjectFile &)QTM.getSubtargetImpl()
+ ->getTargetLowering()
+ ->getObjFileLowering();
if (TLOF.IsSmallDataEnabled())
return true;
- const TargetInstrInfo *TII = QTM.getInstrInfo();
+ const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -138,10 +139,10 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
else if (Opc == Hexagon::CONST64_Int_Real) {
int DestReg = MI->getOperand(0).getReg();
int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo =
- QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_loreg);
- unsigned DestHi =
- QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_hireg);
+ unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
+ DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
+ DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index 9601090af469..a304e655f0b1 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -80,7 +80,7 @@ char HexagonSplitTFRCondSets::ID = 0;
bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = QTM.getInstrInfo();
+ const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -92,21 +92,19 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *MI = MII;
int Opc1, Opc2;
switch(MI->getOpcode()) {
- case Hexagon::TFR_condset_rr:
case Hexagon::TFR_condset_rr_f:
case Hexagon::TFR_condset_rr64_f: {
int DestReg = MI->getOperand(0).getReg();
int SrcReg1 = MI->getOperand(2).getReg();
int SrcReg2 = MI->getOperand(3).getReg();
- if (MI->getOpcode() == Hexagon::TFR_condset_rr ||
- MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
- Opc1 = Hexagon::TFR_cPt;
- Opc2 = Hexagon::TFR_cNotPt;
+ if (MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
+ Opc1 = Hexagon::A2_tfrt;
+ Opc2 = Hexagon::A2_tfrf;
}
else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
- Opc1 = Hexagon::TFR64_cPt;
- Opc2 = Hexagon::TFR64_cNotPt;
+ Opc1 = Hexagon::A2_tfrpt;
+ Opc2 = Hexagon::A2_tfrpf;
}
// Minor optimization: do not emit the predicated copy if the source
@@ -132,12 +130,12 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
// is the same register.
if (DestReg != SrcReg1) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFR_cPt), DestReg).
+ TII->get(Hexagon::A2_tfrt), DestReg).
addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
}
if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt), DestReg).
+ TII->get(Hexagon::C2_cmoveif), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(3).getImm());
} else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
@@ -158,7 +156,7 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt), DestReg).
+ TII->get(Hexagon::C2_cmoveit), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(2).getImm());
} else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
@@ -172,7 +170,7 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
// the destination is the same register.
if (DestReg != SrcReg2) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFR_cNotPt), DestReg).
+ TII->get(Hexagon::A2_tfrf), DestReg).
addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
}
MII = MBB->erase(MI);
@@ -188,10 +186,10 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
int Immed1 = MI->getOperand(2).getImm();
int Immed2 = MI->getOperand(3).getImm();
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt),
+ TII->get(Hexagon::C2_cmoveit),
DestReg).addReg(SrcReg1).addImm(Immed1);
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt),
+ TII->get(Hexagon::C2_cmoveif),
DestReg).addReg(SrcReg1).addImm(Immed2);
} else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index b184e62b4d0d..34e327f7c3a1 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Hexagon_SUBTARGET_H
-#define Hexagon_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
#include "HexagonFrameLowering.h"
-#include "HexagonInstrInfo.h"
#include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
#include "HexagonSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,19 +56,25 @@ public:
HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const HexagonRegisterInfo *getRegisterInfo() const {
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+ const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const HexagonRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const HexagonTargetLowering *getTargetLowering() const { return &TLInfo; }
- const HexagonFrameLowering *getFrameLowering() const {
+ const HexagonTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const HexagonFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const HexagonSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 78314100d18a..52aff2787bac 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -70,10 +70,13 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<HexagonTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+HexagonTargetMachine::~HexagonTargetMachine() {}
+
namespace {
/// Hexagon Code Generator Pass Configuration Options.
class HexagonPassConfig : public TargetPassConfig {
@@ -100,10 +103,10 @@ public:
}
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -128,51 +131,45 @@ bool HexagonPassConfig::addInstSelector() {
return false;
}
-bool HexagonPassConfig::addPreRegAlloc() {
+void HexagonPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None)
if (!DisableHardwareLoops)
- addPass(createHexagonHardwareLoops());
- return false;
+ addPass(createHexagonHardwareLoops(), false);
}
-bool HexagonPassConfig::addPostRegAlloc() {
+void HexagonPassConfig::addPostRegAlloc() {
const HexagonTargetMachine &TM = getHexagonTargetMachine();
if (getOptLevel() != CodeGenOpt::None)
if (!DisableHexagonCFGOpt)
- addPass(createHexagonCFGOptimizer(TM));
- return false;
+ addPass(createHexagonCFGOptimizer(TM), false);
}
-bool HexagonPassConfig::addPreSched2() {
+void HexagonPassConfig::addPreSched2() {
const HexagonTargetMachine &TM = getHexagonTargetMachine();
- addPass(createHexagonCopyToCombine());
+ addPass(createHexagonCopyToCombine(), false);
if (getOptLevel() != CodeGenOpt::None)
- addPass(&IfConverterID);
+ addPass(&IfConverterID, false);
addPass(createHexagonSplitConst32AndConst64(TM));
- printAndVerify("After hexagon split const32/64 pass");
- return true;
}
-bool HexagonPassConfig::addPreEmitPass() {
+void HexagonPassConfig::addPreEmitPass() {
const HexagonTargetMachine &TM = getHexagonTargetMachine();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
- addPass(createHexagonNewValueJump());
+ addPass(createHexagonNewValueJump(), false);
// Expand Spill code for predicate registers.
- addPass(createHexagonExpandPredSpillCode(TM));
+ addPass(createHexagonExpandPredSpillCode(TM), false);
// Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(TM));
+ addPass(createHexagonSplitTFRCondSets(TM), false);
// Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
- addPass(createHexagonFixupHwLoops());
- addPass(createHexagonPacketizer());
+ addPass(createHexagonFixupHwLoops(), false);
+ addPass(createHexagonPacketizer(), false);
}
-
- return false;
}
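The pass-config hooks change shape here: they return void instead of bool, and addPass() takes a flag saying whether the machine verifier should run after the pass (false throughout, matching the previous behavior). A stand-in sketch of the new shape; the types and printf are invented:

#include <cstdio>

struct Pass {};

struct PassConfig {
  void addPass(Pass *P, bool VerifyAfter = true) {
    std::printf("added pass %p, verify-after=%d\n", (void *)P, VerifyAfter);
  }
  void addPreRegAlloc() { // was: bool addPreRegAlloc() { ...; return false; }
    static Pass HardwareLoops;
    addPass(&HardwareLoops, /*VerifyAfter=*/false);
  }
};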
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index d88178e052e7..4a9f44732a6b 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonTARGETMACHINE_H
-#define HexagonTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
@@ -23,6 +23,7 @@ namespace llvm {
class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
HexagonSubtarget Subtarget;
public:
@@ -30,34 +31,18 @@ public:
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~HexagonTargetMachine() override;
- const HexagonInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const HexagonSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const HexagonRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const InstrItineraryData* getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
- const HexagonTargetLowering* getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const HexagonFrameLowering* getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
extern bool flag_aligned_memcpy;
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index c97526eed210..f4ab5e2b5c42 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -31,7 +31,7 @@ static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
-
+ InitializeELF(TM.Options.UseInitArray);
SmallDataSection =
getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
@@ -79,7 +79,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
}
return false;
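IsGlobalInSmallSection() now pulls the DataLayout from the subtarget before comparing a global's allocated size against the -hexagon-small-data-threshold option declared at the top of this file. The decision itself reduces to a bound check; a sketch, where the default of 8 is an assumption since the cl::opt's initializer lies outside this hunk:

#include <cstdint>

// Threshold mirrors the cl::opt above; 8 is assumed here for the sketch.
bool isInSmallSection(uint64_t TypeAllocSize, uint64_t Threshold = 8) {
  return TypeAllocSize > 0 && TypeAllocSize <= Threshold;
}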
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 1bd1272befcf..c97420427240 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonTARGETOBJECTFILE_H
-#define HexagonTARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 87ce960a60cf..c9605278e045 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -118,7 +118,6 @@ namespace {
public:
// Ctor.
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
const MachineBranchProbabilityInfo *MBPI);
// initPacketizerState - initialize some internal flags.
@@ -146,23 +145,23 @@ namespace {
bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC);
- bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
+ bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII);
- bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit);
- bool DemoteToDotOld(MachineInstr* MI);
- bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
- std::map <MachineInstr*, SUnit*> MIToSUnit);
- bool RestrictingDepExistInPacket(MachineInstr*,
- unsigned, std::map <MachineInstr*, SUnit*>);
+ const TargetRegisterClass *RC);
+ bool
+ CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(
+ MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit);
+ bool DemoteToDotOld(MachineInstr *MI);
+ bool ArePredicatesComplements(
+ MachineInstr *MI1, MachineInstr *MI2,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr *, unsigned,
+ const std::map<MachineInstr *, SUnit *> &);
bool isNewifiable(MachineInstr* MI);
bool isCondInst(MachineInstr* MI);
bool tryAllocateResourcesForConstExt(MachineInstr* MI);
@@ -184,20 +183,19 @@ INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
// HexagonPacketizerList Ctor.
HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, MDT, true){
+ MachineFunction &MF, MachineLoopInfo &MLI,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, true) {
this->MBPI = MBPI;
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const MachineBranchProbabilityInfo *MBPI =
&getAnalysis<MachineBranchProbabilityInfo>();
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI);
+ HexagonPacketizerList Packetizer(Fn, MLI, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -266,7 +264,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
static bool IsIndirectCall(MachineInstr* MI) {
- return ((MI->getOpcode() == Hexagon::CALLR) ||
+ return ((MI->getOpcode() == Hexagon::J2_callr) ||
(MI->getOpcode() == Hexagon::CALLRv3));
}
@@ -275,7 +273,7 @@ static bool IsIndirectCall(MachineInstr* MI) {
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
@@ -293,7 +291,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
MI->getDebugLoc());
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
MF->DeleteMachineInstr(PseudoMI);
@@ -305,7 +303,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
@@ -324,8 +322,8 @@ bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
unsigned DepReg) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
// Check for lr dependence
if (DepReg == QRI->getRARegister()) {
@@ -368,7 +366,7 @@ static bool IsRegDependence(const SDep::Kind DepType) {
}
static bool IsDirectJump(MachineInstr* MI) {
- return (MI->getOpcode() == Hexagon::JMP);
+ return (MI->getOpcode() == Hexagon::J2_jump);
}
static bool IsSchedBarrier(MachineInstr* MI) {
@@ -384,8 +382,8 @@ static bool IsControlFlow(MachineInstr* MI) {
}
static bool IsLoopN(MachineInstr *MI) {
- return (MI->getOpcode() == Hexagon::LOOP0_i ||
- MI->getOpcode() == Hexagon::LOOP0_r);
+ return (MI->getOpcode() == Hexagon::J2_loop0i ||
+ MI->getOpcode() == Hexagon::J2_loop0r);
}
/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
@@ -536,9 +534,9 @@ static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
// if there is a new value store in the packet. As a corollary, if there is
// already a store in a packet, there cannot be a new value store.
// Arch Spec: 3.4.4.2
-bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
- MachineInstr *PacketMI, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::CanPromoteToNewValueStore(
+ MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Make sure we are looking at the store, that can be promoted.
if (!QII->mayBeNewStore(MI))
@@ -549,8 +547,8 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
GetStoreValueOperand(MI).getReg() != DepReg)
return false;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const MCInstrDesc& MCID = PacketMI->getDesc();
// first operand is always the result
@@ -561,12 +559,12 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
- SUnit* PacketSU = MIToSUnit[*VI];
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
if (PacketSU->getInstr()->getDesc().mayStore() ||
// if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
// then we don't need this
- PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
- PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+ PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe)
return false;
}
@@ -661,7 +659,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
- SUnit* TempSU = MIToSUnit[*VI];
+ SUnit *TempSU = MIToSUnit.find(*VI)->second;
MachineInstr* TempMI = TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
@@ -717,15 +715,14 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
// Can this MI be promoted to either a
// new-value store or a new-value jump?
-bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
- SUnit *PacketSU, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII)
-{
+bool HexagonPacketizerList::CanPromoteToNewValue(
+ MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
if (!QRI->Subtarget.hasV4TOps() ||
!QII->mayBeNewStore(MI))
return false;
@@ -746,12 +743,10 @@ bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
- SUnit *PacketSU, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC )
-{
+bool HexagonPacketizerList::CanPromoteToDotNew(
+ MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Already a dot new instruction.
if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
@@ -803,12 +798,12 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
// Anti Dep between c) and b) is irrelevant for this case
-bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::RestrictingDepExistInPacket(
+ MachineInstr *MI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- SUnit* PacketSUDep = MIToSUnit[MI];
+ SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
@@ -817,7 +812,7 @@ bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
if(!QII->isPredicated(*VIN)) continue;
// Scheduling Unit for current insn in the packet
- SUnit* PacketSU = MIToSUnit[*VIN];
+ SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
// Look at dependencies between current members of the packet
// and predicate defining instruction MI.
@@ -861,8 +856,9 @@ static unsigned getPredicatedRegister(MachineInstr *MI,
// Given two predicated instructions, this function detects whether
// the predicates are complements
-bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
- MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::ArePredicatesComplements(
+ MachineInstr *MI1, MachineInstr *MI2,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
@@ -873,7 +869,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
return false;
// Scheduling unit for candidate
- SUnit* SU = MIToSUnit[MI1];
+ SUnit *SU = MIToSUnit.find(MI1)->second;
// One corner case deals with the following scenario:
// Trying to add
@@ -898,7 +894,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
// Scheduling Unit for current insn in the packet
- SUnit* PacketSU = MIToSUnit[*VIN];
+ SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
// If this instruction in the packet is succeeded by the candidate...
if (PacketSU->isSucc(SU)) {
@@ -1007,8 +1003,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineBasicBlock::iterator II = I;
const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Inline asm cannot go in the packet.
@@ -1103,7 +1099,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE && maintainNewValueJump); ++VI) {
- SUnit* PacketSU = MIToSUnit[*VI];
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
// NVJ can not be part of the dual jump - Arch Spec: section 7.8
if (PacketSU->getInstr()->getDesc().isCall()) {
@@ -1119,7 +1115,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// first operand is also a reg), first reg is not defined in
// the same packet.
if (PacketSU->getInstr()->getDesc().mayStore() ||
- PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
// Check #2.
(!secondRegMatch && NextMI->getOperand(1).isReg() &&
PacketSU->getInstr()->modifiesRegister(
@@ -1278,15 +1274,15 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For V4, special case ALLOCFRAME. Even though there is dependency
- // between ALLOCAFRAME and subsequent store, allow it to be
+ // between ALLOCFRAME and subsequent store, allow it to be
// packetized in a same packet. This implies that the store is using
- // caller's SP. Hense, offset needs to be updated accordingly.
+ // caller's SP. Hence, offset needs to be updated accordingly.
else if (DepType == SDep::Data
&& QRI->Subtarget.hasV4TOps()
- && J->getOpcode() == Hexagon::ALLOCFRAME
- && (I->getOpcode() == Hexagon::STrid
- || I->getOpcode() == Hexagon::STriw
- || I->getOpcode() == Hexagon::STrib)
+ && J->getOpcode() == Hexagon::S2_allocframe
+ && (I->getOpcode() == Hexagon::S2_storerd_io
+ || I->getOpcode() == Hexagon::S2_storeri_io
+ || I->getOpcode() == Hexagon::S2_storerb_io)
&& I->getOperand(0).getReg() == QRI->getStackRegister()
&& QII->isValidOffset(I->getOpcode(),
I->getOperand(1).getImm() -
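The map-lookup change repeated through this file is forced by the new const-reference parameters: std::map::operator[] is non-const because it inserts a default-constructed value on a miss, while find() is const and never mutates. The ->second dereference encodes the packetizer's invariant that the instruction is already in the map. A sketch:

#include <cassert>
#include <map>

int lookup(const std::map<int, int> &M, int Key) {
  auto It = M.find(Key);
  assert(It != M.end() && "key must already be in the map");
  return It->second; // M[Key] would not compile on a const map
}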
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
index 668ca98402b2..edbe29a5344a 100644
--- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -74,10 +74,14 @@ static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
}
const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment =
- State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Alignment = State.getTarget()
+ .getSubtargetImpl()
+ ->getDataLayout()
+ ->getABITypeAlignment(ArgTy);
unsigned Size =
- State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+ State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
+ ArgTy) /
+ 8;
// If it's passed by value, then we need the size of the aggregate not of
// the pointer.
@@ -129,10 +133,14 @@ static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
}
const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment =
- State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Alignment = State.getTarget()
+ .getSubtargetImpl()
+ ->getDataLayout()
+ ->getABITypeAlignment(ArgTy);
unsigned Size =
- State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+ State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
+ ArgTy) /
+ 8;
unsigned Offset3 = State.AllocateStack(Size, Alignment);
State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
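Both hunks reroute the same computation through the subtarget's DataLayout: the argument's size in bytes is its bit size divided by 8, and State.AllocateStack() hands back an offset rounded up to the ABI alignment. A self-contained sketch of that allocation, with invented names and assuming a power-of-two alignment:

#include <cstdint>

uint64_t allocateStack(uint64_t &NextOffset, uint64_t SizeBits, uint64_t Align) {
  uint64_t Size = SizeBits / 8;
  NextOffset = (NextOffset + Align - 1) & ~(Align - 1); // round up to Align
  uint64_t Slot = NextOffset;                           // offset handed back
  NextOffset += Size;
  return Slot;
}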
diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 1ddaf9bac203..000000000000
--- a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMHexagonAsmPrinter
- HexagonInstPrinter.cpp
- )
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
index a436b6e0454e..6ffd26a2022a 100644
--- a/lib/Target/Hexagon/LLVMBuild.txt
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = Disassembler MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = HexagonCodeGen
parent = Hexagon
-required_libraries = Analysis AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core HexagonDesc HexagonInfo MC SelectionDAG Support Target
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index eeef3ef8c200..2a6124ee0c5a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -1,5 +1,11 @@
add_llvm_library(LLVMHexagonDesc
+ HexagonAsmBackend.cpp
+ HexagonELFObjectWriter.cpp
+ HexagonInstPrinter.cpp
HexagonMCAsmInfo.cpp
+ HexagonMCCodeEmitter.cpp
HexagonMCInst.cpp
HexagonMCTargetDesc.cpp
)
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
new file mode 100644
index 000000000000..bdccf880d65f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -0,0 +1,74 @@
+//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonAsmBackend : public MCAsmBackend {
+public:
+ HexagonAsmBackend(Target const & /*T*/) {}
+
+ unsigned getNumFixupKinds() const override { return 0; }
+
+ void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/,
+ unsigned /*DataSize*/, uint64_t /*Value*/,
+ bool /*IsPCRel*/) const override {
+ return;
+ }
+
+ bool mayNeedRelaxation(MCInst const & /*Inst*/) const override {
+ return false;
+ }
+
+ bool fixupNeedsRelaxation(MCFixup const & /*Fixup*/, uint64_t /*Value*/,
+ MCRelaxableFragment const * /*DF*/,
+ MCAsmLayout const & /*Layout*/) const override {
+ llvm_unreachable("fixupNeedsRelaxation() unimplemented");
+ }
+
+ void relaxInstruction(MCInst const & /*Inst*/,
+ MCInst & /*Res*/) const override {
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t /*Count*/,
+ MCObjectWriter * /*OW*/) const override {
+ return true;
+ }
+};
+} // end anonymous namespace
+
+namespace {
+class ELFHexagonAsmBackend : public HexagonAsmBackend {
+ uint8_t OSABI;
+
+public:
+ ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
+ : HexagonAsmBackend(T), OSABI(OSABI) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ StringRef CPU("HexagonV4");
+ return createHexagonELFObjectWriter(OS, OSABI, CPU);
+ }
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCAsmBackend *createHexagonAsmBackend(Target const &T,
+ MCRegisterInfo const & /*MRI*/,
+ StringRef TT, StringRef /*CPU*/) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFHexagonAsmBackend(T, OSABI);
+}
+}
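
The backend above is intentionally a bring-up stub: it declares zero fixup kinds, refuses relaxation, and writeNopData() reports success without emitting anything. A minimal sketch of how a client reaches it once the RegisterMCAsmBackend call later in this patch is in place (an assumption about the standard TargetRegistry flow of this tree, not part of the patch itself):

    #include "llvm/MC/MCAsmBackend.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include <string>

    extern "C" void LLVMInitializeHexagonTargetInfo();
    extern "C" void LLVMInitializeHexagonTargetMC();

    int main() {
      // Usual extern "C" initialization entry points.
      LLVMInitializeHexagonTargetInfo();
      LLVMInitializeHexagonTargetMC();
      std::string Err;
      llvm::Target const *T = llvm::TargetRegistry::lookupTarget("hexagon", Err);
      llvm::MCRegisterInfo *MRI = T->createMCRegInfo("hexagon");
      // The registry dispatches to createHexagonAsmBackend() defined above.
      llvm::MCAsmBackend *MAB = T->createMCAsmBackend(*MRI, "hexagon", "");
      return MAB ? 0 : 1;
    }
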
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f8be77cd3af5..8e02f799d7e4 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -14,11 +14,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONBASEINFO_H
-#define HEXAGONBASEINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
#include "HexagonMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
+#include <stdint.h>
namespace llvm {
@@ -189,6 +190,15 @@ namespace HexagonII {
MO_GPREL
};
+ enum class InstParseBits : uint32_t {
+ INST_PARSE_MASK = 0x0000c000,
+ INST_PARSE_PACKET_END = 0x0000c000,
+ INST_PARSE_LOOP_END = 0x00008000,
+ INST_PARSE_NOT_END = 0x00004000,
+ INST_PARSE_DUPLEX = 0x00000000,
+ INST_PARSE_EXTENDER = 0x00000000
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
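
The new InstParseBits values live in bits 15:14 of every 32-bit instruction word and record the word's position within a packet; note that duplex and extender words share the all-zero pattern, so the mask alone cannot tell them apart. A self-contained sketch of decoding the field (the word value is hypothetical, not from the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Word = 0x1234c000;              // hypothetical instruction word
      switch (Word & 0x0000c000) {             // INST_PARSE_MASK: bits 15:14
      case 0x0000c000: std::puts("packet end");        break;
      case 0x00008000: std::puts("hardware loop end"); break;
      case 0x00004000: std::puts("not the end");       break;
      default:         std::puts("duplex or extender"); break;
      }
      return 0;
    }
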
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
new file mode 100644
index 000000000000..56c9dc712a6e
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -0,0 +1,62 @@
+//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "hexagon-elf-writer"
+
+using namespace llvm;
+using namespace Hexagon;
+
+namespace {
+
+class HexagonELFObjectWriter : public MCELFObjectTargetWriter {
+private:
+ StringRef CPU;
+
+public:
+ HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
+
+ virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
+};
+}
+
+HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
+ : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
+ /*HasRelocationAddend*/ true),
+ CPU(C) {}
+
+unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
+ MCFixup const &Fixup,
+ bool IsPCRel) const {
+ unsigned Type = (unsigned)ELF::R_HEX_NONE;
+ llvm::MCFixupKind Kind = Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
+ llvm_unreachable("Unimplemented Fixup kind!");
+ break;
+ case FK_Data_4:
+ Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
+ break;
+ }
+ return Type;
+}
+
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ StringRef CPU) {
+ MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
+}
\ No newline at end of file
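
GetRelocType() above only understands plain 4-byte data fixups so far; anything else dies on the llvm_unreachable. A standalone restatement of the mapping, sketched as a helper (the R_HEX_B22_PCREL mention is purely illustrative, since this patch defines no Hexagon-specific fixup kinds yet):

    #include "llvm/MC/MCFixup.h"
    #include "llvm/Support/ELF.h"
    #include "llvm/Support/ErrorHandling.h"

    // Absolute 32-bit data -> R_HEX_32; PC-relative -> R_HEX_32_PCREL.
    static unsigned relocFor(llvm::MCFixupKind Kind, bool IsPCRel) {
      switch (Kind) {
      case llvm::FK_Data_4:
        return IsPCRel ? llvm::ELF::R_HEX_32_PCREL : llvm::ELF::R_HEX_32;
      default:
        // A future branch fixup would add a case here, mapping to a
        // PC-relative branch relocation such as ELF::R_HEX_B22_PCREL.
        llvm_unreachable("Unimplemented fixup kind!");
      }
    }
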
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 9942a6067d19..5c6f0363f78a 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -29,6 +29,45 @@ using namespace llvm;
#include "HexagonGenAsmWriter.inc"
const char HexagonInstPrinter::PacketPadding = '\t';
+// Return the minimum value that a constant extendable operand can have
+// without being extended.
+static int getMinValue(uint64_t TSFlags) {
+ unsigned isSigned =
+ (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits =
+ (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return -1U << (bits - 1);
+
+ return 0;
+}
+
+// Return the maximum value that a constant extendable operand can have
+// without being extended.
+static int getMaxValue(uint64_t TSFlags) {
+ unsigned isSigned =
+ (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits =
+ (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return ~(-1U << (bits - 1));
+
+ return ~(-1U << bits);
+}
+
+// Return true if the instruction must be extended.
+static bool isExtended(uint64_t TSFlags) {
+ return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Currently just used in an assert statement
+static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED;
+// Return true if the instruction may be extended based on the operand value.
+static bool isExtendable(uint64_t TSFlags) {
+ return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
@@ -52,14 +91,14 @@ void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
// Ending a hardware loop is different from ending a regular packet.
assert(MI->isPacketEnd() && "Loop-end must also end the packet");
- if (MI->isPacketStart()) {
+ if (MI->isPacketBegin()) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
HexagonMCInst Nop;
StringRef NoAnnot;
- Nop.setOpcode (Hexagon::NOP);
- Nop.setPacketStart (MI->isPacketStart());
+ Nop.setOpcode (Hexagon::A2_nop);
+ Nop.setPacketBegin (MI->isPacketBegin());
printInst (&Nop, O, NoAnnot);
}
@@ -71,7 +110,7 @@ void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
}
else {
// Prefix the insn opening the packet.
- if (MI->isPacketStart())
+ if (MI->isPacketBegin())
O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
@@ -116,9 +155,20 @@ void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
- if (HMCI->isConstExtended())
+ const MCOperand &MO = MI->getOperand(OpNo);
+ const MCInstrDesc &MII = getMII().get(MI->getOpcode());
+
+ assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
+ "Expecting an extendable operand");
+
+ if (MO.isExpr() || isExtended(MII.TSFlags)) {
O << "#";
+ } else if (MO.isImm()) {
+ int ImmValue = MO.getImm();
+ if (ImmValue < getMinValue(MII.TSFlags) ||
+ ImmValue > getMaxValue(MII.TSFlags))
+ O << "#";
+ }
printOperand(MI, OpNo, O);
}
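
The extent math shared by getMinValue()/getMaxValue() above is easiest to check with concrete widths. A small worked sketch (not from the patch; the unsigned-to-int conversions are what make the signed case read back negative on the usual two's-complement targets):

    #include <cassert>

    int main() {
      unsigned bits = 8;               // hypothetical signed 8-bit extent
      int SMin = -1U << (bits - 1);    // 0xffffff80, read back as -128
      int SMax = ~(-1U << (bits - 1)); // 0x0000007f == 127
      assert(SMin == -128 && SMax == 127);

      unsigned ubits = 6;              // hypothetical unsigned 6-bit extent
      int UMax = ~(-1U << ubits);      // 63; the unsigned minimum is 0
      assert(UMax == 63);
      return 0;
    }
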
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 09e3f88434ed..55ae95cd06df 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONINSTPRINTER_H
-#define HEXAGONINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 141e514ce412..ad5e0fb15e7f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -24,7 +24,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
Data64bitsDirective = nullptr; // .xword is only supported by V9.
ZeroDirective = "\t.skip\t";
CommentString = "//";
- HasLEB128 = true;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
InlineAsmStart = "# InlineAsm Start";
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index 953d804b49ab..ab18f0b37ba6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonMCASMINFO_H
-#define HexagonMCASMINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
new file mode 100644
index 000000000000..487872a1f5dd
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -0,0 +1,88 @@
+//===-- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+using namespace llvm;
+using namespace Hexagon;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+/// \brief 10.6 Instruction Packets
+/// Possible values for instruction packet parse field.
+enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };
+/// \brief Returns the packet bits based on instruction position.
+uint32_t getPacketBits(HexagonMCInst const &HMI) {
+ unsigned const ParseFieldOffset = 14;
+ ParseField Field = HMI.isPacketEnd() ? ParseField::end : ParseField::last0;
+ return static_cast <uint32_t> (Field) << ParseFieldOffset;
+}
+void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
+ OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x08) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x10) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x18) & 0xff);
+}
+}
+
+HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
+ MCSubtargetInfo const &aMST,
+ MCContext &aMCT)
+ : MST(aMST), MCT(aMCT) {}
+
+void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI);
+ uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB);
+ assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit");
+ emitLittleEndian(Binary, OS);
+ ++MCNumEmitted;
+}
+
+unsigned
+HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ if (MO.isReg())
+ return MCT.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ llvm_unreachable("Only Immediates and Registers implemented right now");
+}
+
+MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
+ return MST;
+}
+
+MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI,
+ MCSubtargetInfo const &MST,
+ MCContext &MCT) {
+ return new HexagonMCCodeEmitter(MII, MST, MCT);
+}
+
+#include "HexagonGenMCCodeEmitter.inc"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
new file mode 100644
index 000000000000..96048adf34b7
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -0,0 +1,60 @@
+//===-- HexagonMCCodeEmitter.h - Hexagon Target Descriptions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Definition for classes that emit Hexagon machine code from MCInsts
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCODEEMITTER_H
+#define HEXAGONMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class HexagonMCCodeEmitter : public MCCodeEmitter {
+ MCSubtargetInfo const &MST;
+ MCContext &MCT;
+
+public:
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
+ MCContext &aMCT);
+
+ MCSubtargetInfo const &getSubtargetInfo() const;
+
+ void EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const override;
+
+ // \brief TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(MCInst const &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+
+ /// \brief Return binary encoding of operand.
+ unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+
+private:
+ HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+ void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+}; // class HexagonMCCodeEmitter
+
+} // namespace llvm
+
+#endif /* HEXAGONMCCODEEMITTER_H */
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
index 9260b4a27661..d8b9a2567eeb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -18,27 +18,77 @@
using namespace llvm;
+std::unique_ptr <MCInstrInfo const> HexagonMCInst::MCII;
+
+HexagonMCInst::HexagonMCInst() : MCInst() {}
+HexagonMCInst::HexagonMCInst(MCInstrDesc const &mcid) : MCInst() {}
+
+void HexagonMCInst::AppendImplicitOperands(MCInst &MCI) {
+ MCI.addOperand(MCOperand::CreateImm(0));
+ MCI.addOperand(MCOperand::CreateInst(nullptr));
+}
+
+std::bitset<16> HexagonMCInst::GetImplicitBits(MCInst const &MCI) {
+ SanityCheckImplicitOperands(MCI);
+ std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
+ return Bits;
+}
+
+void HexagonMCInst::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
+ SanityCheckImplicitOperands(MCI);
+ MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
+}
+
+void HexagonMCInst::setPacketBegin(bool f) {
+ std::bitset<16> Bits(GetImplicitBits(*this));
+ Bits.set(packetBeginIndex, f);
+ SetImplicitBits(*this, Bits);
+}
+
+bool HexagonMCInst::isPacketBegin() const {
+ std::bitset<16> Bits(GetImplicitBits(*this));
+ return Bits.test(packetBeginIndex);
+}
+
+void HexagonMCInst::setPacketEnd(bool f) {
+ std::bitset<16> Bits(GetImplicitBits(*this));
+ Bits.set(packetEndIndex, f);
+ SetImplicitBits(*this, Bits);
+}
+
+bool HexagonMCInst::isPacketEnd() const {
+ std::bitset<16> Bits(GetImplicitBits(*this));
+ return Bits.test(packetEndIndex);
+}
+
+void HexagonMCInst::resetPacket() {
+ setPacketBegin(false);
+ setPacketEnd(false);
+}
+
// Return the slots used by the insn.
-unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
- const HexagonInstrInfo* QII = TM->getInstrInfo();
- const InstrItineraryData* II = TM->getInstrItineraryData();
- const InstrStage*
- IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine *TM) const {
+ const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo();
+ const InstrItineraryData *II =
+ TM->getSubtargetImpl()->getInstrItineraryData();
+ const InstrStage *IS =
+ II->beginStage(QII->get(this->getOpcode()).getSchedClass());
return (IS->getUnits());
}
+MCInstrDesc const &HexagonMCInst::getDesc() const {
+  return (MCII->get(getOpcode()));
+}
+
// Return the Hexagon ISA class for the insn.
unsigned HexagonMCInst::getType() const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
}
// Return whether the insn is an actual insn.
bool HexagonMCInst::isCanon() const {
- return (!MCID->isPseudo() &&
- !isPrefix() &&
+ return (!getDesc().isPseudo() && !isPrefix() &&
getType() != HexagonII::TypeENDLOOP);
}
@@ -49,30 +99,30 @@ bool HexagonMCInst::isPrefix() const {
// Return whether the insn is solo, i.e., cannot be in a packet.
bool HexagonMCInst::isSolo() const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
}
// Return whether the insn is a new-value consumer.
bool HexagonMCInst::isNewValue() const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
// Return whether the instruction is a legal new-value producer.
bool HexagonMCInst::hasNewValue() const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
}
// Return the operand that consumes or produces a new value.
-const MCOperand& HexagonMCInst::getNewValue() const {
- const uint64_t F = MCID->TSFlags;
- const unsigned O = (F >> HexagonII::NewValueOpPos) &
- HexagonII::NewValueOpMask;
- const MCOperand& MCO = getOperand(O);
+const MCOperand &HexagonMCInst::getNewValue() const {
+ const uint64_t F = getDesc().TSFlags;
+ const unsigned O =
+ (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ const MCOperand &MCO = getOperand(O);
- assert ((isNewValue() || hasNewValue()) && MCO.isReg());
+ assert((isNewValue() || hasNewValue()) && MCO.isReg());
return (MCO);
}
@@ -92,9 +142,9 @@ bool HexagonMCInst::isConstExtended(void) const {
return false;
short ExtOpNum = getCExtOpNum();
- int MinValue = getMinValue();
- int MaxValue = getMaxValue();
- const MCOperand& MO = getOperand(ExtOpNum);
+ int MinValue = getMinValue();
+ int MaxValue = getMaxValue();
+ const MCOperand &MO = getOperand(ExtOpNum);
// We could be using an instruction with an extendable immediate and shoehorn
// a global address into it. If it is a global address it will be constant
@@ -115,46 +165,45 @@ bool HexagonMCInst::isConstExtended(void) const {
// Return whether the instruction must be always extended.
bool HexagonMCInst::isExtended(void) const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
}
// Return true if the instruction may be extended based on the operand value.
bool HexagonMCInst::isExtendable(void) const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
}
// Return number of bits in the constant extended operand.
unsigned HexagonMCInst::getBitCount(void) const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
}
// Return constant extended operand number.
unsigned short HexagonMCInst::getCExtOpNum(void) const {
- const uint64_t F = MCID->TSFlags;
+ const uint64_t F = getDesc().TSFlags;
return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
}
// Return whether the operand can be constant extended.
bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
- const uint64_t F = MCID->TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
- == OperandNum;
+ const uint64_t F = getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
+ OperandNum;
}
// Return the min value that a constant extendable operand can have
// without being extended.
int HexagonMCInst::getMinValue(void) const {
- const uint64_t F = MCID->TSFlags;
- unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
- & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos)
- & HexagonII::ExtentBitsMask;
+ const uint64_t F = getDesc().TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1 << (bits - 1);
+ return -1U << (bits - 1);
else
return 0;
}
@@ -162,14 +211,13 @@ int HexagonMCInst::getMinValue(void) const {
// Return the max value that a constant extendable operand can have
// without being extended.
int HexagonMCInst::getMaxValue(void) const {
- const uint64_t F = MCID->TSFlags;
- unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
- & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos)
- & HexagonII::ExtentBitsMask;
+ const uint64_t F = getDesc().TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1 << (bits - 1));
+ return ~(-1U << (bits - 1));
else
- return ~(-1 << bits);
+ return ~(-1U << bits);
}
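
The packet markers that used to be dedicated bitfields are now round-tripped through the trailing immediate operand as a 16-bit set: bit 0 (packetBeginIndex) marks the packet begin and bit 1 (packetEndIndex) the packet end. A sketch of the bookkeeping (not from the patch; the local bitset stands in for the operand that GetImplicitBits()/SetImplicitBits() read and write):

    #include <bitset>
    #include <cassert>

    int main() {
      std::bitset<16> Bits;                   // the imm operand's value
      Bits.set(/*packetBeginIndex=*/0, true); // setPacketBegin(true)
      Bits.set(/*packetEndIndex=*/1, false);  // setPacketEnd(false)
      assert(Bits.test(0) && !Bits.test(1));
      assert(Bits.to_ulong() == 1);           // written back to the operand
      return 0;
    }
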
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
index 3c52d4563f8c..ce9a8db5ac44 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -11,90 +11,98 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
#include "HexagonTargetMachine.h"
#include "llvm/MC/MCInst.h"
+#include <memory>
+extern "C" void LLVMInitializeHexagonTargetMC();
namespace llvm {
- class MCOperand;
+class MCOperand;
- class HexagonMCInst: public MCInst {
- // MCID is set during instruction lowering.
- // It is needed in order to access TSFlags for
- // use in checking MC instruction properties.
- const MCInstrDesc *MCID;
+class HexagonMCInst : public MCInst {
+ friend void ::LLVMInitializeHexagonTargetMC();
+ // Used to access TSFlags
+ static std::unique_ptr <MCInstrInfo const> MCII;
- // Packet start and end markers
- unsigned packetStart: 1, packetEnd: 1;
+public:
+ explicit HexagonMCInst();
+ HexagonMCInst(const MCInstrDesc &mcid);
- public:
- explicit HexagonMCInst():
- MCInst(), MCID(nullptr), packetStart(0), packetEnd(0) {};
- HexagonMCInst(const MCInstrDesc& mcid):
- MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
+ static void AppendImplicitOperands(MCInst &MCI);
+ static std::bitset<16> GetImplicitBits(MCInst const &MCI);
+ static void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+ static void SanityCheckImplicitOperands(MCInst const &MCI) {
+ assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
+ assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
+ "Parent pointer");
+ assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
+ "Implicit bits and flags");
+ }
- bool isPacketStart() const { return (packetStart); };
- bool isPacketEnd() const { return (packetEnd); };
- void setPacketStart(bool Y) { packetStart = Y; };
- void setPacketEnd(bool Y) { packetEnd = Y; };
- void resetPacket() { setPacketStart(false); setPacketEnd(false); };
+ void setPacketBegin(bool Y);
+ bool isPacketBegin() const;
+ static const size_t packetBeginIndex = 0;
+ void setPacketEnd(bool Y);
+ bool isPacketEnd() const;
+ static const size_t packetEndIndex = 1;
+ void resetPacket();
- // Return the slots used by the insn.
- unsigned getUnits(const HexagonTargetMachine* TM) const;
+ // Return the slots used by the insn.
+ unsigned getUnits(const HexagonTargetMachine *TM) const;
- // Return the Hexagon ISA class for the insn.
- unsigned getType() const;
+ // Return the Hexagon ISA class for the insn.
+ unsigned getType() const;
- void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; };
- const MCInstrDesc& getDesc(void) const { return *MCID; };
+ MCInstrDesc const &getDesc() const;
- // Return whether the insn is an actual insn.
- bool isCanon() const;
+ // Return whether the insn is an actual insn.
+ bool isCanon() const;
- // Return whether the insn is a prefix.
- bool isPrefix() const;
+ // Return whether the insn is a prefix.
+ bool isPrefix() const;
- // Return whether the insn is solo, i.e., cannot be in a packet.
- bool isSolo() const;
+ // Return whether the insn is solo, i.e., cannot be in a packet.
+ bool isSolo() const;
- // Return whether the instruction needs to be constant extended.
- bool isConstExtended() const;
+ // Return whether the instruction needs to be constant extended.
+ bool isConstExtended() const;
- // Return constant extended operand number.
- unsigned short getCExtOpNum(void) const;
+ // Return constant extended operand number.
+ unsigned short getCExtOpNum(void) const;
- // Return whether the insn is a new-value consumer.
- bool isNewValue() const;
+ // Return whether the insn is a new-value consumer.
+ bool isNewValue() const;
- // Return whether the instruction is a legal new-value producer.
- bool hasNewValue() const;
+ // Return whether the instruction is a legal new-value producer.
+ bool hasNewValue() const;
- // Return the operand that consumes or produces a new value.
- const MCOperand& getNewValue() const;
+ // Return the operand that consumes or produces a new value.
+ const MCOperand &getNewValue() const;
- // Return number of bits in the constant extended operand.
- unsigned getBitCount(void) const;
+ // Return number of bits in the constant extended operand.
+ unsigned getBitCount(void) const;
- private:
- // Return whether the instruction must be always extended.
- bool isExtended() const;
+private:
+ // Return whether the instruction must be always extended.
+ bool isExtended() const;
- // Return true if the insn may be extended based on the operand value.
- bool isExtendable() const;
+ // Return true if the insn may be extended based on the operand value.
+ bool isExtendable() const;
- // Return true if the operand can be constant extended.
- bool isOperandExtended(const unsigned short OperandNum) const;
+ // Return true if the operand can be constant extended.
+ bool isOperandExtended(const unsigned short OperandNum) const;
- // Return the min value that a constant extendable operand can have
- // without being extended.
- int getMinValue() const;
+ // Return the min value that a constant extendable operand can have
+ // without being extended.
+ int getMinValue() const;
- // Return the max value that a constant extendable operand can have
- // without being extended.
- int getMaxValue() const;
- };
+ // Return the max value that a constant extendable operand can have
+ // without being extended.
+ int getMaxValue() const;
+};
}
#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 581674dd6cb2..ae5a22bdb01b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,8 +13,10 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
-#include "InstPrinter/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -46,9 +48,17 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
+static MCStreamer *
+createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
+ return ES;
+}
+
+
+static MCSubtargetInfo *
+createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
return X;
@@ -59,22 +69,40 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
- nullptr, Hexagon::R30, 0);
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
+static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
+ MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCSubtargetInfo const &STI, bool RelaxAll) {
+ MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+ new MCTargetStreamer(*ES);
+ return ES;
+}
+
+
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
+static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new HexagonInstPrinter(MAI, MII, MRI);
+}
// Force static initialization.
extern "C" void LLVMInitializeHexagonTargetMC() {
@@ -86,7 +114,9 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
createHexagonMCCodeGenInfo);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+ createHexagonMCInstrInfo);
+ HexagonMCInst::MCII.reset (createHexagonMCInstrInfo());
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
@@ -95,4 +125,19 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
createHexagonMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget,
+ createHexagonMCCodeEmitter);
+
+ // Register the MC Inst Printer
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
+ createHexagonAsmBackend);
+
+ // Register the obj streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 2238b1ae5f35..02fd5161d24a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -11,15 +11,37 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONMCTARGETDESC_H
-#define HEXAGONMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+
+#include <cstdint>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class StringRef;
+class raw_ostream;
extern Target TheHexagonTarget;
+MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
+ MCRegisterInfo const &MRI,
+ MCSubtargetInfo const &MST,
+ MCContext &MCT);
+
+MCAsmBackend *createHexagonAsmBackend(Target const &T,
+ MCRegisterInfo const &MRI, StringRef TT,
+ StringRef CPU);
+
+MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+ StringRef CPU);
+
} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
index 73c7e016f939..f559a21e3f9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = HexagonDesc
parent = Hexagon
-required_libraries = HexagonInfo MC
+required_libraries = HexagonInfo MC Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
index dc387c549a1d..329c9d3018f0 100644
--- a/lib/Target/Hexagon/Makefile
+++ b/lib/Target/Hexagon/Makefile
@@ -14,10 +14,12 @@ TARGET = Hexagon
BUILT_SOURCES = HexagonGenRegisterInfo.inc \
HexagonGenInstrInfo.inc \
HexagonGenAsmWriter.inc \
- HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
- HexagonGenCallingConv.inc \
- HexagonGenDFAPacketizer.inc
-
-DIRS = InstPrinter TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc \
+ HexagonGenMCCodeEmitter.inc \
+ HexagonGenDisassemblerTables.inc
+
+DIRS = TargetInfo MCTargetDesc Disassembler
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 5afbd20dd421..7fae5056b1b5 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430INSTPRINTER_H
-#define MSP430INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
+#define LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index ef805bbab5be..2c9532d321e4 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430TARGETASMINFO_H
-#define MSP430TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCASMINFO_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 72adb45f9be8..4c708033d6c1 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -39,7 +39,7 @@ static MCInstrInfo *createMSP430MCInstrInfo() {
static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ InitMSP430MCRegisterInfo(X, MSP430::PC);
return X;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 7f3505ca5514..586f5d991c94 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430MCTARGETDESC_H
-#define MSP430MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 4574ce5f98b7..796f25233123 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_H
-#define LLVM_TARGET_MSP430_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430_H
+#define LLVM_LIB_TARGET_MSP430_MSP430_H
#include "MCTargetDesc/MSP430MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index a96930a09889..ffcf22216d4f 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -17,6 +17,7 @@
#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -54,7 +55,7 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() {
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
const MSP430InstrInfo *TII =
- static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ static_cast<const MSP430InstrInfo *>(Fn.getSubtarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index 8a69d1ecb287..b38f5781c84a 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -17,7 +17,7 @@ def RetCC_MSP430 : CallingConv<[
CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
// i16 are returned in registers R15, R14, R13, R12
- CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+ CCIfType<[i16], CCAssignToReg<[R15, R14, R13, R12]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 82c8b298c51f..d6cb9f6459a6 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -14,6 +14,7 @@
#include "MSP430FrameLowering.h"
#include "MSP430InstrInfo.h"
#include "MSP430MachineFunctionInfo.h"
+#include "MSP430Subtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -43,7 +44,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -62,18 +63,18 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update the frame offset adjustment.
MFI->setOffsetAdjustment(-NumBytes);
- // Save FPW into the appropriate stack slot...
+ // Save FP into the appropriate stack slot...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
- .addReg(MSP430::FPW, RegState::Kill);
+ .addReg(MSP430::FP, RegState::Kill);
- // Update FPW with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
- .addReg(MSP430::SPW);
+ // Update FP with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FP)
+ .addReg(MSP430::SP);
// Mark the FramePtr as live-in in every block except the entry.
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ++I)
- I->addLiveIn(MSP430::FPW);
+ I->addLiveIn(MSP430::FP);
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
@@ -85,18 +86,18 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // adjust stack pointer: SPW -= numbytes
- // If there is an SUB16ri of SPW immediately before this instruction, merge
+ if (NumBytes) { // adjust stack pointer: SP -= numbytes
+ // If there is an SUB16ri of SP immediately before this instruction, merge
// the two.
//NumBytes -= mergeSPUpdates(MBB, MBBI, true);
- // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // If there is an ADD16ri or SUB16ri of SP immediately after this
// instruction, merge the two instructions.
// mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(NumBytes);
// The SR implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -108,7 +109,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
@@ -131,8 +132,8 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t FrameSize = StackSize - 2;
NumBytes = FrameSize - CSSize;
- // pop FPW.
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ // pop FP.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FP);
} else
NumBytes = StackSize - CSSize;
@@ -147,28 +148,28 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
DL = MBBI->getDebugLoc();
- // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // If there is an ADD16ri or SUB16ri of SP immediately before this
// instruction, merge the two instructions.
//if (NumBytes || MFI->hasVarSizedObjects())
// mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
if (MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::FP);
if (CSSize) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(CSSize);
+ TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(CSSize);
// The SR implicit def is dead.
MI->getOperand(3).setIsDead();
}
} else {
- // adjust stack pointer back: SPW += numbytes
+ // adjust stack pointer back: SP += numbytes
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(NumBytes);
// The SR implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -188,7 +189,7 @@ MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
MFI->setCalleeSavedFrameSize(CSI.size() * 2);
@@ -214,7 +215,7 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i)
BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
@@ -226,13 +227,13 @@ void MSP430FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackAlign = getStackAlignment();
if (!hasReservedCallFrame(MF)) {
// If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub SPW, <amt>' and the
- // adjcallstackdown instruction into 'add SPW, <amt>'
+ // adjcallstackup instruction into a 'sub SP, <amt>' and the
+ // adjcallstackdown instruction into 'add SP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
MachineInstr *Old = I;
uint64_t Amount = Old->getOperand(0).getImm();
@@ -245,8 +246,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = nullptr;
if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
+ TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(Amount);
} else {
assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
// factor out the amount the callee already popped.
@@ -254,8 +255,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount -= CalleeAmt;
if (Amount)
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
+ TII.get(MSP430::ADD16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(Amount);
}
if (New) {
@@ -273,7 +274,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
- MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ MSP430::SP).addReg(MSP430::SP).addImm(CalleeAmt);
// The SR implicit def is dead.
New->getOperand(3).setIsDead();
@@ -287,11 +288,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void
MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *) const {
- // Create a frame entry for the FPW register that must be saved.
+ // Create a frame entry for the FP register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
- "Slot for FPW register must be last in order to be found!");
+ "Slot for FP register must be last in order to be found!");
}
}
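
With the W suffixes gone, the frame bookkeeping above is easier to follow: when a frame pointer is in use, two bytes of the frame are consumed by the pushed FP itself, so the epilogue recomputes the stack adjustment from the remaining sizes. A sketch with hypothetical numbers (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t StackSize = 12;            // hypothetical MFI->getStackSize()
      uint64_t CSSize = 4;                // hypothetical callee-saved bytes
      uint64_t FrameSize = StackSize - 2; // minus the slot for the saved FP
      uint64_t NumBytes = FrameSize - CSSize; // SP adjustment on return: 6
      assert(NumBytes == 6);
      return 0;
    }
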
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index fadfeedd1853..1941af26e853 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430_FRAMEINFO_H
-#define MSP430_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H
+#define LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H
#include "MSP430.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index a9b903503d3a..81c176b8262d 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -97,9 +97,9 @@ namespace {
public:
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) {}
const char *getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 3d3ee92d4bcb..04bb6d041cf5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -58,7 +58,7 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
clEnumValEnd));
MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ : TargetLowering(TM) {
// Set up the register classes.
addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
@@ -72,7 +72,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
// Division is expensive
setIntDivIsCheap(false);
- setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setStackPointerRegisterToSaveRestore(MSP430::SP);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -80,11 +80,13 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+ }
// We don't have any truncstores
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
@@ -282,7 +284,7 @@ static void AnalyzeArguments(CCState &State,
SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<ArgT> &Args) {
static const MCPhysReg RegList[] = {
- MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W
+ MSP430::R15, MSP430::R14, MSP430::R13, MSP430::R12
};
static const unsigned NbRegs = array_lengthof(RegList);
@@ -437,8 +439,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
AnalyzeArguments(CCInfo, ArgLocs, Ins);
// Create frame index for the start of the first vararg value
@@ -533,8 +535,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
report_fatal_error("ISRs cannot return any value");
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
AnalyzeReturnValues(CCInfo, RVLocs, Outs);
@@ -583,8 +585,8 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
AnalyzeArguments(CCInfo, ArgLocs, Outs);
// Get a count of how many bytes are to be pushed on the stack.
@@ -627,7 +629,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
assert(VA.isMemLoc());
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, getPointerTy());
SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
StackPtr,
@@ -719,8 +721,8 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
AnalyzeReturnValues(CCInfo, RVLocs, Ins);
@@ -941,31 +943,31 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Convert = false;
break;
case MSP430CC::COND_HS:
- // Res = SRW & 1, no processing is required
+ // Res = SR & 1, no processing is required
break;
case MSP430CC::COND_LO:
- // Res = ~(SRW & 1)
+ // Res = ~(SR & 1)
Invert = true;
break;
case MSP430CC::COND_NE:
if (andCC) {
- // C = ~Z, thus Res = SRW & 1, no processing is required
+ // C = ~Z, thus Res = SR & 1, no processing is required
} else {
- // Res = ~((SRW >> 1) & 1)
+ // Res = ~((SR >> 1) & 1)
Shift = true;
Invert = true;
}
break;
case MSP430CC::COND_E:
Shift = true;
- // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
- // Res = (SRW >> 1) & 1 is 1 word shorter.
+ // C = ~Z for AND instruction, thus we can put Res = ~(SR & 1), however,
+ // Res = (SR >> 1) & 1 is 1 word shorter.
break;
}
EVT VT = Op.getValueType();
SDValue One = DAG.getConstant(1, VT);
if (Convert) {
- SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SR,
MVT::i16, Flag);
if (Shift)
// FIXME: somewhere this is turned into a SRL, lower it MSP specific?
@@ -1074,7 +1076,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- MSP430::FPW, VT);
+ MSP430::FP, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
@@ -1199,7 +1201,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned Opc;
const TargetRegisterClass * RC;
@@ -1310,7 +1313,8 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
return EmitShiftInstr(MI, BB);
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
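
The LowerSETCC comments above rely on the MSP430 status-register layout, where carry C is SR bit 0 and zero Z is SR bit 1. A worked sketch of the three result shapes (not from the patch; the flag value is hypothetical):

    #include <cassert>

    int main() {
      unsigned SR = 0x2;              // hypothetical flags: Z set, C clear
      unsigned HS = SR & 1;           // COND_HS: Res = SR & 1          -> 0
      unsigned LO = ~(SR & 1) & 1;    // COND_LO: Res = ~(SR & 1)       -> 1
      unsigned E  = (SR >> 1) & 1;    // COND_E:  Res = (SR >> 1) & 1   -> 1
      assert(HS == 0 && LO == 1 && E == 1);
      return 0;
    }
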
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 3e2f344aeb5a..073ddc91162e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
-#define LLVM_TARGET_MSP430_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430ISELLOWERING_H
+#define LLVM_LIB_TARGET_MSP430_MSP430ISELLOWERING_H
#include "MSP430.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -170,4 +170,4 @@ namespace llvm {
};
} // namespace llvm
-#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
+#endif
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index ccb6c09e3f41..27681aae6068 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -307,7 +307,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0;
case TargetOpcode::INLINEASM: {
const MachineFunction *MF = MI->getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
*MF->getTarget().getMCAsmInfo());
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index e6baaefe4842..f9b25b639626 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430INSTRINFO_H
-#define LLVM_TARGET_MSP430INSTRINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H
#include "MSP430RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 50e3fdad1a9f..c0c29b992238 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -111,8 +111,8 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber SRW.
-let Defs = [SPW, SRW], Uses = [SPW] in {
+// sub / add which can clobber SR.
+let Defs = [SP, SR], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
"#ADJCALLSTACKDOWN",
[(MSP430callseq_start timm:$amt)]>;
@@ -130,7 +130,7 @@ let usesCustomInserter = 1 in {
"# Select16 PSEUDO",
[(set GR16:$dst,
(MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
- let Defs = [SRW] in {
+ let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
[(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
@@ -153,7 +153,7 @@ let usesCustomInserter = 1 in {
}
}
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def NOP : Pseudo<(outs), (ins), "nop", []>;
//===----------------------------------------------------------------------===//
@@ -192,7 +192,7 @@ let isBarrier = 1 in {
}
// Conditional branches
-let Uses = [SRW] in
+let Uses = [SR] in
def JCC : CJForm<0, 0,
(outs), (ins jmptarget:$dst, cc:$cc),
"j$cc\t$dst",
@@ -207,8 +207,8 @@ let isCall = 1 in
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
- let Defs = [R12W, R13W, R14W, R15W, SRW],
- Uses = [SPW] in {
+ let Defs = [R12, R13, R14, R15, SR],
+ Uses = [SP] in {
def CALLi : II16i<0x0,
(outs), (ins i16imm:$dst),
"call\t$dst", [(MSP430call imm:$dst)]>;
@@ -224,7 +224,7 @@ let isCall = 1 in
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let Defs = [SP], Uses = [SP], hasSideEffects=0 in {
let mayLoad = 1 in
def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$reg), (ins), "pop.w\t$reg", []>;
@@ -238,7 +238,7 @@ def PUSH16r : II16r<0x0,
// Move Instructions
// FIXME: Provide proper encoding!
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MOV8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src),
"mov.b\t{$src, $dst}",
@@ -337,7 +337,7 @@ def MOV16mm : I16mm<0x0,
let Constraints = "$src = $dst" in {
-let Defs = [SRW] in {
+let Defs = [SR] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
@@ -345,24 +345,24 @@ def ADD8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def ADD8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -381,160 +381,160 @@ def ADD8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def ADD8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-let Uses = [SRW] in {
+let Uses = [SR] in {
let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
def ADC8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
} // isCommutable
def ADC8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def ADC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-} // Uses = [SRW]
+} // Uses = [SR]
let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
def AND8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def AND8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -553,36 +553,36 @@ def AND8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
@@ -703,35 +703,35 @@ def XOR8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def XOR8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -750,34 +750,34 @@ def XOR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
@@ -785,34 +785,34 @@ def SUB8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -831,153 +831,153 @@ def SUB8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-let Uses = [SRW] in {
+let Uses = [SR] in {
def SBC8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def SBC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-} // Uses = [SRW]
+} // Uses = [SR]
// FIXME: memory variant!
def SAR8r1 : II8r<0x0,
(outs GR8:$dst), (ins GR8:$src),
"rra.b\t$dst",
[(set GR8:$dst, (MSP430rra GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR16r1 : II16r<0x0,
(outs GR16:$dst), (ins GR16:$src),
"rra.w\t$dst",
[(set GR16:$dst, (MSP430rra GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SHL8r1 : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src),
"rla.b\t$dst",
[(set GR8:$dst, (MSP430rla GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SHL16r1 : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src),
"rla.w\t$dst",
[(set GR16:$dst, (MSP430rla GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
"clrc\n\t"
"rrc.b\t$dst",
[(set GR8:$dst, (MSP430rrc GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
"clrc\n\t"
"rrc.w\t$dst",
[(set GR16:$dst, (MSP430rrc GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
// FIXME: Memory sext's ?
def SEXT16r : II16r<0x0,
(outs GR16:$dst), (ins GR16:$src),
"sxt\t$dst",
[(set GR16:$dst, (sext_inreg GR16:$src, i8)),
- (implicit SRW)]>;
+ (implicit SR)]>;
-} // Defs = [SRW]
+} // Defs = [SR]
def ZEXT16r : I8rr<0x0,
(outs GR16:$dst), (ins GR16:$src),
@@ -993,57 +993,57 @@ def SWPB16r : II16r<0x0,
} // Constraints = "$src = $dst"
// Integer comparisons
-let Defs = [SRW] in {
+let Defs = [SR] in {
def CMP8rr : I8rr<0x0,
(outs), (ins GR8:$src, GR8:$src2),
"cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SR)]>;
def CMP16rr : I16rr<0x0,
(outs), (ins GR16:$src, GR16:$src2),
"cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SR)]>;
def CMP8ri : I8ri<0x0,
(outs), (ins GR8:$src, i8imm:$src2),
"cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SR)]>;
def CMP16ri : I16ri<0x0,
(outs), (ins GR16:$src, i16imm:$src2),
"cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SR)]>;
def CMP8mi : I8mi<0x0,
(outs), (ins memsrc:$src, i8imm:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp (load addr:$src),
- (i8 imm:$src2)), (implicit SRW)]>;
+ (i8 imm:$src2)), (implicit SR)]>;
def CMP16mi : I16mi<0x0,
(outs), (ins memsrc:$src, i16imm:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp (load addr:$src),
- (i16 imm:$src2)), (implicit SRW)]>;
+ (i16 imm:$src2)), (implicit SR)]>;
def CMP8rm : I8rm<0x0,
(outs), (ins GR8:$src, memsrc:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp GR8:$src, (load addr:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP16rm : I16rm<0x0,
(outs), (ins GR16:$src, memsrc:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp GR16:$src, (load addr:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP8mr : I8mr<0x0,
(outs), (ins memsrc:$src, GR8:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp (load addr:$src), GR8:$src2),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP16mr : I16mr<0x0,
(outs), (ins memsrc:$src, GR16:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp (load addr:$src), GR16:$src2),
- (implicit SRW)]>;
+ (implicit SR)]>;
// BIT TESTS, just sets condition codes
@@ -1053,56 +1053,56 @@ def BIT8rr : I8rr<0x0,
(outs), (ins GR8:$src, GR8:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16rr : I16rr<0x0,
(outs), (ins GR16:$src, GR16:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def BIT8ri : I8ri<0x0,
(outs), (ins GR8:$src, i8imm:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16ri : I16ri<0x0,
(outs), (ins GR16:$src, i16imm:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8rm : I8rm<0x0,
(outs), (ins GR8:$src, memdst:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16rm : I16rm<0x0,
(outs), (ins GR16:$src, memdst:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mr : I8mr<0x0,
(outs), (ins memsrc:$src, GR8:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mr : I16mr<0x0,
(outs), (ins memsrc:$src, GR16:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mi : I8mi<0x0,
(outs), (ins memsrc:$src, i8imm:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mi : I16mi<0x0,
(outs), (ins memsrc:$src, i16imm:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mm : I8mm<0x0,
(outs), (ins memsrc:$src, memsrc:$src2),
@@ -1110,15 +1110,15 @@ def BIT8mm : I8mm<0x0,
[(MSP430cmp (and_su (i8 (load addr:$src)),
(load addr:$src2)),
0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mm : I16mm<0x0,
(outs), (ins memsrc:$src, memsrc:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (i16 (load addr:$src)),
(load addr:$src2)),
0),
- (implicit SRW)]>;
-} // Defs = [SRW]
+ (implicit SR)]>;
+} // Defs = [SR]
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
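
All of the arithmetic and compare patterns above keep their (implicit SR)
annotation, so the renamed status register is still modeled as clobbered. A
hypothetical spot check in C++ (MII is assumed to be an MCInstrInfo; this is
not part of the patch):

    const MCInstrDesc &Desc = MII.get(MSP430::ADD16rr);
    // After the rename, SR (not SRW) must be among the implicit defs.
    assert(Desc.hasImplicitDefOfPhysReg(MSP430::SR));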
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 05352a2270d4..77b91b7bac2a 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
MCSymbol *MSP430MCInstLower::
@@ -50,7 +51,7 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetJumpTableSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_'
@@ -67,7 +68,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_'
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 794aa56bf04c..ebd639744bcc 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430_MCINSTLOWER_H
-#define MSP430_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430MCINSTLOWER_H
+#define LLVM_LIB_TARGET_MSP430_MSP430MCINSTLOWER_H
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index d1697f478cc2..fcc5f5b88600 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430MACHINEFUNCTIONINFO_H
-#define MSP430MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 691bceef0de0..614467bcd248 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -33,32 +33,32 @@ using namespace llvm;
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo()
- : MSP430GenRegisterInfo(MSP430::PCW) {}
+ : MSP430GenRegisterInfo(MSP430::PC) {}
const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
const Function* F = MF->getFunction();
static const MCPhysReg CalleeSavedRegs[] = {
- MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
0
};
static const MCPhysReg CalleeSavedRegsFP[] = {
- MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
0
};
static const MCPhysReg CalleeSavedRegsIntr[] = {
- MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
- MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15,
0
};
static const MCPhysReg CalleeSavedRegsIntrFP[] = {
- MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
- MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15,
0
};
@@ -73,22 +73,22 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// Mark 4 special registers with subregisters as reserved.
Reserved.set(MSP430::PCB);
Reserved.set(MSP430::SPB);
Reserved.set(MSP430::SRB);
Reserved.set(MSP430::CGB);
- Reserved.set(MSP430::PCW);
- Reserved.set(MSP430::SPW);
- Reserved.set(MSP430::SRW);
- Reserved.set(MSP430::CGW);
+ Reserved.set(MSP430::PC);
+ Reserved.set(MSP430::SP);
+ Reserved.set(MSP430::SR);
+ Reserved.set(MSP430::CG);
// Mark frame pointer as reserved if needed.
if (TFI->hasFP(MF)) {
Reserved.set(MSP430::FPB);
- Reserved.set(MSP430::FPW);
+ Reserved.set(MSP430::FP);
}
return Reserved;
@@ -109,11 +109,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FP : MSP430::SP);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
// Skip the saved PC
@@ -122,7 +122,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!TFI->hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
- Offset += 2; // Skip the saved FPW
+ Offset += 2; // Skip the saved FP
// Fold imm into offset
Offset += MI.getOperand(FIOperandNum + 1).getImm();
@@ -131,7 +131,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// This is actually "load effective address" of the stack slot
// instruction. We have only two-address instructions, thus we need to
// expand it into mov + add
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MI.setDesc(TII.get(MSP430::MOV16rr));
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
@@ -156,7 +156,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+ return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
}
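
The eliminateFrameIndex hunks only rename registers; the offset arithmetic is
unchanged. A rough model of that arithmetic (illustrative; the 2-byte
saved-PC slot is implied by the surrounding comments rather than shown in
full here):

    int foldFrameIndexOffset(int ObjOffset, int Imm, bool HasFP, int StackSize) {
      int Offset = ObjOffset + 2;       // skip the saved PC
      Offset += HasFP ? 2 : StackSize;  // skip the saved FP, or the whole frame
      return Offset + Imm;              // fold the instruction's immediate
    }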
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index cb0196134a8d..3f88a6967170 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
-#define LLVM_TARGET_MSP430REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430REGISTERINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
@@ -44,4 +44,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TARGET_MSP430REGISTERINFO_H
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 4010781a8608..b5a6ed0f0a56 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -46,22 +46,22 @@ def R15B : MSP430Reg<15, "r15">;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
-def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
-def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
-def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
-def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
-def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
-def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
-def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
-def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
-def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
-def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
-def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
-def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
-def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
-def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
-def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FP : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8 : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9 : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10 : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11 : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12 : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13 : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14 : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15 : MSP430RegWithSubregs<15, "r15", [R15B]>;
}
def GR8 : RegisterClass<"MSP430", [i8], 8,
@@ -74,8 +74,8 @@ def GR8 : RegisterClass<"MSP430", [i8], 8,
def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile registers
- (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ (add R12, R13, R14, R15, R11, R10, R9, R8, R7, R6, R5,
// Frame pointer, sometimes allocable
- FPW,
+ FP,
// Volatile, but not allocable
- PCW, SPW, SRW, CGW)>;
+ PC, SP, SR, CG)>;
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index cb04adc0de1e..61a6b19111db 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430SELECTIONDAGINFO_H
-#define MSP430SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index dbddc5243db0..cb83b92d4364 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -34,6 +34,6 @@ MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: MSP430GenSubtargetInfo(TT, CPU, FS),
// FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-n8:16"), FrameLowering(),
+ DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
TSInfo(DL) {}
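
The only functional change in this file is the new "-a:16" component, which
requests 16-bit alignment for aggregate types. A quick, illustrative sanity
check against the DataLayout API (not part of the patch):

    DataLayout DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16");
    assert(DL.isLittleEndian() && DL.getPointerSizeInBits() == 16);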
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 0152ad19dfcd..58eb07bc2c92 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
-#define LLVM_TARGET_MSP430_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430SUBTARGET_H
+#define LLVM_LIB_TARGET_MSP430_MSP430SUBTARGET_H
#include "MSP430FrameLowering.h"
-#include "MSP430InstrInfo.h"
#include "MSP430ISelLowering.h"
+#include "MSP430InstrInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -51,14 +51,20 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const TargetRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const MSP430TargetLowering *getTargetLowering() const { return &TLInfo; }
- const MSP430SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const MSP430TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 5ca36f2e4e77..66e75037c921 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -14,6 +14,7 @@
#include "MSP430TargetMachine.h"
#include "MSP430.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
@@ -30,10 +31,13 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+MSP430TargetMachine::~MSP430TargetMachine() {}
+
namespace {
/// MSP430 Code Generator Pass Configuration Options.
class MSP430PassConfig : public TargetPassConfig {
@@ -46,7 +50,7 @@ public:
}
bool addInstSelector() override;
- bool addPreEmitPass() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -60,8 +64,7 @@ bool MSP430PassConfig::addInstSelector() {
return false;
}
-bool MSP430PassConfig::addPreEmitPass() {
+void MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- addPass(createMSP430BranchSelectionPass());
- return false;
+ addPass(createMSP430BranchSelectionPass(), false);
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index efa84039ff6b..0e54ed631beb 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
-#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
+#define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
#include "MSP430Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -24,6 +24,7 @@ namespace llvm {
/// MSP430TargetMachine
///
class MSP430TargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
MSP430Subtarget Subtarget;
public:
@@ -31,31 +32,18 @@ public:
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~MSP430TargetMachine() override;
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const MSP430InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
const MSP430Subtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const TargetRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const MSP430TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
}; // MSP430TargetMachine.
} // end namespace llvm
-#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
+#endif
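
Net effect on this header: the forwarding accessors move behind
getSubtargetImpl(), and the TargetMachine takes ownership of its lowering
object file. The ownership pattern, reduced to its core (a sketch mirroring
the names above):

    class MSP430TargetMachine : public LLVMTargetMachine {
      std::unique_ptr<TargetLoweringObjectFile> TLOF;  // built as an ELF TLOF
    public:
      TargetLoweringObjectFile *getObjFileLowering() const override {
        return TLOF.get();  // non-owning; lifetime tied to the TargetMachine
      }
    };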
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 2d65b08846c7..7db5b34204c3 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -12,6 +12,7 @@
#include "MipsRegisterInfo.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -25,7 +26,9 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include <memory>
using namespace llvm;
@@ -38,36 +41,69 @@ class MCInstrInfo;
namespace {
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions() : aTReg(1), reorder(true), macro(true) {}
+ MipsAssemblerOptions(uint64_t Features_) :
+ ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
- unsigned getATRegNum() { return aTReg; }
- bool setATReg(unsigned Reg);
+ MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
+ ATReg = Opts->getATRegNum();
+ Reorder = Opts->isReorder();
+ Macro = Opts->isMacro();
+ Features = Opts->getFeatures();
+ }
- bool isReorder() { return reorder; }
- void setReorder() { reorder = true; }
- void setNoreorder() { reorder = false; }
+ unsigned getATRegNum() const { return ATReg; }
+ bool setATReg(unsigned Reg);
- bool isMacro() { return macro; }
- void setMacro() { macro = true; }
- void setNomacro() { macro = false; }
+ bool isReorder() const { return Reorder; }
+ void setReorder() { Reorder = true; }
+ void setNoReorder() { Reorder = false; }
+
+ bool isMacro() const { return Macro; }
+ void setMacro() { Macro = true; }
+ void setNoMacro() { Macro = false; }
+
+ uint64_t getFeatures() const { return Features; }
+ void setFeatures(uint64_t Features_) { Features = Features_; }
+
+ // Set of features that are either architecture features or referenced
+ // by them (e.g. FeatureNaN2008 is implied by FeatureMips32r6).
+ // The full table can be found in MipsGenSubtargetInfo.inc (MipsFeatureKV[]).
+ // The reason we need this mask is explained in the selectArch function.
+ // FIXME: Ideally we would like TableGen to generate this information.
+ static const uint64_t AllArchRelatedMask =
+ Mips::FeatureMips1 | Mips::FeatureMips2 | Mips::FeatureMips3 |
+ Mips::FeatureMips3_32 | Mips::FeatureMips3_32r2 | Mips::FeatureMips4 |
+ Mips::FeatureMips4_32 | Mips::FeatureMips4_32r2 | Mips::FeatureMips5 |
+ Mips::FeatureMips5_32r2 | Mips::FeatureMips32 | Mips::FeatureMips32r2 |
+ Mips::FeatureMips32r6 | Mips::FeatureMips64 | Mips::FeatureMips64r2 |
+ Mips::FeatureMips64r6 | Mips::FeatureCnMips | Mips::FeatureFP64Bit |
+ Mips::FeatureGP64Bit | Mips::FeatureNaN2008;
private:
- unsigned aTReg;
- bool reorder;
- bool macro;
+ unsigned ATReg;
+ bool Reorder;
+ bool Macro;
+ uint64_t Features;
};
}
namespace {
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
- MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<MipsTargetStreamer &>(TS);
}
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
- MipsAssemblerOptions Options;
+ SmallVector<std::unique_ptr<MipsAssemblerOptions>, 2> AssemblerOptions;
+ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be
+ // nullptr, which indicates that no function is currently
+ // selected. This usually happens after a '.end func'
+ // directive.
+
+ // Print a warning along with its fix-it message at the given range.
+ void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
+ SMRange Range, bool ShowColors = true);
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
@@ -76,15 +112,15 @@ class MipsAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseParenSuffix(StringRef Name, OperandVector &Operands);
+ bool parseParenSuffix(StringRef Name, OperandVector &Operands);
- bool ParseBracketSuffix(StringRef Name, OperandVector &Operands);
+ bool parseBracketSuffix(StringRef Name, OperandVector &Operands);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -94,25 +130,31 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier, SMLoc S);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
+ matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
- MipsAsmParser::OperandMatchResultTy ParseAnyRegister(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseImm(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseJumpTarget(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseLSAImm(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseRegisterPair(OperandVector &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
- bool ParseOperand(OperandVector &, StringRef Mnemonic);
+ bool parseOperand(OperandVector &, StringRef Mnemonic);
bool needsExpansion(MCInst &Inst);
@@ -130,6 +172,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
@@ -142,8 +187,10 @@ class MipsAsmParser : public MCTargetAsmParser {
const MCExpr *evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
bool isEvaluated(const MCExpr *Expr);
+ bool parseSetMips0Directive();
+ bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
- bool parseDirectiveCPLoad(SMLoc Loc);
+ bool parseDirectiveCpLoad(SMLoc Loc);
bool parseDirectiveCPSetup();
bool parseDirectiveNaN();
bool parseDirectiveSet();
@@ -153,10 +200,16 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetNoAtDirective();
bool parseSetMacroDirective();
bool parseSetNoMacroDirective();
+ bool parseSetMsaDirective();
+ bool parseSetNoMsaDirective();
+ bool parseSetNoDspDirective();
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetMips16Directive();
bool parseSetNoMips16Directive();
bool parseSetFpDirective();
+ bool parseSetPopDirective();
+ bool parseSetPushDirective();
bool parseSetAssignment();
@@ -174,6 +227,8 @@ class MipsAsmParser : public MCTargetAsmParser {
int matchCPURegisterName(StringRef Symbol);
+ int matchHWRegsRegisterName(StringRef Symbol);
+
int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
int matchFPURegisterName(StringRef Name);
@@ -200,11 +255,43 @@ class MipsAsmParser : public MCTargetAsmParser {
// Example: INSERT.B $w0[n], $1 => 16 > n >= 0
bool validateMSAIndex(int Val, int RegKind);
+ // Selects a new architecture by updating the FeatureBits with the necessary
+ // info including implied dependencies.
+ // Internally, it clears all the feature bits related to *any* architecture
+ // and selects the new one using the ToggleFeature functionality of the
+ // MCSubtargetInfo object that handles implied dependencies. The reason we
+ // clear all the arch related bits manually is because ToggleFeature only
+ // clears the features that imply the feature being cleared and not the
+ // features implied by the feature being cleared. This is easier to see
+ // with an example:
+ // --------------------------------------------------
+ // | Feature | Implies |
+ // | -------------------------------------------------|
+ // | FeatureMips1 | None |
+ // | FeatureMips2 | FeatureMips1 |
+ // | FeatureMips3 | FeatureMips2 | FeatureMipsGP64 |
+ // | FeatureMips4 | FeatureMips3 |
+ // | ... | |
+ // --------------------------------------------------
+ //
+ // Setting Mips3 is equivalent to setting (FeatureMips3 | FeatureMips2 |
+ // FeatureMipsGP64 | FeatureMips1).
+ // Clearing Mips3 is equivalent to clearing (FeatureMips3 | FeatureMips4).
+ void selectArch(StringRef ArchFeature) {
+ uint64_t FeatureBits = STI.getFeatureBits();
+ FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask;
+ STI.setFeatureBits(FeatureBits);
+ setAvailableFeatures(
+ ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature)));
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
+ }
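+ // Worked example with made-up bit values (illustrative only): let
+ // Mips1=0x1, Mips2=0x2, Mips3=0x4, MipsGP64=0x8, Mips4=0x10, and let the
+ // current FeatureBits encode Mips4, i.e. 0x1F. To reach Mips2 with
+ // ToggleFeature alone we would clear Mips3, dropping Mips3 and its
+ // dependent Mips4 but leaving MipsGP64 set: 0xB instead of the correct
+ // 0x3. Masking AllArchRelatedMask out first (-> 0x0) and then toggling
+ // Mips2 (-> Mips2 plus its implied Mips1 = 0x3) avoids exactly that leak.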
+
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(STI.getFeatureBits() & Feature)) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
@@ -212,6 +299,7 @@ class MipsAsmParser : public MCTargetAsmParser {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
}
public:
@@ -225,9 +313,19 @@ public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(), STI(sti) {
+ MCAsmParserExtension::Initialize(parser);
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Remember the initial assembler options. The user cannot modify these.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
+
+ // Create an assembler options environment for the user to modify.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
getTargetStreamer().updateABIInfo(*this);
@@ -239,10 +337,9 @@ public:
if (!isABI_O32() && !useOddSPReg() != 0)
report_fatal_error("-mno-odd-spreg requires the O32 ABI");
- }
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+ CurrentFn = nullptr;
+ }
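+
+ // Sketch of the intended stack discipline for the two entries created in
+ // the constructor (the parse methods are declared earlier in this patch):
+ //   .set push      -> AssemblerOptions.push_back(copy of back())
+ //   .set noreorder -> AssemblerOptions.back()->setNoReorder()
+ //   .set pop       -> AssemblerOptions.pop_back()
+ // The bottom (command-line) entry is never popped, so defaults survive.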
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
@@ -295,7 +392,7 @@ public:
bool abiUsesSoftFloat() const { return false; }
/// Warn if RegNo is the current assembler temporary.
- void WarnIfAssemblerTemporary(int RegNo, SMLoc Loc);
+ void warnIfAssemblerTemporary(int RegNo, SMLoc Loc);
};
}
@@ -333,7 +430,9 @@ private:
k_Memory, /// Base + Offset Memory Address
k_PhysRegister, /// A physical register from the Mips namespace
k_RegisterIndex, /// A register index in one or more RegKind.
- k_Token /// A simple token
+ k_Token, /// A simple token
+ k_RegList, /// A physical register list
+ k_RegPair /// A pair of physical registers
} Kind;
public:
@@ -368,12 +467,17 @@ private:
const MCExpr *Off;
};
+ struct RegListOp {
+ SmallVector<unsigned, 10> *List;
+ };
+
union {
struct Token Tok;
struct PhysRegOp PhysReg;
struct RegIdxOp RegIdx;
struct ImmOp Imm;
struct MemOp Mem;
+ struct RegListOp RegList;
};
SMLoc StartLoc, EndLoc;
@@ -397,7 +501,15 @@ public:
/// target.
unsigned getGPR32Reg() const {
assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- AsmParser.WarnIfAssemblerTemporary(RegIdx.Index, StartLoc);
+ AsmParser.warnIfAssemblerTemporary(RegIdx.Index, StartLoc);
+ unsigned ClassID = Mips::GPR32RegClassID;
+ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+ }
+
+ /// Coerce the register to GPR32 and return the real register for the current
+ /// target.
+ unsigned getGPRMM16Reg() const {
+ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
unsigned ClassID = Mips::GPR32RegClassID;
return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
}
@@ -550,6 +662,16 @@ public:
Inst.addOperand(MCOperand::CreateReg(getGPR32Reg()));
}
+ void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getGPRMM16Reg()));
+ }
+
+ void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getGPRMM16Reg()));
+ }
+
/// Render the operand to an MCInst as a GPR64
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
@@ -647,6 +769,29 @@ public:
addExpr(Inst, Expr);
}
+ void addMicroMipsMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()->getGPRMM16Reg()));
+
+ const MCExpr *Expr = getMemOff();
+ addExpr(Inst, Expr);
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ for (auto RegNo : getRegList())
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ }
+
+ void addRegPairOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ unsigned RegNo = getRegPair();
+ Inst.addOperand(MCOperand::CreateReg(RegNo++));
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ }
+
bool isReg() const override {
// As a special case until we sort out the definition of div/divu, pretend
// that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly.
@@ -672,6 +817,37 @@ public:
template <unsigned Bits> bool isMemWithSimmOffset() const {
return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff());
}
+ bool isMemWithGRPMM16Base() const {
+ return isMem() && getMemBase()->isMM16AsmReg();
+ }
+ template <unsigned Bits> bool isMemWithUimmOffsetSP() const {
+ return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
+ && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == Mips::SP);
+ }
+ template <unsigned Bits> bool isMemWithUimmWordAlignedOffsetSP() const {
+ return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
+ && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
+ && (getMemBase()->getGPR32Reg() == Mips::SP);
+ }
+ bool isRegList16() const {
+ if (!isRegList())
+ return false;
+
+ int Size = RegList.List->size();
+ if (Size < 2 || Size > 5 || *RegList.List->begin() != Mips::S0 ||
+ RegList.List->back() != Mips::RA)
+ return false;
+
+ int PrevReg = *RegList.List->begin();
+ for (int i = 1; i < Size - 1; i++) {
+ int Reg = (*(RegList.List))[i];
+ if (Reg != PrevReg + 1)
+ return false;
+ PrevReg = Reg;
+ }
+
+ return true;
+ }
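+ // Reading of the checks above (illustrative, not a spec quote): the list
+ // must be $s0, ..., $ra with two to five entries and a contiguous $s run,
+ // so "$s0-$s2, $ra" passes while "$s0, $s2, $ra" fails the PrevReg + 1
+ // test.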
bool isInvNum() const { return Kind == k_Immediate; }
bool isLSAImm() const {
if (!isConstantImm())
@@ -679,11 +855,13 @@ public:
int64_t Val = getConstantImm();
return 1 <= Val && Val <= 4;
}
+ bool isRegList() const { return Kind == k_RegList; }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
+ bool isRegPair() const { return Kind == k_RegPair; }
unsigned getReg() const override {
// As a special case until we sort out the definition of div/divu, pretend
@@ -720,6 +898,16 @@ public:
return static_cast<const MCConstantExpr *>(getMemOff())->getValue();
}
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == k_RegList) && "Invalid access!");
+ return *(RegList.List);
+ }
+
+ unsigned getRegPair() const {
+ assert((Kind == k_RegPair) && "Invalid access!");
+ return RegIdx.Index;
+ }
+
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
auto Op = make_unique<MipsOperand>(k_Token, Parser);
@@ -733,16 +921,16 @@ public:
/// Create a numeric register (e.g. $1). The exact register remains
/// unresolved until an instruction successfully matches
static std::unique_ptr<MipsOperand>
- CreateNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
- DEBUG(dbgs() << "CreateNumericReg(" << Index << ", ...)\n");
+ DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n");
return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a GPR.
/// This is typically only used for named registers such as $gp.
static std::unique_ptr<MipsOperand>
- CreateGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser);
}
@@ -750,15 +938,23 @@ public:
/// Create a register that is definitely a FGR.
/// This is typically only used for named registers such as $f0.
static std::unique_ptr<MipsOperand>
- CreateFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser);
}
+ /// Create a register that is definitely a HWReg.
+ /// This is typically only used for named registers such as $hwr_cpunum.
+ static std::unique_ptr<MipsOperand>
+ createHWRegsReg(unsigned Index, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, RegKind_HWRegs, RegInfo, S, E, Parser);
+ }
+
/// Create a register that is definitely an FCC.
/// This is typically only used for named registers such as $fcc0.
static std::unique_ptr<MipsOperand>
- CreateFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser);
}
@@ -766,7 +962,7 @@ public:
/// Create a register that is definitely an ACC.
/// This is typically only used for named registers such as $ac0.
static std::unique_ptr<MipsOperand>
- CreateACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser);
}
@@ -774,7 +970,7 @@ public:
/// Create a register that is definitely an MSA128.
/// This is typically only used for named registers such as $w0.
static std::unique_ptr<MipsOperand>
- CreateMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser);
}
@@ -782,7 +978,7 @@ public:
/// Create a register that is definitely an MSACtrl.
/// This is typically only used for named registers such as $msaaccess.
static std::unique_ptr<MipsOperand>
- CreateMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser);
}
@@ -807,9 +1003,45 @@ public:
return Op;
}
+ static std::unique_ptr<MipsOperand>
+ CreateRegList(SmallVectorImpl<unsigned> &Regs, SMLoc StartLoc, SMLoc EndLoc,
+ MipsAsmParser &Parser) {
+ assert(Regs.size() > 0 && "Empty list not allowed");
+
+ auto Op = make_unique<MipsOperand>(k_RegList, Parser);
+ Op->RegList.List = new SmallVector<unsigned, 10>();
+ for (auto Reg : Regs)
+ Op->RegList.List->push_back(Reg);
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static std::unique_ptr<MipsOperand>
+ CreateRegPair(unsigned RegNo, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_RegPair, Parser);
+ Op->RegIdx.Index = RegNo;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
bool isGPRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31;
}
+ bool isMM16AsmReg() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return ((RegIdx.Index >= 2 && RegIdx.Index <= 7)
+ || RegIdx.Index == 16 || RegIdx.Index == 17);
+ }
+ bool isMM16AsmRegZero() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return (RegIdx.Index == 0 ||
+ (RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
+ RegIdx.Index == 17);
+ }
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -855,9 +1087,12 @@ public:
case k_Memory:
delete Mem.Base;
break;
+ case k_RegList:
+ delete RegList.List;
+ break;
case k_PhysRegister:
case k_RegisterIndex:
case k_Token:
+ case k_RegPair:
break;
}
}
@@ -885,6 +1120,15 @@ public:
case k_Token:
OS << Tok.Data;
break;
+ case k_RegList:
+ OS << "RegList< ";
+ for (auto Reg : (*RegList.List))
+ OS << Reg << " ";
+ OS << ">";
+ break;
+ case k_RegPair:
+ OS << "RegPair<" << RegIdx.Index << "," << RegIdx.Index + 1 << ">";
+ break;
}
}
}; // class MipsOperand
@@ -897,6 +1141,19 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
+static bool hasShortDelaySlot(unsigned Opcode) {
+ switch (Opcode) {
+ case Mips::JALS_MM:
+ case Mips::JALRS_MM:
+ case Mips::JALRS16_MM:
+ case Mips::BGEZALS_MM:
+ case Mips::BLTZALS_MM:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
@@ -950,6 +1207,17 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
+ case Mips::BEQZ16_MM:
+ case Mips::BNEZ16_MM:
+ assert(MCID.getNumOperands() == 2 && "unexpected number of operands");
+ Offset = Inst.getOperand(1);
+ if (!Offset.isImm())
+ break; // We'll deal with this situation later on when applying fixups.
+ if (!isIntN(8, Offset.getImm()))
+ return Error(IDLoc, "branch target out of range");
+ if (OffsetToAlignment(Offset.getImm(), 2LL))
+ return Error(IDLoc, "branch to misaligned address");
+ break;
}
}
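Concretely, BEQZ16_MM/BNEZ16_MM take a signed 8-bit, 2-byte-aligned displacement, so the two checks above reduce to a single predicate on the byte offset (a sketch, assuming Offset.getImm() is the raw displacement):

  #include <cstdint>
  // isIntN(8) bounds plus the alignment test: even displacements in [-128, 126].
  static bool isValidBeqz16Offset(int64_t Off) {
    return Off >= -128 && Off <= 127 && Off % 2 == 0;
  }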
@@ -961,15 +1229,21 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
"nop instruction");
}
- if (MCID.hasDelaySlot() && Options.isReorder()) {
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
Instructions.push_back(Inst);
MCInst NopInst;
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateImm(0));
+ if (hasShortDelaySlot(Inst.getOpcode())) {
+ NopInst.setOpcode(Mips::MOVE16_MM);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ } else {
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ }
Instructions.push_back(NopInst);
return false;
}
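The NOP emitted above depends on the slot width: the opcodes listed in hasShortDelaySlot() have 16-bit delay slots, which only a 16-bit filler fits, hence move16 $zero,$zero there and the canonical sll $zero,$zero,0 everywhere else. A minimal sketch of the choice, with strings standing in for Mips::MOVE16_MM and Mips::SLL:

  // Sketch only: which NOP '.set reorder' pads a delay slot with.
  static const char *nopForDelaySlot(bool ShortSlot) {
    return ShortSlot ? "move16 $zero, $zero"  // 16-bit microMIPS NOP
                     : "sll $zero, $zero, 0"; // canonical 32-bit NOP
  }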
@@ -1008,6 +1282,123 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
} // for
} // if load/store
+ // TODO: Handle this with the AsmOperandClass.PredicateMethod.
+ if (inMicroMipsMode()) {
+ MCOperand Opnd;
+ int Imm;
+
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case Mips::ADDIUS5_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -8 || Imm > 7)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUSP_MM:
+ Opnd = Inst.getOperand(0);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -1032 || Imm > 1028 || (Imm < 8 && Imm > -12) ||
+ Imm % 4 != 0)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::SLL16_MM:
+ case Mips::SRL16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 1 || Imm > 8)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::LI16_MM:
+ Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -1 || Imm > 126)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUR2_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!(Imm == 1 || Imm == -1 ||
+ ((Imm % 4 == 0) && Imm < 28 && Imm > 0)))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUR1SP_MM:
+ Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (OffsetToAlignment(Imm, 4LL))
+ return Error(IDLoc, "misaligned immediate operand value");
+ if (Imm < 0 || Imm > 255)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ANDI16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!(Imm == 128 || (Imm >= 1 && Imm <= 4) || Imm == 7 || Imm == 8 ||
+ Imm == 15 || Imm == 16 || Imm == 31 || Imm == 32 || Imm == 63 ||
+ Imm == 64 || Imm == 255 || Imm == 32768 || Imm == 65535))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::LBU16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -1 || Imm > 14)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::SB16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > 15)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::LHU16_MM:
+ case Mips::SH16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > 30 || (Imm % 2 != 0))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::LW16_MM:
+ case Mips::SW16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > 60 || (Imm % 4 != 0))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::CACHE:
+ case Mips::PREF:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!isUInt<5>(Imm))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ }
+ }
+
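Most of these cases are plain range tests, but a few encode irregular sets. ADDIUR2_MM, for example, only accepts -1, 1, and the positive multiples of 4 below 28; as a standalone predicate (a sketch of the case above):

  // Accepts exactly {-1, 1, 4, 8, 12, 16, 20, 24}.
  static bool isAddiur2Imm(int Imm) {
    return Imm == 1 || Imm == -1 || (Imm % 4 == 0 && Imm > 0 && Imm < 28);
  }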
if (needsExpansion(Inst))
return expandInstruction(Inst, IDLoc, Instructions);
else
@@ -1032,14 +1423,12 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
- default:
- assert(0 && "unimplemented expansion");
- return true;
+ default: llvm_unreachable("unimplemented expansion");
case Mips::LoadImm32Reg:
return expandLoadImm(Inst, IDLoc, Instructions);
case Mips::LoadImm64Reg:
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
return expandLoadImm(Inst, IDLoc, Instructions);
@@ -1051,8 +1440,8 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
}
namespace {
-template <int Shift, bool PerformShift>
-void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+template <bool PerformShift>
+void createShiftOr(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
if (PerformShift) {
@@ -1067,11 +1456,18 @@ void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegNo));
tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(
- MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)));
+ tmpInst.addOperand(Operand);
tmpInst.setLoc(IDLoc);
Instructions.push_back(tmpInst);
}
+
+template <int Shift, bool PerformShift>
+void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ createShiftOr<PerformShift>(
+ MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)), RegNo,
+ IDLoc, Instructions);
+}
}
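The new overload takes the chunk as an MCOperand so callers can pass relocation expressions; the templated wrapper keeps the old behaviour of slicing a 16-bit chunk out of an immediate. A worked example of that slicing, assuming a plain 64-bit value:

  #include <cstdint>
  // chunk(0x0001234556789abcLL, 32) == 0x2345: the bits ORed in after the
  // corresponding shift step of the load-immediate expansion.
  static uint16_t chunk(int64_t Value, int Shift) {
    return static_cast<uint16_t>((Value & (0xffffLL << Shift)) >> Shift);
  }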
bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
@@ -1114,7 +1510,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
} else if ((ImmValue & (0xffffLL << 48)) == 0) {
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
@@ -1141,7 +1537,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
} else {
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
@@ -1176,7 +1572,12 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(2);
- assert(ImmOp.isImm() && "expected immediate operand kind");
+ assert((ImmOp.isImm() || ImmOp.isExpr()) &&
+ "expected immediate operand kind");
+ if (!ImmOp.isImm()) {
+ expandLoadAddressSym(Inst, IDLoc, Instructions);
+ return false;
+ }
const MCOperand &SrcRegOp = Inst.getOperand(1);
assert(SrcRegOp.isReg() && "expected register operand kind");
const MCOperand &DstRegOp = Inst.getOperand(0);
@@ -1220,7 +1621,12 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
- assert(ImmOp.isImm() && "expected immediate operand kind");
+ assert((ImmOp.isImm() || ImmOp.isExpr()) &&
+ "expected immediate operand kind");
+ if (!ImmOp.isImm()) {
+ expandLoadAddressSym(Inst, IDLoc, Instructions);
+ return false;
+ }
const MCOperand &RegOp = Inst.getOperand(0);
assert(RegOp.isReg() && "expected register operand kind");
int ImmValue = ImmOp.getImm();
@@ -1250,6 +1656,71 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
return false;
}
+void
+MipsAsmParser::expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // FIXME: If we do have a valid at register to use, we should generate a
+ // slightly shorter sequence here.
+ MCInst tmpInst;
+ int ExprOperandNo = 1;
+ // Sometimes the assembly parser gets the immediate expression as
+ // $zero plus an immediate.
+ if (Inst.getNumOperands() == 3) {
+ assert(Inst.getOperand(1).getReg() ==
+ (isGP64bit() ? Mips::ZERO_64 : Mips::ZERO));
+ ExprOperandNo = 2;
+ }
+ const MCOperand &SymOp = Inst.getOperand(ExprOperandNo);
+ assert(SymOp.isExpr() && "expected symbol operand kind");
+ const MCOperand &RegOp = Inst.getOperand(0);
+ unsigned RegNo = RegOp.getReg();
+ const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr());
+ const MCSymbolRefExpr *HiExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
+ const MCSymbolRefExpr *LoExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ if (isGP64bit()) {
+ // If it's a 64-bit architecture, expand to:
+ // la d,sym => lui d,highest(sym)
+ // ori d,d,higher(sym)
+ // dsll d,d,16
+ // ori d,d,hi16(sym)
+ // dsll d,d,16
+ // ori d,d,lo16(sym)
+ const MCSymbolRefExpr *HighestExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
+ const MCSymbolRefExpr *HigherExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
+
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateExpr(HighestExpr));
+ Instructions.push_back(tmpInst);
+
+ createShiftOr<false>(MCOperand::CreateExpr(HigherExpr), RegNo, SMLoc(),
+ Instructions);
+ createShiftOr<true>(MCOperand::CreateExpr(HiExpr), RegNo, SMLoc(),
+ Instructions);
+ createShiftOr<true>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
+ Instructions);
+ } else {
+ // Otherwise, expand to:
+ // la d,sym => lui d,hi16(sym)
+ // ori d,d,lo16(sym)
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ Instructions.push_back(tmpInst);
+
+ createShiftOr<false>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
+ Instructions);
+ }
+}
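Conceptually, the four relocation operators in the 64-bit expansion select successive 16-bit slices of the symbol's address; the exact arithmetic (including any carry handling) belongs to the MC fixups, so the following is a simplified sketch only:

  #include <cstdint>
  // Simplified slicing for the lui/ori/dsll sequence above; ignores any
  // carry adjustment the real %hi/%lo fixups may apply.
  static void splitForLa64(uint64_t Addr, uint16_t Out[4]) {
    Out[0] = (Addr >> 48) & 0xffff; // %highest -> lui
    Out[1] = (Addr >> 32) & 0xffff; // %higher  -> ori
    Out[2] = (Addr >> 16) & 0xffff; // %hi      -> ori after dsll 16
    Out[3] = Addr & 0xffff;         // %lo      -> ori after dsll 16
  }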
+
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions,
bool isLoad, bool isImmOpnd) {
@@ -1381,7 +1852,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
@@ -1390,8 +1861,6 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
- default:
- break;
case Match_Success: {
if (processInstruction(Inst, IDLoc, Instructions))
return true;
@@ -1404,7 +1873,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -1420,19 +1889,29 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_RequiresDifferentSrcAndDst:
return Error(IDLoc, "source and destination must be different");
}
- return true;
+
+ llvm_unreachable("Implement any new match types added!");
}
-void MipsAsmParser::WarnIfAssemblerTemporary(int RegIndex, SMLoc Loc) {
- if ((RegIndex != 0) && ((int)Options.getATRegNum() == RegIndex)) {
+void MipsAsmParser::warnIfAssemblerTemporary(int RegIndex, SMLoc Loc) {
+ if ((RegIndex != 0) &&
+ ((int)AssemblerOptions.back()->getATRegNum() == RegIndex)) {
if (RegIndex == 1)
- Warning(Loc, "Used $at without \".set noat\"");
+ Warning(Loc, "used $at without \".set noat\"");
else
- Warning(Loc, Twine("Used $") + Twine(RegIndex) + " with \".set at=$" +
+ Warning(Loc, Twine("used $") + Twine(RegIndex) + " with \".set at=$" +
Twine(RegIndex) + "\"");
}
}
+void
+MipsAsmParser::printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
+ SMRange Range, bool ShowColors) {
+ getSourceManager().PrintMessage(Range.Start, SourceMgr::DK_Warning, Msg,
+ Range, SMFixIt(Range, FixMsg),
+ ShowColors);
+}
+
int MipsAsmParser::matchCPURegisterName(StringRef Name) {
int CC;
@@ -1472,23 +1951,55 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
.Case("t9", 25)
.Default(-1);
- if (isABI_N32() || isABI_N64()) {
- // Although SGI documentation just cuts out t0-t3 for n32/n64,
- // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
- // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
- if (8 <= CC && CC <= 11)
- CC += 4;
+ if (!(isABI_N32() || isABI_N64()))
+ return CC;
+
+ if (12 <= CC && CC <= 15) {
+ // Name is one of t4-t7
+ AsmToken RegTok = getLexer().peekTok();
+ SMRange RegRange = RegTok.getLocRange();
+
+ StringRef FixedName = StringSwitch<StringRef>(Name)
+ .Case("t4", "t0")
+ .Case("t5", "t1")
+ .Case("t6", "t2")
+ .Case("t7", "t3")
+ .Default("");
+ assert(!FixedName.empty() && "Register name is not one of t4-t7.");
+
+ printWarningWithFixIt("register names $t4-$t7 are only available in O32.",
+ "Did you mean $" + FixedName + "?", RegRange);
+ }
+
+ // Although SGI documentation just cuts out t0-t3 for n32/n64,
+ // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7.
+ // We support both cases, so for t0-t3 we'll just push them to t4-t7.
+ if (8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1)
+ CC = StringSwitch<unsigned>(Name)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Default(-1);
- if (CC == -1)
- CC = StringSwitch<unsigned>(Name)
- .Case("a4", 8)
- .Case("a5", 9)
- .Case("a6", 10)
- .Case("a7", 11)
- .Case("kt0", 26)
- .Case("kt1", 27)
- .Default(-1);
- }
+ return CC;
+}
+
+int MipsAsmParser::matchHWRegsRegisterName(StringRef Name) {
+ int CC;
+
+ CC = StringSwitch<unsigned>(Name)
+ .Case("hwr_cpunum", 0)
+ .Case("hwr_synci_step", 1)
+ .Case("hwr_cc", 2)
+ .Case("hwr_ccres", 3)
+ .Case("hwr_ulr", 29)
+ .Default(-1);
return CC;
}
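These names give assembler-level access to the registers read by the rdhwr instruction; for example (illustrative syntax, not taken from the patch):

  rdhwr $2, $hwr_cpunum   # read hardware register 0 (the CPU number) into $2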
@@ -1568,15 +2079,15 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
if (Reg > 31)
return false;
- aTReg = Reg;
+ ATReg = Reg;
return true;
}
int MipsAsmParser::getATReg(SMLoc Loc) {
- int AT = Options.getATRegNum();
+ int AT = AssemblerOptions.back()->getATRegNum();
if (AT == 0)
reportParseError(Loc,
- "Pseudo instruction requires $at, which is not available");
+ "pseudo-instruction requires $at, which is not available");
return AT;
}
@@ -1597,8 +2108,9 @@ int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
return getReg(RegClass, RegNum);
}
-bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
- DEBUG(dbgs() << "ParseOperand\n");
+bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseOperand\n");
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
@@ -1626,7 +2138,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
// for div, divu, and similar instructions because it is not an operand
// to the instruction definition but an explicit register. Special case
// this situation for now.
- if (ParseAnyRegister(Operands) != MatchOperand_NoMatch)
+ if (parseAnyRegister(Operands) != MatchOperand_NoMatch)
return false;
// Maybe it is a symbol reference.
@@ -1651,7 +2163,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
case AsmToken::Tilde:
case AsmToken::String: {
DEBUG(dbgs() << ".. generic integer\n");
- OperandMatchResultTy ResTy = ParseImm(Operands);
+ OperandMatchResultTy ResTy = parseImm(Operands);
return ResTy != MatchOperand_Success;
}
case AsmToken::Percent: {
@@ -1696,7 +2208,7 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
Val = ((MCE->getValue() + 0x800080008000LL) >> 48) & 0xffff;
break;
default:
- report_fatal_error("Unsupported reloc value!");
+ report_fatal_error("unsupported reloc value");
}
return MCConstantExpr::Create(Val, getContext());
}
@@ -1753,6 +2265,7 @@ bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
}
bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the % token.
const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
if (Tok.isNot(AsmToken::Identifier))
@@ -1797,7 +2310,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
- OperandMatchResultTy ResTy = ParseAnyRegister(Operands);
+ OperandMatchResultTy ResTy = parseAnyRegister(Operands);
if (ResTy == MatchOperand_Success) {
assert(Operands.size() == 1);
MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front());
@@ -1821,6 +2334,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
+ MCAsmParser &Parser = getParser();
SMLoc S;
bool Result = true;
@@ -1850,6 +2364,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseMemOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
@@ -1882,7 +2397,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
// Zero register assumed, add a memory operand with ZERO as its base.
// "Base" will be managed by k_Memory.
- auto Base = MipsOperand::CreateGPRReg(0, getContext().getRegisterInfo(),
+ auto Base = MipsOperand::createGPRReg(0, getContext().getRegisterInfo(),
S, E, *this);
Operands.push_back(
MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
@@ -1895,7 +2410,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
Parser.Lex(); // Eat the '(' token.
}
- Res = ParseAnyRegister(Operands);
+ Res = parseAnyRegister(Operands);
if (Res != MatchOperand_Success)
return Res;
@@ -1932,7 +2447,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
}
bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
-
+ MCAsmParser &Parser = getParser();
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
SMLoc S = Parser.getTok().getLoc();
@@ -1943,10 +2458,10 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
return false;
if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
- const StringRef DefSymbol = Ref->getSymbol().getName();
+ StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
OperandMatchResultTy ResTy =
- MatchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
+ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
if (ResTy == MatchOperand_Success) {
Parser.Lex();
return true;
@@ -1966,47 +2481,54 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier,
SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateGPRReg(
+ Operands.push_back(MipsOperand::createGPRReg(
+ Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ return MatchOperand_Success;
+ }
+
+ Index = matchHWRegsRegisterName(Identifier);
+ if (Index != -1) {
+ Operands.push_back(MipsOperand::createHWRegsReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFPURegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateFGRReg(
+ Operands.push_back(MipsOperand::createFGRReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFCCRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateFCCReg(
+ Operands.push_back(MipsOperand::createFCCReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchACRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateACCReg(
+ Operands.push_back(MipsOperand::createACCReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128RegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateMSA128Reg(
+ Operands.push_back(MipsOperand::createMSA128Reg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128CtrlRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateMSACtrlReg(
+ Operands.push_back(MipsOperand::createMSACtrlReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
@@ -2015,18 +2537,19 @@ MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
+MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
+ MCAsmParser &Parser = getParser();
auto Token = Parser.getLexer().peekTok(false);
if (Token.is(AsmToken::Identifier)) {
DEBUG(dbgs() << ".. identifier\n");
StringRef Identifier = Token.getIdentifier();
OperandMatchResultTy ResTy =
- MatchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
+ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
return ResTy;
} else if (Token.is(AsmToken::Integer)) {
DEBUG(dbgs() << ".. integer\n");
- Operands.push_back(MipsOperand::CreateNumericReg(
+ Operands.push_back(MipsOperand::createNumericReg(
Token.getIntVal(), getContext().getRegisterInfo(), S, Token.getLoc(),
*this));
return MatchOperand_Success;
@@ -2038,8 +2561,9 @@ MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
- DEBUG(dbgs() << "ParseAnyRegister\n");
+MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseAnyRegister\n");
auto Token = Parser.getTok();
@@ -2056,7 +2580,7 @@ MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
}
DEBUG(dbgs() << ".. $\n");
- OperandMatchResultTy ResTy = MatchAnyRegisterWithoutDollar(Operands, S);
+ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S);
if (ResTy == MatchOperand_Success) {
Parser.Lex(); // $
Parser.Lex(); // identifier
@@ -2065,7 +2589,8 @@ MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseImm(OperandVector &Operands) {
+MipsAsmParser::parseImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -2089,18 +2614,19 @@ MipsAsmParser::ParseImm(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
- DEBUG(dbgs() << "ParseJumpTarget\n");
+MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseJumpTarget\n");
SMLoc S = getLexer().getLoc();
// Integers and expressions are acceptable
- OperandMatchResultTy ResTy = ParseImm(Operands);
+ OperandMatchResultTy ResTy = parseImm(Operands);
if (ResTy != MatchOperand_NoMatch)
return ResTy;
// Registers are a valid target and have priority over symbols.
- ResTy = ParseAnyRegister(Operands);
+ ResTy = parseAnyRegister(Operands);
if (ResTy != MatchOperand_NoMatch)
return ResTy;
@@ -2116,6 +2642,7 @@ MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseInvNum(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const MCExpr *IdVal;
// If the first token is '$' we may have register operand.
if (Parser.getTok().is(AsmToken::Dollar))
@@ -2133,7 +2660,8 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
+MipsAsmParser::parseLSAImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -2171,6 +2699,98 @@ MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseRegisterList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SmallVector<unsigned, 10> Regs;
+ unsigned RegNo;
+ unsigned PrevReg = Mips::NoRegister;
+ bool RegRange = false;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
+
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+ while (parseAnyRegister(TmpOperands) == MatchOperand_Success) {
+ SMLoc E = getLexer().getLoc();
+ MipsOperand &Reg = static_cast<MipsOperand &>(*TmpOperands.back());
+ RegNo = isGP64bit() ? Reg.getGPR64Reg() : Reg.getGPR32Reg();
+ if (RegRange) {
+ // Expand the pending range: a range ending in $ra just appends $ra;
+ // otherwise every register from PrevReg+1 up to RegNo must be one of
+ // $s0-$s7 and is appended in order.
+ if (RegNo == Mips::RA) {
+ Regs.push_back(RegNo);
+ } else {
+ unsigned TmpReg = PrevReg + 1;
+ while (TmpReg <= RegNo) {
+ if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) {
+ Error(E, "invalid register operand");
+ return MatchOperand_ParseFail;
+ }
+
+ PrevReg = TmpReg;
+ Regs.push_back(TmpReg++);
+ }
+ }
+
+ RegRange = false;
+ } else {
+ if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) &&
+ (RegNo != Mips::RA)) {
+ Error(E, "$16 or $31 expected");
+ return MatchOperand_ParseFail;
+ } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) &&
+ (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ Error(E, "invalid register operand");
+ return MatchOperand_ParseFail;
+ } else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
+ (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ Error(E, "consecutive register numbers expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Regs.push_back(RegNo);
+ }
+
+ if (Parser.getTok().is(AsmToken::Minus))
+ RegRange = true;
+
+ if (Parser.getTok().isNot(AsmToken::Minus) &&
+ Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(E, "',' or '-' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Lex(); // Consume comma or minus
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ break;
+
+ PrevReg = RegNo;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
+ parseMemOperand(Operands);
+ return MatchOperand_Success;
+}
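Syntactically the loop accepts single registers and '-'-joined ranges, with the trailing memory operand picked up by the parseMemOperand() call at the end; illustrative operand forms (assumed microMIPS lwm32/swm32 spelling):

  lwm32 $16-$19, $31, 8($sp)   # range expands to $s0,$s1,$s2,$s3, then $ra
  swm32 $16, $17, $31, 8($sp)  # consecutive singles, then $ra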
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseRegisterPair(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+
+ SMLoc S = Parser.getTok().getLoc();
+ if (parseAnyRegister(Operands) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ SMLoc E = Parser.getTok().getLoc();
+ MipsOperand &Op = static_cast<MipsOperand &>(*Operands.back());
+ unsigned Reg = Op.getGPR32Reg();
+ Operands.pop_back();
+ Operands.push_back(MipsOperand::CreateRegPair(Reg, S, E, *this));
+ return MatchOperand_Success;
+}
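A k_RegPair operand stores only the first register index; when operands are rendered, addRegPairOperands() (earlier in this file) expands it to that register and its successor. A one-line sketch, assuming consecutive register numbering:

  #include <utility>
  static std::pair<unsigned, unsigned> regPairOperands(unsigned RegNo) {
    return {RegNo, RegNo + 1}; // emitted as (RegNo, RegNo + 1)
  }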
+
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK =
@@ -2212,12 +2832,13 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
/// ::= '(', register, ')'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
+bool MipsAsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LParen)) {
Operands.push_back(
MipsOperand::CreateToken("(", getLexer().getLoc(), *this));
Parser.Lex();
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
@@ -2240,13 +2861,14 @@ bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
/// ::= '[', integer, ']'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
+bool MipsAsmParser::parseBracketSuffix(StringRef Name,
OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LBrac)) {
Operands.push_back(
MipsOperand::CreateToken("[", getLexer().getLoc(), *this));
Parser.Lex();
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
@@ -2265,14 +2887,16 @@ bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "ParseInstruction\n");
- // We have reached first instruction, module directive after
- // this is forbidden.
- getTargetStreamer().setCanHaveModuleDir(false);
+
+ // We have reached the first instruction; module directives are now forbidden.
+ getTargetStreamer().forbidModuleDirective();
+
+ // Check if we have a valid mnemonic.
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
- return Error(NameLoc, "Unknown instruction");
+ return Error(NameLoc, "unknown instruction");
}
// First operand in MCInst is instruction mnemonic.
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc, *this));
@@ -2280,29 +2904,29 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
- if (getLexer().is(AsmToken::LBrac) && ParseBracketSuffix(Name, Operands))
+ if (getLexer().is(AsmToken::LBrac) && parseBracketSuffix(Name, Operands))
return true;
// AFAIK, parenthesis suffixes are never on the first operand
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
// Parse bracket and parenthesis suffixes before we iterate
if (getLexer().is(AsmToken::LBrac)) {
- if (ParseBracketSuffix(Name, Operands))
+ if (parseBracketSuffix(Name, Operands))
return true;
} else if (getLexer().is(AsmToken::LParen) &&
- ParseParenSuffix(Name, Operands))
+ parseParenSuffix(Name, Operands))
return true;
}
}
@@ -2316,6 +2940,7 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
+ MCAsmParser &Parser = getParser();
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
@@ -2326,14 +2951,15 @@ bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
}
bool MipsAsmParser::parseSetNoAtDirective() {
+ MCAsmParser &Parser = getParser();
// Line should look like: ".set noat".
// set at reg to 0.
- Options.setATReg(0);
+ AssemblerOptions.back()->setATReg(0);
// eat noat
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -2341,18 +2967,19 @@ bool MipsAsmParser::parseSetNoAtDirective() {
}
bool MipsAsmParser::parseSetAtDirective() {
+ MCAsmParser &Parser = getParser();
// Line can be .set at - defaults to $1
// or .set at=$reg
int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
- Options.setATReg(1);
+ AssemblerOptions.back()->setATReg(1);
Parser.Lex(); // Consume the EndOfStatement.
return false;
} else if (getLexer().is(AsmToken::Equal)) {
getParser().Lex(); // Eat the '='.
if (getLexer().isNot(AsmToken::Dollar)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected dollar sign '$'");
return false;
}
Parser.Lex(); // Eat the '$'.
@@ -2362,7 +2989,7 @@ bool MipsAsmParser::parseSetAtDirective() {
} else if (Reg.is(AsmToken::Integer)) {
AtRegNo = Reg.getIntVal();
} else {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected identifier or integer");
return false;
}
@@ -2371,14 +2998,14 @@ bool MipsAsmParser::parseSetAtDirective() {
return false;
}
- if (!Options.setATReg(AtRegNo)) {
- reportParseError("unexpected token in statement");
+ if (!AssemblerOptions.back()->setATReg(AtRegNo)) {
+ reportParseError("invalid register");
return false;
}
getParser().Lex(); // Eat the register.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -2390,72 +3017,138 @@ bool MipsAsmParser::parseSetAtDirective() {
}
bool MipsAsmParser::parseSetReorderDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setReorder();
+ AssemblerOptions.back()->setReorder();
getTargetStreamer().emitDirectiveSetReorder();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoReorderDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setNoreorder();
+ AssemblerOptions.back()->setNoReorder();
getTargetStreamer().emitDirectiveSetNoReorder();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetMacroDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setMacro();
+ AssemblerOptions.back()->setMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoMacroDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("`noreorder' must be set before `nomacro'");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- if (Options.isReorder()) {
+ if (AssemblerOptions.back()->isReorder()) {
reportParseError("`noreorder' must be set before `nomacro'");
return false;
}
- Options.setNomacro();
+ AssemblerOptions.back()->setNoMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
-bool MipsAsmParser::parseSetNoMips16Directive() {
+bool MipsAsmParser::parseSetMsaDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ setFeatureBits(Mips::FeatureMSA, "msa");
+ getTargetStreamer().emitDirectiveSetMsa();
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMsaDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ clearFeatureBits(Mips::FeatureMSA, "msa");
+ getTargetStreamer().emitDirectiveSetNoMsa();
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoDspDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nodsp".
+
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ clearFeatureBits(Mips::FeatureDSP, "dsp");
+ getTargetStreamer().emitDirectiveSetNoDsp();
+ return false;
+}
+
+bool MipsAsmParser::parseSetMips16Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "mips16".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ setFeatureBits(Mips::FeatureMips16, "mips16");
+ getTargetStreamer().emitDirectiveSetMips16();
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMips16Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nomips16".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- // For now do nothing.
+
+ clearFeatureBits(Mips::FeatureMips16, "mips16");
+ getTargetStreamer().emitDirectiveSetNoMips16();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetFpDirective() {
+ MCAsmParser &Parser = getParser();
MipsABIFlagsSection::FpABIKind FpAbiVal;
// Line can be: .set fp=32
// .set fp=xx
@@ -2463,7 +3156,7 @@ bool MipsAsmParser::parseSetFpDirective() {
Parser.Lex(); // Eat fp token
AsmToken Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Equal)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected equals sign '='");
return false;
}
Parser.Lex(); // Eat '=' token.
@@ -2473,7 +3166,7 @@ bool MipsAsmParser::parseSetFpDirective() {
return false;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
getTargetStreamer().emitDirectiveSetFp(FpAbiVal);
@@ -2481,15 +3174,50 @@ bool MipsAsmParser::parseSetFpDirective() {
return false;
}
+bool MipsAsmParser::parseSetPopDirective() {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = getLexer().getLoc();
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Always keep an element on the options "stack" to prevent the user
+ // from changing the initial options. This is how we remember them.
+ if (AssemblerOptions.size() == 2)
+ return reportParseError(Loc, ".set pop with no .set push");
+
+ AssemblerOptions.pop_back();
+ setAvailableFeatures(AssemblerOptions.back()->getFeatures());
+
+ getTargetStreamer().emitDirectiveSetPop();
+ return false;
+}
+
+bool MipsAsmParser::parseSetPushDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Create a copy of the current assembler options environment and push it.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
+
+ getTargetStreamer().emitDirectiveSetPush();
+ return false;
+}
+
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
const MCExpr *Value;
+ MCAsmParser &Parser = getParser();
if (Parser.parseIdentifier(Name))
reportParseError("expected identifier after .set");
if (getLexer().isNot(AsmToken::Comma))
- return reportParseError("unexpected token in .set directive");
+ return reportParseError("unexpected token, expected comma");
Lex(); // Eat comma
if (Parser.parseExpression(Value))
@@ -2505,10 +3233,61 @@ bool MipsAsmParser::parseSetAssignment() {
return false;
}
+bool MipsAsmParser::parseSetMips0Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Reset assembler options to their initial values.
+ setAvailableFeatures(AssemblerOptions.front()->getFeatures());
+ AssemblerOptions.back()->setFeatures(AssemblerOptions.front()->getFeatures());
+
+ getTargetStreamer().emitDirectiveSetMips0();
+ return false;
+}
+
+bool MipsAsmParser::parseSetArchDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Equal))
+ return reportParseError("unexpected token, expected equals sign");
+
+ Parser.Lex();
+ StringRef Arch;
+ if (Parser.parseIdentifier(Arch))
+ return reportParseError("expected arch identifier");
+
+ StringRef ArchFeatureName =
+ StringSwitch<StringRef>(Arch)
+ .Case("mips1", "mips1")
+ .Case("mips2", "mips2")
+ .Case("mips3", "mips3")
+ .Case("mips4", "mips4")
+ .Case("mips5", "mips5")
+ .Case("mips32", "mips32")
+ .Case("mips32r2", "mips32r2")
+ .Case("mips32r6", "mips32r6")
+ .Case("mips64", "mips64")
+ .Case("mips64r2", "mips64r2")
+ .Case("mips64r6", "mips64r6")
+ .Case("cnmips", "cnmips")
+ .Case("r4000", "mips3") // This is an implementation of Mips3.
+ .Default("");
+
+ if (ArchFeatureName.empty())
+ return reportParseError("unsupported architecture");
+
+ selectArch(ArchFeatureName);
+ getTargetStreamer().emitDirectiveSetArch(Arch);
+ return false;
+}
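Together with the .set push/pop and .set mips0 directives above, this lets a source region retarget the assembler temporarily and then restore the original options, e.g. (illustrative):

  .set push            # save current options and feature bits
  .set arch=mips64r2   # retarget the code that follows
  ...
  .set pop             # restore the options saved by the matching push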
+
bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
- return reportParseError("unexpected token in .set directive");
+ return reportParseError("unexpected token, expected end of statement");
switch (Feature) {
default:
@@ -2520,26 +3299,56 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
case Mips::FeatureMicroMips:
getTargetStreamer().emitDirectiveSetMicroMips();
break;
- case Mips::FeatureMips16:
- getTargetStreamer().emitDirectiveSetMips16();
+ case Mips::FeatureMips1:
+ selectArch("mips1");
+ getTargetStreamer().emitDirectiveSetMips1();
+ break;
+ case Mips::FeatureMips2:
+ selectArch("mips2");
+ getTargetStreamer().emitDirectiveSetMips2();
+ break;
+ case Mips::FeatureMips3:
+ selectArch("mips3");
+ getTargetStreamer().emitDirectiveSetMips3();
+ break;
+ case Mips::FeatureMips4:
+ selectArch("mips4");
+ getTargetStreamer().emitDirectiveSetMips4();
+ break;
+ case Mips::FeatureMips5:
+ selectArch("mips5");
+ getTargetStreamer().emitDirectiveSetMips5();
+ break;
+ case Mips::FeatureMips32:
+ selectArch("mips32");
+ getTargetStreamer().emitDirectiveSetMips32();
break;
case Mips::FeatureMips32r2:
- setFeatureBits(Mips::FeatureMips32r2, "mips32r2");
+ selectArch("mips32r2");
getTargetStreamer().emitDirectiveSetMips32R2();
break;
+ case Mips::FeatureMips32r6:
+ selectArch("mips32r6");
+ getTargetStreamer().emitDirectiveSetMips32R6();
+ break;
case Mips::FeatureMips64:
- setFeatureBits(Mips::FeatureMips64, "mips64");
+ selectArch("mips64");
getTargetStreamer().emitDirectiveSetMips64();
break;
case Mips::FeatureMips64r2:
- setFeatureBits(Mips::FeatureMips64r2, "mips64r2");
+ selectArch("mips64r2");
getTargetStreamer().emitDirectiveSetMips64R2();
break;
+ case Mips::FeatureMips64r6:
+ selectArch("mips64r6");
+ getTargetStreamer().emitDirectiveSetMips64R6();
+ break;
}
return false;
}
bool MipsAsmParser::eatComma(StringRef ErrorStr) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
@@ -2550,14 +3359,17 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
return true;
}
-bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
- if (Options.isReorder())
- Warning(Loc, ".cpload in reorder section");
+bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
+ if (AssemblerOptions.back()->isReorder())
+ Warning(Loc, ".cpload should be inside a noreorder section");
- // FIXME: Warn if cpload is used in Mips16 mode.
+ if (inMips16Mode()) {
+ reportParseError(".cpload is not supported in Mips16 mode");
+ return false;
+ }
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
- OperandMatchResultTy ResTy = ParseAnyRegister(Reg);
+ OperandMatchResultTy ResTy = parseAnyRegister(Reg);
if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
reportParseError("expected register containing function address");
return false;
@@ -2569,17 +3381,24 @@ bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
return false;
}
- getTargetStreamer().emitDirectiveCpload(RegOpnd.getGPR32Reg());
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveCpLoad(RegOpnd.getGPR32Reg());
return false;
}
bool MipsAsmParser::parseDirectiveCPSetup() {
+ MCAsmParser &Parser = getParser();
unsigned FuncReg;
unsigned Save;
bool SaveIsReg = true;
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
- OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg);
+ OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
reportParseError("expected register containing function address");
Parser.eatToEndOfStatement();
@@ -2596,10 +3415,10 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
FuncReg = FuncRegOpnd.getGPR32Reg();
TmpReg.clear();
- if (!eatComma("expected comma parsing directive"))
+ if (!eatComma("unexpected token, expected comma"))
return true;
- ResTy = ParseAnyRegister(TmpReg);
+ ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Integer)) {
@@ -2621,7 +3440,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
Save = SaveOpnd.getGPR32Reg();
}
- if (!eatComma("expected comma parsing directive"))
+ if (!eatComma("unexpected token, expected comma"))
return true;
StringRef Name;
@@ -2634,6 +3453,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
}
bool MipsAsmParser::parseDirectiveNaN() {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
const AsmToken &Tok = Parser.getTok();
@@ -2654,7 +3474,7 @@ bool MipsAsmParser::parseDirectiveNaN() {
}
bool MipsAsmParser::parseDirectiveSet() {
-
+ MCAsmParser &Parser = getParser();
// Get the next token.
const AsmToken &Tok = Parser.getTok();
@@ -2662,8 +3482,14 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoAtDirective();
} else if (Tok.getString() == "at") {
return parseSetAtDirective();
+ } else if (Tok.getString() == "arch") {
+ return parseSetArchDirective();
} else if (Tok.getString() == "fp") {
return parseSetFpDirective();
+ } else if (Tok.getString() == "pop") {
+ return parseSetPopDirective();
+ } else if (Tok.getString() == "push") {
+ return parseSetPushDirective();
} else if (Tok.getString() == "reorder") {
return parseSetReorderDirective();
} else if (Tok.getString() == "noreorder") {
@@ -2673,7 +3499,7 @@ bool MipsAsmParser::parseDirectiveSet() {
} else if (Tok.getString() == "nomacro") {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "mips16") {
- return parseSetFeature(Mips::FeatureMips16);
+ return parseSetMips16Directive();
} else if (Tok.getString() == "nomips16") {
return parseSetNoMips16Directive();
} else if (Tok.getString() == "nomicromips") {
@@ -2682,14 +3508,38 @@ bool MipsAsmParser::parseDirectiveSet() {
return false;
} else if (Tok.getString() == "micromips") {
return parseSetFeature(Mips::FeatureMicroMips);
+ } else if (Tok.getString() == "mips0") {
+ return parseSetMips0Directive();
+ } else if (Tok.getString() == "mips1") {
+ return parseSetFeature(Mips::FeatureMips1);
+ } else if (Tok.getString() == "mips2") {
+ return parseSetFeature(Mips::FeatureMips2);
+ } else if (Tok.getString() == "mips3") {
+ return parseSetFeature(Mips::FeatureMips3);
+ } else if (Tok.getString() == "mips4") {
+ return parseSetFeature(Mips::FeatureMips4);
+ } else if (Tok.getString() == "mips5") {
+ return parseSetFeature(Mips::FeatureMips5);
+ } else if (Tok.getString() == "mips32") {
+ return parseSetFeature(Mips::FeatureMips32);
} else if (Tok.getString() == "mips32r2") {
return parseSetFeature(Mips::FeatureMips32r2);
+ } else if (Tok.getString() == "mips32r6") {
+ return parseSetFeature(Mips::FeatureMips32r6);
} else if (Tok.getString() == "mips64") {
return parseSetFeature(Mips::FeatureMips64);
} else if (Tok.getString() == "mips64r2") {
return parseSetFeature(Mips::FeatureMips64r2);
+ } else if (Tok.getString() == "mips64r6") {
+ return parseSetFeature(Mips::FeatureMips64r6);
} else if (Tok.getString() == "dsp") {
return parseSetFeature(Mips::FeatureDSP);
+ } else if (Tok.getString() == "nodsp") {
+ return parseSetNoDspDirective();
+ } else if (Tok.getString() == "msa") {
+ return parseSetMsaDirective();
+ } else if (Tok.getString() == "nomsa") {
+ return parseSetNoMsaDirective();
} else {
// It is just an identifier, look for an assignment.
parseSetAssignment();
@@ -2702,6 +3552,7 @@ bool MipsAsmParser::parseDirectiveSet() {
/// parseDataDirective
/// ::= .word [ expression (, expression)* ]
bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2713,9 +3564,8 @@ bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
- // FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
+ return Error(L, "unexpected token, expected comma");
Parser.Lex();
}
}
@@ -2727,6 +3577,7 @@ bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
/// parseDirectiveGpWord
/// ::= .gpword local_sym
bool MipsAsmParser::parseDirectiveGpWord() {
+ MCAsmParser &Parser = getParser();
const MCExpr *Value;
// EmitGPRel32Value requires an expression, so we are using base class
// method to evaluate the expression.
@@ -2735,7 +3586,8 @@ bool MipsAsmParser::parseDirectiveGpWord() {
getParser().getStreamer().EmitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(), "unexpected token in directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
}
@@ -2743,6 +3595,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
/// parseDirectiveGpDWord
/// ::= .gpdword local_sym
bool MipsAsmParser::parseDirectiveGpDWord() {
+ MCAsmParser &Parser = getParser();
const MCExpr *Value;
// EmitGPRel64Value requires an expression, so we are using base class
// method to evaluate the expression.
@@ -2751,17 +3604,19 @@ bool MipsAsmParser::parseDirectiveGpDWord() {
getParser().getStreamer().EmitGPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(), "unexpected token in directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
}
bool MipsAsmParser::parseDirectiveOption() {
+ MCAsmParser &Parser = getParser();
// Get the option token.
AsmToken Tok = Parser.getTok();
// At the moment only identifiers are supported.
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected token in .option directive");
+ Error(Parser.getTok().getLoc(), "unexpected token, expected identifier");
Parser.eatToEndOfStatement();
return false;
}
@@ -2773,7 +3628,7 @@ bool MipsAsmParser::parseDirectiveOption() {
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
Error(Parser.getTok().getLoc(),
- "unexpected token in .option pic0 directive");
+ "unexpected token, expected end of statement");
Parser.eatToEndOfStatement();
}
return false;
@@ -2784,14 +3639,15 @@ bool MipsAsmParser::parseDirectiveOption() {
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
Error(Parser.getTok().getLoc(),
- "unexpected token in .option pic2 directive");
+ "unexpected token, expected end of statement");
Parser.eatToEndOfStatement();
}
return false;
}
// Unknown option.
- Warning(Parser.getTok().getLoc(), "unknown option in .option directive");
+ Warning(Parser.getTok().getLoc(),
+ "unknown option, expected 'pic0' or 'pic2'");
Parser.eatToEndOfStatement();
return false;
}
@@ -2801,10 +3657,11 @@ bool MipsAsmParser::parseDirectiveOption() {
/// ::= .module nooddspreg
/// ::= .module fp=value
bool MipsAsmParser::parseDirectiveModule() {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
SMLoc L = Lexer.getLoc();
- if (!getTargetStreamer().getCanHaveModuleDir()) {
+ if (!getTargetStreamer().isModuleDirectiveAllowed()) {
// TODO: get a better message.
reportParseError(".module directive must appear before any code");
return false;
@@ -2819,7 +3676,7 @@ bool MipsAsmParser::parseDirectiveModule() {
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("Expected end of statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2834,7 +3691,7 @@ bool MipsAsmParser::parseDirectiveModule() {
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("Expected end of statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2854,10 +3711,11 @@ bool MipsAsmParser::parseDirectiveModule() {
/// ::= =xx
/// ::= =64
bool MipsAsmParser::parseDirectiveModuleFP() {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
if (Lexer.isNot(AsmToken::Equal)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected equals sign '='");
return false;
}
Parser.Lex(); // Eat '=' token.
@@ -2867,7 +3725,7 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
return false;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2879,6 +3737,7 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive) {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
if (Lexer.is(AsmToken::Identifier)) {
@@ -2925,30 +3784,160 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
}
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
+ MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".cpload")
- return parseDirectiveCPLoad(DirectiveID.getLoc());
+ return parseDirectiveCpLoad(DirectiveID.getLoc());
if (IDVal == ".dword") {
parseDataDirective(8, DirectiveID.getLoc());
return false;
}
-
if (IDVal == ".ent") {
- // Ignore this directive for now.
- Parser.Lex();
+ StringRef SymbolName;
+
+ if (Parser.parseIdentifier(SymbolName)) {
+ reportParseError("expected identifier after .ent");
+ return false;
+ }
+
+ // There's an undocumented extension that allows an integer to
+ // follow the name of the procedure which AFAICS is ignored by GAS.
+ // Example: .ent foo,2
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma)) {
+ // Even though we accept this undocumented extension for compatibility
+ // reasons, the additional integer argument does not actually change
+ // the behaviour of the '.ent' directive, so we would like to discourage
+ // its use. We do this by not referring to the extended version in
+ // error messages which are not directly related to its use.
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+ Parser.Lex(); // Eat the comma.
+ const MCExpr *DummyNumber;
+ int64_t DummyNumberVal;
+ // If the user was explicitly trying to use the extended version,
+ // we still give helpful extension-related error messages.
+ if (Parser.parseExpression(DummyNumber)) {
+ reportParseError("expected number after comma");
+ return false;
+ }
+ if (!DummyNumber->EvaluateAsAbsolute(DummyNumberVal)) {
+ reportParseError("expected an absolute expression after comma");
+ return false;
+ }
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getTargetStreamer().emitDirectiveEnt(*Sym);
+ CurrentFn = Sym;
return false;
}
if (IDVal == ".end") {
- // Ignore this directive for now.
- Parser.Lex();
+ StringRef SymbolName;
+
+ if (Parser.parseIdentifier(SymbolName)) {
+ reportParseError("expected identifier after .end");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ if (CurrentFn == nullptr) {
+ reportParseError(".end used without .ent");
+ return false;
+ }
+
+ if ((SymbolName != CurrentFn->getName())) {
+ reportParseError(".end symbol does not match .ent symbol");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveEnd(SymbolName);
+ CurrentFn = nullptr;
return false;
}
if (IDVal == ".frame") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ // .frame $stack_reg, frame_size_in_bytes, $return_reg
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
+ OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected stack register");
+ return false;
+ }
+
+ MipsOperand &StackRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!StackRegOpnd.isGPRAsmReg()) {
+ reportParseError(StackRegOpnd.getStartLoc(),
+ "expected general purpose register");
+ return false;
+ }
+ unsigned StackReg = StackRegOpnd.getGPR32Reg();
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the frame size.
+ const MCExpr *FrameSize;
+ int64_t FrameSizeVal;
+
+ if (Parser.parseExpression(FrameSize)) {
+ reportParseError("expected frame size value");
+ return false;
+ }
+
+ if (!FrameSize->EvaluateAsAbsolute(FrameSizeVal)) {
+ reportParseError("frame size not an absolute expression");
+ return false;
+ }
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the return register.
+ TmpReg.clear();
+ ResTy = parseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected return register");
+ return false;
+ }
+
+ MipsOperand &ReturnRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!ReturnRegOpnd.isGPRAsmReg()) {
+ reportParseError(ReturnRegOpnd.getStartLoc(),
+ "expected general purpose register");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().emitFrame(StackReg, FrameSizeVal,
+ ReturnRegOpnd.getGPR32Reg());
return false;
}
@@ -2956,15 +3945,61 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveSet();
}
- if (IDVal == ".fmask") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (IDVal == ".mask" || IDVal == ".fmask") {
+ // .mask bitmask, frame_offset
+ // bitmask: One bit for each register used.
+ // frame_offset: Offset from Canonical Frame Address ($sp on entry) where
+ // first register is expected to be saved.
+ // Examples:
+ // .mask 0x80000000, -4
+ // .fmask 0x80000000, -4
+ //
- if (IDVal == ".mask") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ // Parse the bitmask
+ const MCExpr *BitMask;
+ int64_t BitMaskVal;
+
+ if (Parser.parseExpression(BitMask)) {
+ reportParseError("expected bitmask value");
+ return false;
+ }
+
+ if (!BitMask->EvaluateAsAbsolute(BitMaskVal)) {
+ reportParseError("bitmask not an absolute expression");
+ return false;
+ }
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the frame_offset
+ const MCExpr *FrameOffset;
+ int64_t FrameOffsetVal;
+
+ if (Parser.parseExpression(FrameOffset)) {
+ reportParseError("expected frame offset value");
+ return false;
+ }
+
+ if (!FrameOffset->EvaluateAsAbsolute(FrameOffsetVal)) {
+ reportParseError("frame offset not an absolute expression");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ if (IDVal == ".mask")
+ getTargetStreamer().emitMask(BitMaskVal, FrameOffsetVal);
+ else
+ getTargetStreamer().emitFMask(BitMaskVal, FrameOffsetVal);
return false;
}
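
A standalone sketch of the bitmask convention described above, with a
hypothetical maskFor() helper (plain C++, not an LLVM API):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Bit N of the .mask/.fmask bitmask is set when register $N is saved.
static uint32_t maskFor(std::initializer_list<unsigned> Regs) {
  uint32_t Mask = 0;
  for (unsigned R : Regs)
    Mask |= 1u << R;
  return Mask;
}

int main() {
  assert(maskFor({31}) == 0x80000000u);         // $ra only, as in the example
  assert(maskFor({16, 17, 31}) == 0x80030000u); // $s0, $s1 and $ra
  return 0;
}
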
@@ -2992,7 +4027,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".abicalls") {
getTargetStreamer().emitDirectiveAbiCalls();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
// Clear line
Parser.eatToEndOfStatement();
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 04f05236ccda..36ba8e559e0b 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -3,8 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Mips.td)
tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel)
@@ -22,15 +21,12 @@ add_llvm_target(MipsCodeGen
Mips16ISelDAGToDAG.cpp
Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
- MipsABIInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCCState.cpp
- MipsCodeEmitter.cpp
MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
MipsFastISel.cpp
- MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index bb70fd3378bc..414b4f78480f 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = MipsDisassembler
parent = Mips
-required_libraries = MC MipsInfo Support
+required_libraries = MCDisassembler MipsInfo Support
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 53fb7a10ad0d..da33f3b913cd 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -31,32 +30,29 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
-/// MipsDisassemblerBase - a disasembler class for Mips.
+/// A disassembler class for Mips.
class MipsDisassemblerBase : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
MipsDisassemblerBase(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool bigEndian) :
- MCDisassembler(STI, Ctx),
- IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {}
+ bool IsBigEndian)
+ : MCDisassembler(STI, Ctx),
+ IsGP64Bit(STI.getFeatureBits() & Mips::FeatureGP64Bit),
+ IsBigEndian(IsBigEndian) {}
virtual ~MipsDisassemblerBase() {}
- bool isN64() const { return IsN64; }
+ bool isGP64Bit() const { return IsGP64Bit; }
private:
- bool IsN64;
+ bool IsGP64Bit;
protected:
- bool isBigEndian;
+ bool IsBigEndian;
};
-/// MipsDisassembler - a disasembler class for Mips32.
+/// A disassembler class for Mips32.
class MipsDisassembler : public MipsDisassemblerBase {
bool IsMicroMips;
public:
- /// Constructor - Initializes the disassembler.
- ///
MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian)
: MipsDisassemblerBase(STI, Ctx, bigEndian) {
IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
@@ -75,32 +71,23 @@ public:
return !hasMips32() && !hasMips3();
}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
-
-/// Mips64Disassembler - a disasembler class for Mips64.
+/// A disassembler class for Mips64.
class Mips64Disassembler : public MipsDisassemblerBase {
public:
- /// Constructor - Initializes the disassembler.
- ///
Mips64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool bigEndian) :
MipsDisassemblerBase(STI, Ctx, bigEndian) {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -117,6 +104,16 @@ static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -231,6 +228,13 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTarget7MM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
@@ -255,9 +259,29 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSyncI(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -285,6 +309,26 @@ static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeUImm6Lsl2(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLiSimm7(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSimm4(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -313,6 +357,15 @@ static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeUImm5lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
@@ -349,6 +402,14 @@ static DecodeStatus
DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -704,43 +765,55 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
return MCDisassembler::Success;
}
- /// readInstruction - read four bytes from the MemoryObject
- /// and return 32 bit word sorted according to the given endianess
-static DecodeStatus readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn,
- bool isBigEndian,
- bool IsMicroMips) {
- uint8_t Bytes[4];
+/// Read two bytes from the ArrayRef and return the 16-bit halfword ordered
+/// according to the given endianness.
+static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn,
+ bool IsBigEndian) {
+ // We want to read exactly 2 Bytes of data.
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+ if (IsBigEndian) {
+ Insn = (Bytes[0] << 8) | Bytes[1];
+ } else {
+ Insn = (Bytes[1] << 8) | Bytes[0];
+ }
+
+ return MCDisassembler::Success;
+}
+
+/// Read four bytes from the ArrayRef and return the 32-bit word ordered
+/// according to the given endianness.
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn,
+ bool IsBigEndian, bool IsMicroMips) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return MCDisassembler::Fail;
}
- if (isBigEndian) {
+ // High 16 bits of a 32-bit microMIPS instruction (where the opcode is)
+ // always precede the low 16 bits in the instruction stream (that is, they
+ // are placed at lower addresses in the instruction stream).
+ //
+ // microMIPS byte ordering:
+ // Big-endian: 0 | 1 | 2 | 3
+ // Little-endian: 1 | 0 | 3 | 2
+
+ if (IsBigEndian) {
// Encoded as a big-endian 32-bit word in the stream.
- insn = (Bytes[3] << 0) |
- (Bytes[2] << 8) |
- (Bytes[1] << 16) |
- (Bytes[0] << 24);
- }
- else {
- // Encoded as a small-endian 32-bit word in the stream.
- // Little-endian byte ordering:
- // mips32r2: 4 | 3 | 2 | 1
- // microMIPS: 2 | 1 | 4 | 3
+ Insn =
+ (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
+ } else {
if (IsMicroMips) {
- insn = (Bytes[2] << 0) |
- (Bytes[3] << 8) |
- (Bytes[0] << 16) |
+ Insn = (Bytes[2] << 0) | (Bytes[3] << 8) | (Bytes[0] << 16) |
(Bytes[1] << 24);
} else {
- insn = (Bytes[0] << 0) |
- (Bytes[1] << 8) |
- (Bytes[2] << 16) |
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
(Bytes[3] << 24);
}
}
@@ -748,24 +821,33 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
return MCDisassembler::Success;
}
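
A standalone sketch of the little-endian microMIPS byte order described in the
comment above (hypothetical helper name, plain C++ rather than the decoder's
ArrayRef plumbing):

#include <cassert>
#include <cstdint>

// The high (opcode) halfword comes first in the stream; each halfword is
// itself little-endian, giving the 1 | 0 | 3 | 2 byte order.
static uint32_t assembleMicroMipsLE(const uint8_t B[4]) {
  return (uint32_t)B[2] | ((uint32_t)B[3] << 8) | ((uint32_t)B[0] << 16) |
         ((uint32_t)B[1] << 24);
}

int main() {
  const uint8_t Stream[4] = {0x34, 0x12, 0x78, 0x56};
  assert(assembleMicroMipsLE(Stream) == 0x12345678u);
  return 0;
}
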
-DecodeStatus
-MipsDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
-
- DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian, IsMicroMips);
- if (Result == MCDisassembler::Fail)
- return MCDisassembler::Fail;
+ DecodeStatus Result;
if (IsMicroMips) {
- DEBUG(dbgs() << "Trying MicroMips32 table (32-bit opcodes):\n");
+ Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ DEBUG(dbgs() << "Trying MicroMips16 table (16-bit instructions):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips16, Instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+ return Result;
+ }
+
+ Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, true);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
+ Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -774,10 +856,14 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
+ Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
Result =
- decodeInstruction(DecoderTableCOP3_32, instr, Insn, Address, this, STI);
+ decodeInstruction(DecoderTableCOP3_32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -786,7 +872,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
if (hasMips32r6() && isGP64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
- Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, instr, Insn,
+ Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, Instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -796,7 +882,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
- Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn,
+ Result = decodeInstruction(DecoderTableMips32r6_64r632, Instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -806,8 +892,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -816,30 +902,28 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
-DecodeStatus
-Mips64Disassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus Mips64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
- DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian, false);
+ DecodeStatus Result =
+ readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips6432, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
// If we fail to decode in Mips64 decoder space we can try in Mips32
- Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -870,6 +954,28 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -885,7 +991,7 @@ static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
- if (static_cast<const MipsDisassembler *>(Decoder)->isN64())
+ if (static_cast<const MipsDisassembler *>(Decoder)->isGP64Bit())
return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder);
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
@@ -966,7 +1072,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- if(Inst.getOpcode() == Mips::SC){
+ if (Inst.getOpcode() == Mips::SC ||
+ Inst.getOpcode() == Mips::SCD) {
Inst.addOperand(MCOperand::CreateReg(Reg));
}
@@ -994,6 +1101,38 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<12>(Insn & 0xfff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Hint = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSyncI(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@@ -1040,6 +1179,74 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Offset = Insn & 0xf;
+ unsigned Reg = fieldFromInstruction(Insn, 7, 3);
+ unsigned Base = fieldFromInstruction(Insn, 4, 3);
+
+ switch (Inst.getOpcode()) {
+ case Mips::LBU16_MM:
+ case Mips::LHU16_MM:
+ case Mips::LW16_MM:
+ if (DecodeGPRMM16RegisterClass(Inst, Reg, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ break;
+ case Mips::SB16_MM:
+ case Mips::SH16_MM:
+ case Mips::SW16_MM:
+ if (DecodeGPRMM16ZeroRegisterClass(Inst, Reg, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ break;
+ }
+
+ if (DecodeGPRMM16RegisterClass(Inst, Base, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ switch (Inst.getOpcode()) {
+ case Mips::LBU16_MM:
+ if (Offset == 0xf)
+ Inst.addOperand(MCOperand::CreateImm(-1));
+ else
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ break;
+ case Mips::SB16_MM:
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ break;
+ case Mips::LHU16_MM:
+ case Mips::SH16_MM:
+ Inst.addOperand(MCOperand::CreateImm(Offset << 1));
+ break;
+ case Mips::LW16_MM:
+ case Mips::SW16_MM:
+ Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+ break;
+ }
+
+ return MCDisassembler::Success;
+}
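
A minimal sketch of the per-opcode offset scaling in DecodeMemMMImm4 above
(hypothetical helper, detached from the MCInst machinery):

#include <cassert>

// LBU16 treats field 0xf as -1; LHU16/SH16 scale by 2; LW16/SW16 by 4.
static int decodeMMImm4Offset(unsigned Field, int ShiftAmt, bool MinusOneAtMax) {
  if (MinusOneAtMax && Field == 0xf)
    return -1;
  return (int)(Field << ShiftAmt);
}

int main() {
  assert(decodeMMImm4Offset(0xf, 0, true) == -1);  // LBU16_MM
  assert(decodeMMImm4Offset(0xf, 1, false) == 30); // LHU16_MM
  assert(decodeMMImm4Offset(0xf, 2, false) == 60); // LW16_MM
  return 0;
}
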
+
+static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Offset = Insn & 0x1F;
+ unsigned Reg = fieldFromInstruction(Insn, 5, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Mips::SP));
+ Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1051,12 +1258,26 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- if (Inst.getOpcode() == Mips::SC_MM)
+ switch (Inst.getOpcode()) {
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ if (DecodeRegListOperand(Inst, Insn, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ break;
+ case Mips::SC_MM:
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ // fallthrough
+ default:
Inst.addOperand(MCOperand::CreateReg(Reg));
+ if (Inst.getOpcode() == Mips::LWP_MM || Inst.getOpcode() == Mips::SWP_MM)
+ Inst.addOperand(MCOperand::CreateReg(Reg+1));
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ }
return MCDisassembler::Success;
}
@@ -1326,6 +1547,15 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<7>(Offset) << 1;
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
@@ -1344,6 +1574,46 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Value == 0)
+ Inst.addOperand(MCOperand::CreateImm(1));
+ else if (Value == 0x7)
+ Inst.addOperand(MCOperand::CreateImm(-1));
+ else
+ Inst.addOperand(MCOperand::CreateImm(Value << 2));
+ return MCDisassembler::Success;
+}
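
The effective immediate table for DecodeAddiur2Simm7 above, restated as a
standalone sketch (hypothetical helper name):

#include <cassert>

// The 3-bit field encodes {1, 4, 8, 12, 16, 20, 24, -1}: 0 and 7 are
// special-cased, everything else is the field value times 4.
static int decodeAddiur2(unsigned V) {
  if (V == 0)
    return 1;
  if (V == 7)
    return -1;
  return (int)(V << 2);
}

int main() {
  assert(decodeAddiur2(0) == 1);
  assert(decodeAddiur2(3) == 12);
  assert(decodeAddiur2(7) == -1);
  return 0;
}
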
+
+static DecodeStatus DecodeUImm6Lsl2(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Value << 2));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLiSimm7(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Value == 0x7F)
+ Inst.addOperand(MCOperand::CreateImm(-1));
+ else
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSimm4(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<4>(Value)));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1392,3 +1662,76 @@ static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) * 8));
return MCDisassembler::Success;
}
+
+static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ int32_t DecodedValue;
+ switch (Insn) {
+ case 0: DecodedValue = 256; break;
+ case 1: DecodedValue = 257; break;
+ case 510: DecodedValue = -258; break;
+ case 511: DecodedValue = -257; break;
+ default: DecodedValue = SignExtend32<9>(Insn); break;
+ }
+ Inst.addOperand(MCOperand::CreateImm(DecodedValue * 4));
+ return MCDisassembler::Success;
+}
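
A standalone restatement of DecodeSimm9SP above, using a portable 9-bit
sign-extension in place of SignExtend32 (hypothetical helper name):

#include <cassert>
#include <cstdint>

static int32_t decodeSimm9SP(unsigned Insn) {
  int32_t V;
  switch (Insn) {
  case 0:   V = 256;  break; // remapped values extend the reachable range
  case 1:   V = 257;  break;
  case 510: V = -258; break;
  case 511: V = -257; break;
  default:  V = (int32_t)((Insn & 0x1ff) ^ 0x100) - 0x100; break; // sign-extend
  }
  return V * 4; // the operand is scaled to word units
}

int main() {
  assert(decodeSimm9SP(2) == 8);       // plain in-range value
  assert(decodeSimm9SP(0) == 1024);    // remapped
  assert(decodeSimm9SP(511) == -1028); // remapped
  return 0;
}
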
+
+static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ // Insn must be >= 0; since it is unsigned, that condition is always true.
+ assert(Insn < 16);
+ int32_t DecodedValues[] = {128, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64,
+ 255, 32768, 65535};
+ Inst.addOperand(MCOperand::CreateImm(DecodedValues[Insn]));
+ return MCDisassembler::Success;
+}
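
The same lookup, sketched as a self-contained helper (hypothetical name,
table copied from the decoder above):

#include <cassert>

// ANDI16's 4-bit field indexes a fixed table of useful masks rather than
// encoding the mask directly.
static int decodeAndi16(unsigned Field) {
  static const int Table[16] = {128, 1,  2,  3,  4,  7,  8,  15,
                                16,  31, 32, 63, 64, 255, 32768, 65535};
  return Table[Field & 0xf];
}

int main() {
  assert(decodeAndi16(0) == 128);
  assert(decodeAndi16(13) == 255);
  assert(decodeAndi16(15) == 65535);
  return 0;
}
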
+
+static DecodeStatus DecodeUImm5lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Insn << 2));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegListOperand(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
+ Mips::S6, Mips::FP};
+ unsigned RegNum;
+
+ unsigned RegLst = fieldFromInstruction(Insn, 21, 5);
+ // Empty register lists are not allowed.
+ if (RegLst == 0)
+ return MCDisassembler::Fail;
+
+ RegNum = RegLst & 0xf;
+ for (unsigned i = 0; i < RegNum; i++)
+ Inst.addOperand(MCOperand::CreateReg(Regs[i]));
+
+ if (RegLst & 0x10)
+ Inst.addOperand(MCOperand::CreateReg(Mips::RA));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3};
+ unsigned RegNum;
+
+ unsigned RegLst = fieldFromInstruction(Insn, 4, 2);
+ // Empty register lists are not allowed.
+ if (RegLst == 0)
+ return MCDisassembler::Fail;
+
+ RegNum = RegLst & 0x3;
+ for (unsigned i = 0; i < RegNum - 1; i++)
+ Inst.addOperand(MCOperand::CreateReg(Regs[i]));
+
+ Inst.addOperand(MCOperand::CreateReg(Mips::RA));
+
+ return MCDisassembler::Success;
+}
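
A sketch of the 32-bit LWM/SWM register-list field decoded above: the low
four bits give a count of consecutive callee-saved registers starting at $s0,
and bit 4 appends $ra (hypothetical helper, assuming the o32 ABI register
numbers):

#include <cassert>
#include <vector>

static std::vector<int> decodeRegList(unsigned RegLst) {
  std::vector<int> Regs;
  for (unsigned i = 0, e = RegLst & 0xf; i != e; ++i)
    Regs.push_back(16 + i); // $s0..$s6; the eighth entry is $fp in the table
  if (RegLst & 0x10)
    Regs.push_back(31);     // $ra
  return Regs;
}

int main() {
  assert(decodeRegList(0x11) == (std::vector<int>{16, 31}));     // {$s0, $ra}
  assert(decodeRegList(0x03) == (std::vector<int>{16, 17, 18})); // {$s0..$s2}
  return 0;
}
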
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 8c797517e316..61743ff76205 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -134,8 +134,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
} else if (const MipsMCExpr *ME = dyn_cast<MipsMCExpr>(Expr)) {
ME->print(OS);
return;
- } else if (!(SRE = dyn_cast<MCSymbolRefExpr>(Expr)))
- assert(false && "Unexpected MCExpr type.");
+ } else
+ SRE = cast<MCSymbolRefExpr>(Expr);
MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
@@ -225,6 +225,20 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
// If PIC target the target is loaded as the
// pattern lw $25,%call16($28)
+
+ // opNum can be invalid if the instruction has a register list as an
+ // operand. The memory operand is always the last operand of the
+ // instruction (base + offset).
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ case Mips::SWM16_MM:
+ case Mips::LWM16_MM:
+ opNum = MI->getNumOperands() - 2;
+ break;
+ }
+
printOperand(MI, opNum+1, O);
O << "(";
printOperand(MI, opNum, O);
@@ -248,6 +262,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
}
void MipsInstPrinter::
+printRegisterPair(const MCInst *MI, int opNum, raw_ostream &O) {
+ printRegName(O, MI->getOperand(opNum).getReg());
+}
+
+void MipsInstPrinter::
printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
llvm_unreachable("TODO");
}
@@ -324,3 +343,13 @@ void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) {
}
}
+void MipsInstPrinter::
+printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+ // - 2 because register List is always first operand of instruction and it is
+ // always followed by memory operand (base + offset).
+ for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
+ if (i != opNum)
+ O << ", ";
+ printRegName(O, MI->getOperand(i).getReg());
+ }
+}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 550a0f10d1ba..468dc07818e7 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSINSTPRINTER_H
-#define MIPSINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -99,6 +99,7 @@ private:
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printRegisterPair(const MCInst *MI, int opNum, raw_ostream &O);
void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
@@ -107,6 +108,7 @@ private:
unsigned OpNo1, raw_ostream &OS);
bool printAlias(const MCInst &MI, raw_ostream &OS);
void printSaveRestore(const MCInst *MI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
index e6d3a426b2ea..0e8d902c56d2 100644
--- a/lib/Target/Mips/LLVMBuild.txt
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = MipsCodeGen
parent = Mips
-required_libraries = Analysis AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo Scalar SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 6b3788ca515c..c63af7c710cc 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMipsDesc
+ MipsABIInfo.cpp
MipsABIFlagsSection.cpp
MipsAsmBackend.cpp
MipsELFObjectWriter.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index ea5bc12b0740..8bcfb0fab9ee 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSABIFLAGSSECTION_H
-#define MIPSABIFLAGSSECTION_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/Mips/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index f885369845e7..f885369845e7 100644
--- a/lib/Target/Mips/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
diff --git a/lib/Target/Mips/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index bea585e41bd4..eccb50d86240 100644
--- a/lib/Target/Mips/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -11,8 +11,8 @@
#define MIPSABIINFO_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d8e6128cd54d..6670dc208552 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -103,6 +103,14 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case Mips::fixup_MICROMIPS_26_S1:
Value >>= 1;
break;
+ case Mips::fixup_MICROMIPS_PC7_S1:
+ Value -= 4;
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t) Value / 2;
+ // We now check if Value can be encoded as a 7-bit signed immediate.
+ if (!isIntN(7, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC7 fixup");
+ break;
case Mips::fixup_MICROMIPS_PC16_S1:
Value -= 4;
// Forcing a signed division because Value can be negative.
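
A sketch of the PC7_S1 scaling above: the branch offset is biased by the
4-byte instruction, halfword-scaled, and must then fit in 7 signed bits
(hypothetical helper; the real code reports a FatalError on overflow):

#include <cassert>
#include <cstdint>

static bool encodePC7(int64_t Value, int64_t &Field) {
  Value -= 4;        // PC-relative bias
  Field = Value / 2; // offsets are halfword-scaled
  return Field >= -64 && Field <= 63; // the isIntN(7, Field) check
}

int main() {
  int64_t F;
  assert(encodePC7(18, F) && F == 7); // in range
  assert(!encodePC7(200, F));         // out of range PC7 fixup
  return 0;
}
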
@@ -271,6 +279,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_LO16", 0, 16, 0 },
{ "fixup_MICROMIPS_GOT16", 0, 16, 0 },
+ { "fixup_MICROMIPS_PC7_S1", 0, 7, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_PC16_S1", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_CALL16", 0, 16, 0 },
{ "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 },
@@ -334,6 +343,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_HI16", 16, 16, 0 },
{ "fixup_MICROMIPS_LO16", 16, 16, 0 },
{ "fixup_MICROMIPS_GOT16", 16, 16, 0 },
+ { "fixup_MICROMIPS_PC7_S1", 9, 7, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_CALL16", 16, 16, 0 },
{ "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 },
@@ -367,7 +377,12 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// Check for a less than instruction size number of bytes
// FIXME: 16 bit instructions are not handled yet here.
// We shouldn't be using a hard coded number for instruction size.
- if (Count % 4) return false;
+
+ // If the count is not 4-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ for (uint64_t i = 0, e = Count % 4; i != e; ++i)
+ OW->Write8(0);
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
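
A sketch of the padding scheme introduced above: any sub-word tail is written
as zero data bytes, then the remainder is filled with 4-byte nops (a MIPS nop
encodes as 0x00000000):

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> padBytes(uint64_t Count) {
  std::vector<uint8_t> Out;
  for (uint64_t i = 0, e = Count % 4; i != e; ++i)
    Out.push_back(0); // data in the text section, not an instruction
  for (uint64_t i = 0, e = Count / 4; i != e; ++i)
    Out.insert(Out.end(), {0, 0, 0, 0}); // one nop per word
  return Out;
}

int main() {
  assert(padBytes(10).size() == 10); // 2 zero bytes + 2 nops
  return 0;
}
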
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index d5c3dbc47880..dd0e54c79cf1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
//
-#ifndef MIPSASMBACKEND_H
-#define MIPSASMBACKEND_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
#include "MCTargetDesc/MipsFixupKinds.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmBackend.h"
namespace llvm {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index d2323dc1b47b..ff7779ec1e78 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -11,8 +11,8 @@
// the Mips target useful for the compiler back-end and the MC libraries.
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSBASEINFO_H
-#define MIPSBASEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSBASEINFO_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSBASEINFO_H
#include "MipsFixupKinds.h"
#include "MipsMCTargetDesc.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 4ea7846f83f6..56aac4eaea49 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
@@ -161,6 +162,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_MICROMIPS_GOT16:
Type = ELF::R_MICROMIPS_GOT16;
break;
+ case Mips::fixup_MICROMIPS_PC7_S1:
+ Type = ELF::R_MICROMIPS_PC7_S1;
+ break;
case Mips::fixup_MICROMIPS_PC16_S1:
Type = ELF::R_MICROMIPS_PC16_S1;
break;
@@ -219,7 +223,7 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
bool
MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
- // FIXME: This is extremelly conservative. This really needs to use a
+ // FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
switch (Type) {
@@ -244,8 +248,11 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
case ELF::R_MICROMIPS_LO16:
return true;
- case ELF::R_MIPS_26:
case ELF::R_MIPS_32:
+ if (MCELF::getOther(SD) & (ELF::STO_MIPS_MICROMIPS >> 2))
+ return true;
+ // fallthrough
+ case ELF::R_MIPS_26:
case ELF::R_MIPS_64:
case ELF::R_MIPS_GPREL16:
return false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 803ab85657dc..18c4a206059d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -8,7 +8,12 @@
//===----------------------------------------------------------------------===//
#include "MipsELFStreamer.h"
+#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
@@ -16,6 +21,8 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
MCContext &Context = getContext();
const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
+ MipsTargetELFStreamer *ELFTargetStreamer =
+ static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
const MCOperand &Op = Inst.getOperand(OpIndex);
@@ -26,6 +33,35 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
unsigned Reg = Op.getReg();
RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
}
+
+ if (ELFTargetStreamer->isMicroMipsEnabled()) {
+ for (auto Label : Labels) {
+ MCSymbolData &Data = getOrCreateSymbolData(Label);
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+ }
+ }
+
+ Labels.clear();
+}
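
A sketch of the st_other packing noted in the comment above: the STO value is
defined for the full byte, but the streamer's "other" slot stores only the top
six bits, so the round trip needs a two-bit shift:

#include <cassert>

int main() {
  const unsigned STO_MIPS_MICROMIPS = 0x80; // the ELF definition, full byte
  unsigned Other = STO_MIPS_MICROMIPS >> 2; // value passed to MCELF::setOther
  assert(Other == 0x20);
  assert((Other << 2) == STO_MIPS_MICROMIPS); // recovers the ELF value
  return 0;
}
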
+
+void MipsELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCELFStreamer::EmitLabel(Symbol);
+ Labels.push_back(Symbol);
+}
+
+void MipsELFStreamer::SwitchSection(const MCSection * Section,
+ const MCExpr *Subsection) {
+ MCELFStreamer::SwitchSection(Section, Subsection);
+ Labels.clear();
+}
+
+void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ Labels.clear();
}
void MipsELFStreamer::EmitMipsOptionRecords() {
@@ -36,8 +72,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
namespace llvm {
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
+ const MCSubtargetInfo &STI,
+ bool RelaxAll) {
return new MipsELFStreamer(Context, MAB, OS, Emitter, STI);
}
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 58863be9cc23..136146b9474c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSELFSTREAMER_H
-#define MIPSELFSTREAMER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
#include "MipsOptionRecord.h"
#include "llvm/ADT/SmallVector.h"
@@ -29,6 +29,8 @@ class MCSubtargetInfo;
class MipsELFStreamer : public MCELFStreamer {
SmallVector<std::unique_ptr<MipsOptionRecord>, 8> MipsOptionRecords;
MipsRegInfoRecord *RegInfoRecord;
+ SmallVector<MCSymbol*, 4> Labels;
+
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
@@ -46,13 +48,27 @@ public:
/// usage for the translation unit.
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ /// Overriding this function allows us to record all labels that should be
+ /// marked as microMIPS. Based on this data marking is done in
+ /// EmitInstruction.
+ void EmitLabel(MCSymbol *Symbol) override;
+
+ /// Overriding this function allows us to dismiss all labels that are
+ /// candidates for marking as microMIPS when .section directive is processed.
+ void SwitchSection(const MCSection *Section,
+ const MCExpr *Subsection = nullptr) override;
+
+ /// Overriding this function allows us to dismiss all labels that are
+ /// candidates for marking as microMIPS when .word directive is emitted.
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override;
+
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
};
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack);
+ const MCSubtargetInfo &STI, bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 05080f046f89..71c11d76b316 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H
-#define LLVM_MIPS_MIPSFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSFIXUPKINDS_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
@@ -158,6 +158,9 @@ namespace Mips {
// resulting in - R_MICROMIPS_GOT16
fixup_MICROMIPS_GOT16,
+ // resulting in - R_MICROMIPS_PC7_S1
+ fixup_MICROMIPS_PC7_S1,
+
// resulting in - R_MICROMIPS_PC16_S1
fixup_MICROMIPS_PC16_S1,
@@ -199,4 +202,4 @@ namespace Mips {
} // namespace llvm
-#endif // LLVM_MIPS_MIPSFIXUPKINDS_H
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index e415412ab6cb..e2bd5a815ab1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -34,6 +34,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
Data32bitsDirective = "\t.4byte\t";
Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
+ PrivateLabelPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
@@ -41,6 +42,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
UseAssignmentForEHBegin = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
- HasLEB128 = true;
DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 37ba0c4aaa7b..59ff1c41ed6e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETASMINFO_H
-#define MIPSTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCASMINFO_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 43fc52136ddc..a54a2eb6b452 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -20,9 +20,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -173,7 +173,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// Unfortunately in MIPS both NOP and SLL will come in with Binary == 0
// so we have to special check for them.
unsigned Opcode = TmpInst.getOpcode();
- if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
+ if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) &&
+ (Opcode != Mips::SLL_MM) && !Binary)
llvm_unreachable("unimplemented opcode in EncodeInstruction()");
if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
@@ -219,6 +220,28 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTarget7OpValueMM - Return binary encoding of the microMIPS branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTarget7OpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getBranchTarget7OpValueMM expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MICROMIPS_PC7_S1)));
+ return 0;
+}
+
/// getBranchTargetOpValue - Return binary encoding of the microMIPS branch
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -345,6 +368,67 @@ getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+    // The immediate is a multiple of 4 and is encoded as 'immediate >> 2'.
+ unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 3) == 0);
+ return Res >> 2;
+ }
+
+ assert(MO.isExpr() &&
+ "getUImm5Lsl2Encoding expects only expressions or an immediate");
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ int Value = MO.getImm();
+ return Value >> 2;
+ }
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ unsigned Value = MO.getImm();
+ return Value >> 2;
+ }
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ unsigned Binary = (MO.getImm() >> 2) & 0x0000ffff;
+ return (((Binary & 0x8000) >> 7) | (Binary & 0x00ff));
+ }
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
@@ -574,9 +658,76 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm4(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 6-4, offset is encoded in bits 3-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),
+ Fixups, STI) << 4;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
+ Fixups, STI);
+
+ return (OffBits & 0xF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm4Lsl1(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 6-4, offset is encoded in bits 3-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),
+ Fixups, STI) << 4;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
+ Fixups, STI) >> 1;
+
+ return (OffBits & 0xF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm4Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 6-4, offset is encoded in bits 3-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),
+ Fixups, STI) << 4;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
+ Fixups, STI) >> 2;
+
+ return (OffBits & 0xF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Register is encoded in bits 9-5, offset is encoded in bits 4-0.
+ assert(MI.getOperand(OpNo).isReg() &&
+ MI.getOperand(OpNo).getReg() == Mips::SP &&
+ "Unexpected base register!");
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
+ Fixups, STI) >> 2;
+
+ return OffBits & 0x1F;
+}
+
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+  // OpNo can be invalid if the instruction has a register list operand;
+  // the memory operand (base + offset) is always the last operand.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ OpNo = MI.getNumOperands() - 2;
+ break;
+ }
+
// Base register is encoded in bits 20-16, offset is encoded in bits 11-0.
assert(MI.getOperand(OpNo).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16;
@@ -585,6 +736,30 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
return (OffBits & 0x0FFF) | RegBits;
}
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+  // OpNo can be invalid if the instruction has a register list operand;
+  // the memory operand (base + offset) is always the last operand.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case Mips::SWM16_MM:
+ case Mips::LWM16_MM:
+ OpNo = MI.getNumOperands() - 2;
+ break;
+ }
+
+ // Offset is encoded in bits 4-0.
+ assert(MI.getOperand(OpNo).isReg());
+ // Base register is always SP - thus it is not encoded.
+ assert(MI.getOperand(OpNo+1).isImm());
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+ return ((OffBits >> 2) & 0x0F);
+}
+
unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -659,4 +834,75 @@ MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+unsigned
+MipsMCCodeEmitter::getUImm3Mod8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isImm());
+ const MCOperand &MO = MI.getOperand(OpNo);
+ return MO.getImm() % 8;
+}
+
+unsigned
+MipsMCCodeEmitter::getUImm4AndValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isImm());
+ const MCOperand &MO = MI.getOperand(OpNo);
+ unsigned Value = MO.getImm();
+ switch (Value) {
+ case 128: return 0x0;
+ case 1: return 0x1;
+ case 2: return 0x2;
+ case 3: return 0x3;
+ case 4: return 0x4;
+ case 7: return 0x5;
+ case 8: return 0x6;
+ case 15: return 0x7;
+ case 16: return 0x8;
+ case 31: return 0x9;
+ case 32: return 0xa;
+ case 63: return 0xb;
+ case 64: return 0xc;
+ case 255: return 0xd;
+ case 32768: return 0xe;
+ case 65535: return 0xf;
+ }
+ llvm_unreachable("Unexpected value");
+}
+
+unsigned
+MipsMCCodeEmitter::getRegisterListOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned res = 0;
+
+  // The register list operand is always the first operand of the instruction
+  // and is placed before the memory operand (base register + offset).
+
+ for (unsigned I = OpNo, E = MI.getNumOperands() - 2; I < E; ++I) {
+ unsigned Reg = MI.getOperand(I).getReg();
+ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg);
+ if (RegNo != 31)
+ res++;
+ else
+ res |= 0x10;
+ }
+ return res;
+}
+
+unsigned
+MipsMCCodeEmitter::getRegisterListOpValue16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return (MI.getNumOperands() - 4);
+}
+
+unsigned
+MipsMCCodeEmitter::getRegisterPairOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
+}
+
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 304167fd03db..0f0f49ddb978 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
//
-#ifndef MIPS_MC_CODE_EMITTER_H
-#define MIPS_MC_CODE_EMITTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/DataTypes.h"
@@ -60,7 +60,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- // getBranchJumpOpValue - Return binary encoding of the jump
+ // getJumpTargetOpValue - Return binary encoding of the jump
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
@@ -74,6 +74,26 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+  // getUImm5Lsl2Encoding - Return binary encoding of the microMIPS 5-bit
+  // unsigned immediate operand scaled by 4 (encoded as 'immediate >> 2').
+ unsigned getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getSImm9AddiuspValue - Return binary encoding of the microMIPS addiusp
+ // instruction immediate operand.
+ unsigned getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getBranchTargetOpValue - Return binary encoding of the branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -81,6 +101,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+  // getBranchTarget7OpValueMM - Return binary encoding of the microMIPS branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTarget7OpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getBranchTargetOpValue - Return binary encoding of the microMIPS branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -122,9 +149,24 @@ public:
unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm4(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm4Lsl1(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm4Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -145,9 +187,27 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getUImm3Mod8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getUImm4AndValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getRegisterPairOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
}; // class MipsMCCodeEmitter
} // namespace llvm.
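The register-list operands declared above use a compact 5-bit field for SWM32_MM/LWM32_MM: as the .cpp change shows, getRegisterListOpValue counts the run of saved registers and flags $ra separately in bit 4. A local mirror over MCRegisterInfo encoding values (encodeRegList is illustrative, not the emitter):

    #include <cassert>
    #include <vector>

    // Count non-$ra registers in the list; $ra (encoding 31) sets bit 4.
    unsigned encodeRegList(const std::vector<unsigned> &EncodedRegs) {
      unsigned Res = 0;
      for (unsigned RegNo : EncodedRegs) {
        if (RegNo != 31)
          ++Res;
        else
          Res |= 0x10;
      }
      return Res;
    }

    int main() {
      // e.g. swm32 $16-$18, $ra, 8($sp): three saved registers plus $ra.
      assert(encodeRegList({16, 17, 18, 31}) == 0x13);
      return 0;
    }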
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 5bba3e5b7ae2..74490f334b37 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -80,8 +80,9 @@ void MipsMCExpr::PrintImpl(raw_ostream &OS) const {
bool
MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
}
void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index f193dc9b9d50..2b8f0c89a2e6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCEXPR_H
-#define MIPSMCEXPR_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
@@ -48,7 +48,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 01d5363812ed..e756b4703efa 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCNACL_H
-#define MIPSMCNACL_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H
#include "llvm/MC/MCELFStreamer.h"
@@ -26,8 +26,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack);
-
+ bool RelaxAll);
}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index d2b929bea334..bab425469433 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -109,15 +109,12 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack) {
+ const MCSubtargetInfo &STI, bool RelaxAll) {
MCStreamer *S;
if (!Triple(TT).isOSNaCl())
- S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll,
- NoExecStack);
+ S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
else
- S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll,
- NoExecStack);
+ S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
new MipsTargetELFStreamer(*S, STI);
return S;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 161d1eae82b9..f08a8f46fe9c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCTARGETDESC_H
-#define MIPSMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCTARGETDESC_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 6cde8f9ae3e4..92b84551a68f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -255,13 +255,11 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack) {
+ bool RelaxAll) {
MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter,
STI);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
// Set bundle-alignment as required by the NaCl ABI for the target.
S->EmitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 4a178e2df7a9..1e092f219be9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -29,17 +29,21 @@
using namespace llvm;
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
- : MCTargetStreamer(S), canHaveModuleDirective(true) {}
+ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+}
void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {}
void MipsTargetStreamer::emitDirectiveSetMips16() {}
-void MipsTargetStreamer::emitDirectiveSetNoMips16() {}
-void MipsTargetStreamer::emitDirectiveSetReorder() {}
+void MipsTargetStreamer::emitDirectiveSetNoMips16() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoReorder() {}
-void MipsTargetStreamer::emitDirectiveSetMacro() {}
-void MipsTargetStreamer::emitDirectiveSetNoMacro() {}
-void MipsTargetStreamer::emitDirectiveSetAt() {}
-void MipsTargetStreamer::emitDirectiveSetNoAt() {}
+void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {}
void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {}
void MipsTargetStreamer::emitDirectiveAbiCalls() {}
@@ -52,11 +56,26 @@ void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
}
-void MipsTargetStreamer::emitDirectiveSetMips32R2() {}
-void MipsTargetStreamer::emitDirectiveSetMips64() {}
-void MipsTargetStreamer::emitDirectiveSetMips64R2() {}
-void MipsTargetStreamer::emitDirectiveSetDsp() {}
-void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
+ forbidModuleDirective();
+}
+void MipsTargetStreamer::emitDirectiveSetMips0() {}
+void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips4() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips5() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R6() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetPop() {}
+void MipsTargetStreamer::emitDirectiveSetPush() {}
+void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) {
}
@@ -72,52 +91,62 @@ MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() {
OS << "\t.set\tmicromips\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() {
OS << "\t.set\tnomicromips\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips16() {
OS << "\t.set\tmips16\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() {
OS << "\t.set\tnomips16\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoMips16();
}
void MipsTargetAsmStreamer::emitDirectiveSetReorder() {
OS << "\t.set\treorder\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetReorder();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() {
OS << "\t.set\tnoreorder\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetMacro() {
OS << "\t.set\tmacro\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMacro();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() {
OS << "\t.set\tnomacro\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoMacro();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMsa() {
+ OS << "\t.set\tmsa\n";
+ MipsTargetStreamer::emitDirectiveSetMsa();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() {
+ OS << "\t.set\tnomsa\n";
+ MipsTargetStreamer::emitDirectiveSetNoMsa();
}
void MipsTargetAsmStreamer::emitDirectiveSetAt() {
OS << "\t.set\tat\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetAt();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoAt() {
OS << "\t.set\tnoat\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoAt();
}
void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) {
@@ -152,25 +181,82 @@ void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
<< StringRef(MipsInstPrinter::getRegisterName(ReturnReg)).lower() << '\n';
}
+void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) {
+ OS << "\t.set arch=" << Arch << "\n";
+ MipsTargetStreamer::emitDirectiveSetArch(Arch);
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips1() {
+ OS << "\t.set\tmips1\n";
+ MipsTargetStreamer::emitDirectiveSetMips1();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips2() {
+ OS << "\t.set\tmips2\n";
+ MipsTargetStreamer::emitDirectiveSetMips2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips3() {
+ OS << "\t.set\tmips3\n";
+ MipsTargetStreamer::emitDirectiveSetMips3();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips4() {
+ OS << "\t.set\tmips4\n";
+ MipsTargetStreamer::emitDirectiveSetMips4();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips5() {
+ OS << "\t.set\tmips5\n";
+ MipsTargetStreamer::emitDirectiveSetMips5();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips32() {
+ OS << "\t.set\tmips32\n";
+ MipsTargetStreamer::emitDirectiveSetMips32();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
OS << "\t.set\tmips32r2\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips32R2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips32R6() {
+ OS << "\t.set\tmips32r6\n";
+ MipsTargetStreamer::emitDirectiveSetMips32R6();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64() {
OS << "\t.set\tmips64\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips64();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
OS << "\t.set\tmips64r2\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips64R2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips64R6() {
+ OS << "\t.set\tmips64r6\n";
+ MipsTargetStreamer::emitDirectiveSetMips64R6();
}
void MipsTargetAsmStreamer::emitDirectiveSetDsp() {
OS << "\t.set\tdsp\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetDsp();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
+ OS << "\t.set\tnodsp\n";
+ MipsTargetStreamer::emitDirectiveSetNoDsp();
}
+
+void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; }
+
// Print a 32 bit hex number with all numbers.
static void printHex32(unsigned Value, raw_ostream &OS) {
OS << "0x";
@@ -192,10 +278,10 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
OS << "," << FPUTopSavedRegOff << '\n';
}
-void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
+void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
OS << "\t.cpload\t$"
<< StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -214,7 +300,7 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
OS << Sym.getName() << "\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveModuleFP(
@@ -378,11 +464,12 @@ void MipsTargetELFStreamer::emitDirectiveSetMicroMips() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_MICROMIPS;
MCA.setELFHeaderEFlags(Flags);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() {
MicroMipsEnabled = false;
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetMips16() {
@@ -390,17 +477,7 @@ void MipsTargetELFStreamer::emitDirectiveSetMips16() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_ARCH_ASE_M16;
MCA.setELFHeaderEFlags(Flags);
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetNoMips16() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetReorder() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
@@ -408,35 +485,49 @@ void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_NOREORDER;
MCA.setELFHeaderEFlags(Flags);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
-void MipsTargetELFStreamer::emitDirectiveSetMacro() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCContext &Context = MCA.getContext();
+ MCStreamer &OS = getStreamer();
-void MipsTargetELFStreamer::emitDirectiveSetNoMacro() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ const MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
+                                                ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
-void MipsTargetELFStreamer::emitDirectiveSetAt() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ const MCSymbolRefExpr *ExprRef =
+ MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context);
-void MipsTargetELFStreamer::emitDirectiveSetNoAt() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ MCSectionData &SecData = MCA.getOrCreateSectionData(*Sec);
+ SecData.setAlignment(4);
-void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
- // FIXME: implement.
+ OS.PushSection();
+
+ OS.SwitchSection(Sec);
+
+ OS.EmitValueImpl(ExprRef, 4);
+
+ OS.EmitIntValue(GPRInfoSet ? GPRBitMask : 0, 4); // reg_mask
+ OS.EmitIntValue(GPRInfoSet ? GPROffset : 0, 4); // reg_offset
+
+ OS.EmitIntValue(FPRInfoSet ? FPRBitMask : 0, 4); // fpreg_mask
+ OS.EmitIntValue(FPRInfoSet ? FPROffset : 0, 4); // fpreg_offset
+
+ OS.EmitIntValue(FrameInfoSet ? FrameOffset : 0, 4); // frame_offset
+ OS.EmitIntValue(FrameInfoSet ? FrameReg : 0, 4); // frame_reg
+ OS.EmitIntValue(FrameInfoSet ? ReturnReg : 0, 4); // return_reg
+
+ // The .end directive marks the end of a procedure. Invalidate
+ // the information gathered up until this point.
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+
+ OS.PopSection();
}
void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {
- // FIXME: implement.
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
}
void MipsTargetELFStreamer::emitDirectiveAbiCalls() {
@@ -482,37 +573,31 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() {
}
void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg) {
- // FIXME: implement.
+ unsigned ReturnReg_) {
+ MCContext &Context = getStreamer().getAssembler().getContext();
+ const MCRegisterInfo *RegInfo = Context.getRegisterInfo();
+
+ FrameInfoSet = true;
+ FrameReg = RegInfo->getEncodingValue(StackReg);
+ FrameOffset = StackSize;
+ ReturnReg = RegInfo->getEncodingValue(ReturnReg_);
}
void MipsTargetELFStreamer::emitMask(unsigned CPUBitmask,
int CPUTopSavedRegOff) {
- // FIXME: implement.
+ GPRInfoSet = true;
+ GPRBitMask = CPUBitmask;
+ GPROffset = CPUTopSavedRegOff;
}
void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask,
int FPUTopSavedRegOff) {
- // FIXME: implement.
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips32R2() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips64() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips64R2() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetDsp() {
- setCanHaveModuleDir(false);
+ FPRInfoSet = true;
+ FPRBitMask = FPUBitmask;
+ FPROffset = FPUTopSavedRegOff;
}
-void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
+void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
// .cpload $reg
// This directive expands to:
// lui $gp, %hi(_gp_disp)
@@ -560,7 +645,7 @@ void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
TmpInst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -617,7 +702,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
Inst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(Inst, STI);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitMipsAbiFlags() {
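emitDirectiveEnd now materialises a .pdr record instead of being a FIXME: one 4-byte symbol reference followed by the seven 4-byte values gathered from .mask, .fmask and .frame. A sketch of the resulting record layout, assuming the fields land in emission order (the struct is illustrative; the streamer emits the words individually):

    #include <cstdint>

    struct PdrEntry {
      uint32_t ProcSym;     // relocated address of the procedure symbol
      uint32_t RegMask;     // reg_mask from .mask (GPRBitMask)
      uint32_t RegOffset;   // reg_offset from .mask (GPROffset)
      uint32_t FpRegMask;   // fpreg_mask from .fmask (FPRBitMask)
      uint32_t FpRegOffset; // fpreg_offset from .fmask (FPROffset)
      uint32_t FrameOffset; // frame_offset from .frame (stack size)
      uint32_t FrameReg;    // frame_reg from .frame (encoding value)
      uint32_t ReturnReg;   // return_reg from .frame (encoding value)
    };
    static_assert(sizeof(PdrEntry) == 32, "one .pdr record is 32 bytes");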
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 41efa470e42a..56db450f6961 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -13,7 +13,7 @@ TARGET = Mips
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
- MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \
+ MipsGenAsmWriter.inc MipsGenFastISel.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
MipsGenDisassemblerTables.inc \
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index b93017a7400a..fae70598fa33 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -123,10 +123,10 @@ def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd,
II_MFC1, bitconvert>, MFC1_FM_MM<0x80>;
def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd,
II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>;
-def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
- MFC1_FM_MM<3>, ISA_MIPS32R2;
-def MTHC1_MM : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM_MM<7>, ISA_MIPS32R2;
+def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
+ MFC1_FM_MM<0xc0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+def MTHC1_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM_MM<0xe0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM_MM<0x1>;
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 15b951d98c52..51b5c0cd612b 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -41,6 +41,118 @@ class MicroMipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
// MicroMIPS 16-bit Instruction Formats
//===----------------------------------------------------------------------===//
+class ARITH_FM_MM16<bit funct> {
+ bits<3> rd;
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x01;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rs;
+ let Inst{0} = funct;
+}
+
+class ANDI_FM_MM16<bits<6> funct> {
+ bits<3> rd;
+ bits<3> rs;
+ bits<4> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = funct;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = imm;
+}
+
+class LOGIC_FM_MM16<bits<4> funct> {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-6} = funct;
+ let Inst{5-3} = rt;
+ let Inst{2-0} = rs;
+}
+
+class SHIFT_FM_MM16<bits<1> funct> {
+ bits<3> rd;
+ bits<3> rt;
+ bits<3> shamt;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x09;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = shamt;
+ let Inst{0} = funct;
+}
+
+class ADDIUR2_FM_MM16 {
+ bits<3> rd;
+ bits<3> rs;
+ bits<3> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x1b;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rs;
+ let Inst{3-1} = imm;
+ let Inst{0} = 0;
+}
+
+class LOAD_STORE_FM_MM16<bits<6> op> {
+ bits<3> rt;
+ bits<7> addr;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = addr{6-4};
+ let Inst{3-0} = addr{3-0};
+}
+
+class LOAD_STORE_SP_FM_MM16<bits<6> op> {
+ bits<5> rt;
+ bits<5> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-5} = rt;
+ let Inst{4-0} = offset;
+}
+
+class ADDIUS5_FM_MM16 {
+ bits<5> rd;
+ bits<4> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x13;
+ let Inst{9-5} = rd;
+ let Inst{4-1} = imm;
+ let Inst{0} = 0;
+}
+
+class ADDIUSP_FM_MM16 {
+ bits<9> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x13;
+ let Inst{9-1} = imm;
+ let Inst{0} = 1;
+}
+
class MOVE_FM_MM16<bits<6> funct> {
bits<5> rs;
bits<5> rd;
@@ -52,6 +164,17 @@ class MOVE_FM_MM16<bits<6> funct> {
let Inst{4-0} = rs;
}
+class LI_FM_MM16 {
+ bits<3> rd;
+ bits<7> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x3b;
+ let Inst{9-7} = rd;
+ let Inst{6-0} = imm;
+}
+
class JALR_FM_MM16<bits<5> op> {
bits<5> rs;
@@ -72,6 +195,49 @@ class MFHILO_FM_MM16<bits<5> funct> {
let Inst{4-0} = rd;
}
+class JRADDIUSP_FM_MM16<bits<5> op> {
+ bits<5> rs;
+ bits<5> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = op;
+ let Inst{4-0} = imm;
+}
+
+class ADDIUR1SP_FM_MM16 {
+ bits<3> rd;
+ bits<6> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x1b;
+ let Inst{9-7} = rd;
+ let Inst{6-1} = imm;
+ let Inst{0} = 1;
+}
+
+class BRKSDBBP16_FM_MM<bits<6> op> {
+ bits<4> code_;
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-4} = op;
+ let Inst{3-0} = code_;
+}
+
+class BEQNEZ_FM_MM16<bits<6> op> {
+ bits<3> rs;
+ bits<7> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-7} = rs;
+ let Inst{6-0} = offset;
+}
+
//===----------------------------------------------------------------------===//
// MicroMIPS 32-bit Instruction Formats
//===----------------------------------------------------------------------===//
@@ -621,3 +787,114 @@ class MADDS_FM_MM<bits<6> funct>: MMArch {
let Inst{10-6} = fr;
let Inst{5-0} = funct;
}
+
+class COMPACT_BRANCH_FM_MM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class COP0_TLB_FM_MM<bits<10> op> : MMArch {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-16} = 0x0;
+ let Inst{15-6} = op;
+ let Inst{5-0} = 0x3c;
+}
+
+class SDBBP_FM_MM : MMArch {
+ bits<10> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-16} = code_;
+ let Inst{15-6} = 0x36d;
+ let Inst{5-0} = 0x3c;
+}
+
+class RDHWR_FM_MM : MMArch {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = 0x1ac;
+ let Inst{5-0} = 0x3c;
+}
+
+class LWXS_FM_MM<bits<10> funct> {
+ bits<5> rd;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = funct;
+}
+
+class LWM_FM_MM<bits<4> funct> : MMArch {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x8;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = funct;
+ let Inst{11-0} = addr{11-0};
+}
+
+class LWM_FM_MM16<bits<4> funct> : MMArch {
+ bits<2> rt;
+ bits<4> addr;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-6} = funct;
+ let Inst{5-4} = rt;
+ let Inst{3-0} = addr;
+}
+
+class CACHE_PREF_FM_MM<bits<6> op, bits<4> funct> : MMArch {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<12> offset = addr{11-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = base;
+ let Inst{15-12} = funct;
+ let Inst{11-0} = offset;
+}
+
+class BARRIER_FM_MM<bits<5> op> : MMArch {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = 0x0;
+ let Inst{20-16} = 0x0;
+ let Inst{15-11} = op;
+ let Inst{10-6} = 0x0;
+ let Inst{5-0} = 0x0;
+}
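The 16-bit format classes above only pin down bit positions; the field values come from the matching encoder methods earlier in the patch. As a concrete composition, a C++ mirror of ADDIUSP_FM_MM16's layout, fed by the addiusp field sketched after the .cpp diff (packAddiuSp is illustrative):

    #include <cassert>
    #include <cstdint>

    // ADDIUSP_FM_MM16: pool opcode 0x13 in bits 15-10, imm in 9-1, 1 in bit 0.
    uint16_t packAddiuSp(unsigned EncodedImm9) {
      assert(EncodedImm9 < (1u << 9) && "field is 9 bits wide");
      return static_cast<uint16_t>((0x13u << 10) | (EncodedImm9 << 1) | 1u);
    }

    int main() {
      // addiusp 8: the encoder yields field 0x002, giving halfword 0x4c05.
      assert(packAddiuSp(0x002) == 0x4c05);
      return 0;
    }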
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 87a3a3e29ca2..241f4528658c 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,9 +1,101 @@
def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrMM", [frameindex]>;
+def simm4 : Operand<i32> {
+ let DecoderMethod = "DecodeSimm4";
+}
+def li_simm7 : Operand<i32> {
+ let DecoderMethod = "DecodeLiSimm7";
+}
+
def simm12 : Operand<i32> {
let DecoderMethod = "DecodeSimm12";
}
+def uimm5_lsl2 : Operand<OtherVT> {
+ let EncoderMethod = "getUImm5Lsl2Encoding";
+ let DecoderMethod = "DecodeUImm5lsl2";
+}
+
+def uimm6_lsl2 : Operand<i32> {
+ let EncoderMethod = "getUImm6Lsl2Encoding";
+ let DecoderMethod = "DecodeUImm6Lsl2";
+}
+
+def simm9_addiusp : Operand<i32> {
+ let EncoderMethod = "getSImm9AddiuspValue";
+ let DecoderMethod = "DecodeSimm9SP";
+}
+
+def uimm3_shift : Operand<i32> {
+ let EncoderMethod = "getUImm3Mod8Encoding";
+}
+
+def simm3_lsa2 : Operand<i32> {
+ let EncoderMethod = "getSImm3Lsa2Value";
+ let DecoderMethod = "DecodeAddiur2Simm7";
+}
+
+def uimm4_andi : Operand<i32> {
+ let EncoderMethod = "getUImm4AndValue";
+ let DecoderMethod = "DecodeANDI16Imm";
+}
+
+def immSExtAddiur2 : ImmLeaf<i32, [{return Imm == 1 || Imm == -1 ||
+ ((Imm % 4 == 0) &&
+ Imm < 28 && Imm > 0);}]>;
+
+def immSExtAddius5 : ImmLeaf<i32, [{return Imm >= -8 && Imm <= 7;}]>;
+
+def immZExtAndi16 : ImmLeaf<i32,
+ [{return (Imm == 128 || (Imm >= 1 && Imm <= 4) || Imm == 7 || Imm == 8 ||
+ Imm == 15 || Imm == 16 || Imm == 31 || Imm == 32 || Imm == 63 ||
+ Imm == 64 || Imm == 255 || Imm == 32768 || Imm == 65535 );}]>;
+
+def immZExt2Shift : ImmLeaf<i32, [{return Imm >= 1 && Imm <= 8;}]>;
+
+def immLi16 : ImmLeaf<i32, [{return Imm >= -1 && Imm <= 126;}]>;
+
+def MicroMipsMemGPRMM16AsmOperand : AsmOperandClass {
+ let Name = "MicroMipsMem";
+ let RenderMethod = "addMicroMipsMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithGRPMM16Base";
+}
+
+class mem_mm_4_generic : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops ptr_rc, simm4);
+ let OperandType = "OPERAND_MEMORY";
+ let ParserMatchClass = MicroMipsMemGPRMM16AsmOperand;
+}
+
+def mem_mm_4 : mem_mm_4_generic {
+ let EncoderMethod = "getMemEncodingMMImm4";
+}
+
+def mem_mm_4_lsl1 : mem_mm_4_generic {
+ let EncoderMethod = "getMemEncodingMMImm4Lsl1";
+}
+
+def mem_mm_4_lsl2 : mem_mm_4_generic {
+ let EncoderMethod = "getMemEncodingMMImm4Lsl2";
+}
+
+def MicroMipsMemSPAsmOperand : AsmOperandClass {
+ let Name = "MicroMipsMemSP";
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithUimmWordAlignedOffsetSP<7>";
+}
+
+def mem_mm_sp_imm5_lsl2 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32:$base, simm5:$offset);
+ let OperandType = "OPERAND_MEMORY";
+ let ParserMatchClass = MicroMipsMemSPAsmOperand;
+ let EncoderMethod = "getMemEncodingMMSPImm5Lsl2";
+}
+
def mem_mm_12 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm12);
@@ -12,6 +104,22 @@ def mem_mm_12 : Operand<i32> {
let OperandType = "OPERAND_MEMORY";
}
+def MipsMemUimm4AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetUimm4";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithUimmOffsetSP<6>";
+}
+
+def mem_mm_4sp : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, uimm8);
+ let EncoderMethod = "getMemEncodingMMImm4sp";
+ let ParserMatchClass = MipsMemUimm4AsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
def jmptarget_mm : Operand<OtherVT> {
let EncoderMethod = "getJumpTargetOpValueMM";
}
@@ -20,12 +128,29 @@ def calltarget_mm : Operand<iPTR> {
let EncoderMethod = "getJumpTargetOpValueMM";
}
+def brtarget7_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget7OpValueMM";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget7MM";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
def brtarget_mm : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValueMM";
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTargetMM";
}
+class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 0;
+ let Defs = [AT];
+}
+
let canFoldAsLoad = 1 in
class LoadLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
Operand MemOpnd> :
@@ -45,6 +170,36 @@ class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
let DecoderMethod = "DecodeMemMMImm12";
}
+/// A register pair used by load/store pair instructions.
+def RegPairAsmOperand : AsmOperandClass {
+ let Name = "RegPair";
+ let ParserMethod = "parseRegisterPair";
+}
+
+def regpair : Operand<i32> {
+ let EncoderMethod = "getRegisterPairOpValue";
+ let ParserMatchClass = RegPairAsmOperand;
+ let PrintMethod = "printRegisterPair";
+ let DecoderMethod = "DecodeRegPairOperand";
+ let MIOperandInfo = (ops GPR32Opnd, GPR32Opnd);
+}
+
+class StorePairMM<string opstr, InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins regpair:$rt, mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayStore = 1;
+}
+
+class LoadPairMM<string opstr, InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ InstSE<(outs regpair:$rt), (ins mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayLoad = 1;
+}
+
class LLBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
@@ -70,6 +225,96 @@ class LoadMM<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
let mayLoad = 1;
}
+class ArithRMM16<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ let isCommutable = isComm;
+}
+
+class AndImmMM16<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, uimm4_andi:$imm),
+ !strconcat(opstr, "\t$rd, $rs, $imm"), [], Itin, FrmI>;
+
+class LogicRMM16<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ MicroMipsInst16<(outs RO:$dst), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rt, $rs"),
+ [(set RO:$dst, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ let isCommutable = 1;
+ let Constraints = "$rt = $dst";
+}
+
+class NotMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rt), (ins RO:$rs),
+ !strconcat(opstr, "\t$rt, $rs"),
+ [(set RO:$rt, (not RO:$rs))], NoItinerary, FrmR>;
+
+class ShiftIMM16<string opstr, Operand ImmOpnd, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"), [], Itin, FrmR>;
+
+class LoadMM16<string opstr, DAGOperand RO, SDPatternOperator OpNode,
+ InstrItinClass Itin, Operand MemOpnd> :
+ MicroMipsInst16<(outs RO:$rt), (ins MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm4";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+}
+
+class StoreMM16<string opstr, DAGOperand RTOpnd, DAGOperand RO,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ Operand MemOpnd> :
+ MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm4";
+ let mayStore = 1;
+}
+
+class LoadSPMM16<string opstr, DAGOperand RO, InstrItinClass Itin,
+ Operand MemOpnd> :
+ MicroMipsInst16<(outs RO:$rt), (ins MemOpnd:$offset),
+ !strconcat(opstr, "\t$rt, $offset"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+}
+
+class StoreSPMM16<string opstr, DAGOperand RO, InstrItinClass Itin,
+ Operand MemOpnd> :
+ MicroMipsInst16<(outs), (ins RO:$rt, MemOpnd:$offset),
+ !strconcat(opstr, "\t$rt, $offset"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
+ let mayStore = 1;
+}
+
+class AddImmUR2<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, simm3_lsa2:$imm),
+ !strconcat(opstr, "\t$rd, $rs, $imm"),
+ [], NoItinerary, FrmR> {
+ let isCommutable = 1;
+}
+
+class AddImmUS5<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$dst), (ins RO:$rd, simm4:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR> {
+ let Constraints = "$rd = $dst";
+}
+
+class AddImmUR1SP<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rd), (ins uimm6_lsl2:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR>;
+
+class AddImmUSP<string opstr> :
+ MicroMipsInst16<(outs), (ins simm9_addiusp:$imm),
+ !strconcat(opstr, "\t$imm"), [], NoItinerary, FrmI>;
+
class MoveFromHILOMM<string opstr, RegisterOperand RO, Register UseReg> :
MicroMipsInst16<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"),
[], II_MFHI_MFLO, FrmR> {
@@ -85,6 +330,12 @@ class MoveMM16<string opstr, RegisterOperand RO, bit isComm = 0,
let isReMaterializable = 1;
}
+class LoadImmMM16<string opstr, Operand Od, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rd), (ins Od:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmI> {
+ let isReMaterializable = 1;
+}
+
// 16-bit Jump and Link (Call)
class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
@@ -94,16 +345,207 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
let Defs = [RA];
}
+// 16-bit Jump Reg
+class JumpRegMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Base class for JRADDIUSP instruction.
+class JumpRAddiuStackMM16 :
+ MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm",
+ [], IIBranch, FrmR> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// 16-bit Jump and Link (Call) - Short Delay Slot
+class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let isCall = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
+}
+
+// 16-bit Jump Register Compact - No delay slot
+class JumpRegCMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Break16 and Sdbbp16
+class BrkSdbbp16MM<string opstr> :
+ MicroMipsInst16<(outs), (ins uimm4:$code_),
+ !strconcat(opstr, "\t$code_"),
+ [], NoItinerary, FrmOther>;
+
+class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+}
+
+// MicroMIPS Jump and Link (Call) - Short Delay Slot
+let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
+ class JumpLinkMM<string opstr, DAGOperand opnd> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [], IIBranch, FrmJ, opstr> {
+ let DecoderMethod = "DecodeJumpTargetMM";
+ }
+
+ class JumpLinkRegMM<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
+
+ class BranchCompareToZeroLinkMM<string opstr, DAGOperand opnd,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+}
+
+class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
+ !strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+
+/// A list of registers used by load/store multiple instructions.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let ParserMethod = "parseRegisterList";
+}
+
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
+def RegList16AsmOperand : AsmOperandClass {
+ let Name = "RegList16";
+ let ParserMethod = "parseRegisterList";
+ let PredicateMethod = "isRegList16";
+ let RenderMethod = "addRegListOperands";
+}
+
+def reglist16 : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue16";
+ let DecoderMethod = "DecodeRegListOperand16";
+ let PrintMethod = "printRegisterList";
+ let ParserMatchClass = RegList16AsmOperand;
+}
+
+class StoreMultMM<string opstr,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins reglist:$rt, mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayStore = 1;
+}
+
+class LoadMultMM<string opstr,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs reglist:$rt), (ins mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayLoad = 1;
+}
+
+class StoreMultMM16<string opstr,
+ InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let mayStore = 1;
+}
+
+class LoadMultMM16<string opstr,
+ InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let mayLoad = 1;
+}
+
+def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
+ ARITH_FM_MM16<0>;
+def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ ARITH_FM_MM16<1>;
+def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>;
+def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
+ LOGIC_FM_MM16<0x2>;
+def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
+ LOGIC_FM_MM16<0x3>;
+def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ LOGIC_FM_MM16<0x1>;
+def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>;
+def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
+ SHIFT_FM_MM16<0>;
+def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
+ SHIFT_FM_MM16<1>;
+def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU,
+ mem_mm_4>, LOAD_STORE_FM_MM16<0x02>;
+def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU,
+ mem_mm_4_lsl1>, LOAD_STORE_FM_MM16<0x0a>;
+def LW16_MM : LoadMM16<"lw16", GPRMM16Opnd, load, II_LW, mem_mm_4_lsl2>,
+ LOAD_STORE_FM_MM16<0x1a>;
+def SB16_MM : StoreMM16<"sb16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei8,
+ II_SB, mem_mm_4>, LOAD_STORE_FM_MM16<0x22>;
+def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16,
+ II_SH, mem_mm_4_lsl1>,
+ LOAD_STORE_FM_MM16<0x2a>;
+def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW,
+ mem_mm_4_lsl2>, LOAD_STORE_FM_MM16<0x3a>;
+def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
+ LOAD_STORE_SP_FM_MM16<0x12>;
+def SWSP_MM : StoreSPMM16<"sw", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
+ LOAD_STORE_SP_FM_MM16<0x32>;
+def ADDIUR1SP_MM : AddImmUR1SP<"addiur1sp", GPRMM16Opnd>, ADDIUR1SP_FM_MM16;
+def ADDIUR2_MM : AddImmUR2<"addiur2", GPRMM16Opnd>, ADDIUR2_FM_MM16;
+def ADDIUS5_MM : AddImmUS5<"addius5", GPR32Opnd>, ADDIUS5_FM_MM16;
+def ADDIUSP_MM : AddImmUSP<"addiusp">, ADDIUSP_FM_MM16;
def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>;
def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
+def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16,
+ IsAsCheapAsAMove;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>;
+def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>;
+def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>;
+def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>;
+def JR16_MM : JumpRegMM16<"jr16", GPR32Opnd>, JALR_FM_MM16<0x0c>;
+def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>,
+ BEQNEZ_FM_MM16<0x23>;
+def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>,
+ BEQNEZ_FM_MM16<0x2b>;
+def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>;
+def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>;
class WaitMM<string opstr> :
InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
NoItinerary, FrmOther, opstr>;
let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
+ /// Compact Branch Instructions
+ def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
+ COMPACT_BRANCH_FM_MM<0x7>;
+ def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, setne, GPR32Opnd>,
+ COMPACT_BRANCH_FM_MM<0x5>;
+
/// Arithmetic Instructions (ALU Immediate)
def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>,
ADDI_FM_MM<0xc>;
@@ -179,6 +621,8 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
}
+ def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>;
+
def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>;
/// Load and Store Instructions - unaligned
@@ -191,6 +635,16 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
LWL_FM_MM<0x9>;
+ /// Load and Store Instructions - multiple
+ def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>;
+ def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>;
+ def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>;
+ def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>;
+
+ /// Load and Store Pair Instructions
+ def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>;
+ def LWP_MM : LoadPairMM<"lwp">, LWM_FM_MM<0x1>;
+
/// Move Conditional
def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd,
NoItinerary>, ADD_FM_MM<0, 0x58>;
@@ -247,6 +701,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
+ /// Jump Instructions - Short Delay Slot
+ def JALS_MM : JumpLinkMM<"jals", calltarget_mm>, J_FM_MM<0x1d>;
+ def JALRS_MM : JumpLinkRegMM<"jalrs", GPR32Opnd>, JALR_FM_MM<0x13c>;
+
/// Branch Instructions
def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
BEQ_FM_MM<0x25>;
@@ -265,6 +723,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def BLTZAL_MM : MMRel, BGEZAL_FT<"bltzal", brtarget_mm, GPR32Opnd>,
BGEZAL_FM_MM<0x01>;
+ /// Branch Instructions - Short Delay Slot
+ def BGEZALS_MM : BranchCompareToZeroLinkMM<"bgezals", brtarget_mm,
+ GPR32Opnd>, BGEZAL_FM_MM<0x13>;
+ def BLTZALS_MM : BranchCompareToZeroLinkMM<"bltzals", brtarget_mm,
+ GPR32Opnd>, BGEZAL_FM_MM<0x11>;
+
/// Control Instructions
def SYNC_MM : MMRel, SYNC_FT<"sync">, SYNC_FM_MM;
def BREAK_MM : MMRel, BRK_FT<"break">, BRK_FM_MM;
@@ -295,12 +759,66 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Load-linked, Store-conditional
def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>;
def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>;
+
+ let DecoderMethod = "DecodeCacheOpMM" in {
+ def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>,
+ CACHE_PREF_FM_MM<0x08, 0x6>;
+ def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>,
+ CACHE_PREF_FM_MM<0x18, 0x2>;
+ }
+ def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>;
+ def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>;
+ def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>;
+
+ def TLBP_MM : MMRel, TLB<"tlbp">, COP0_TLB_FM_MM<0x0d>;
+ def TLBR_MM : MMRel, TLB<"tlbr">, COP0_TLB_FM_MM<0x4d>;
+ def TLBWI_MM : MMRel, TLB<"tlbwi">, COP0_TLB_FM_MM<0x8d>;
+ def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>;
+
+ def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM;
+ def RDHWR_MM : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM;
}
+let Predicates = [InMicroMips] in {
+
+//===----------------------------------------------------------------------===//
+// MicroMips arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+def : MipsPat<(i32 immLi16:$imm),
+ (LI16_MM immLi16:$imm)>;
+def : MipsPat<(i32 immSExt16:$imm),
+ (ADDiu_MM ZERO, immSExt16:$imm)>;
+def : MipsPat<(i32 immZExt16:$imm),
+ (ORi_MM ZERO, immZExt16:$imm)>;
+
+def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm),
+ (ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>;
+def : MipsPat<(add GPR32:$src, immSExtAddius5:$imm),
+ (ADDIUS5_MM GPR32:$src, immSExtAddius5:$imm)>;
+def : MipsPat<(add GPR32:$src, immSExt16:$imm),
+ (ADDiu_MM GPR32:$src, immSExt16:$imm)>;
+
+def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
+ (ANDI16_MM GPRMM16:$src, immZExtAndi16:$imm)>;
+def : MipsPat<(and GPR32:$src, immZExt16:$imm),
+ (ANDi_MM GPR32:$src, immZExt16:$imm)>;
+
+def : MipsPat<(shl GPRMM16:$src, immZExt2Shift:$imm),
+ (SLL16_MM GPRMM16:$src, immZExt2Shift:$imm)>;
+def : MipsPat<(shl GPR32:$src, immZExt5:$imm),
+ (SLL_MM GPR32:$src, immZExt5:$imm)>;
+
+def : MipsPat<(srl GPRMM16:$src, immZExt2Shift:$imm),
+ (SRL16_MM GPRMM16:$src, immZExt2Shift:$imm)>;
+def : MipsPat<(srl GPR32:$src, immZExt5:$imm),
+ (SRL_MM GPR32:$src, immZExt5:$imm)>;
+
//===----------------------------------------------------------------------===//
// MicroMips instruction aliases
//===----------------------------------------------------------------------===//
-let Predicates = [InMicroMips] in {
def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
+ def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
+ def : MipsInstAlias<"nop", (MOVE16_MM ZERO, ZERO), 1>;
}
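
A note on the materialization patterns above: immLi16, immSExt16, and immZExt16 partition the i32 constants so the cheapest encoding wins, with LI16_MM marked IsAsCheapAsAMove. A minimal stand-alone sketch of that partitioning follows; the li16 range of -1..126 is an assumption here, the authoritative definition being the immLi16 predicate itself.

#include <cstdint>

enum class ImmSel { LI16, ADDiu, ORi, Pair };

// Classify a 32-bit constant the way the three immediate MipsPat entries
// above do (illustrative, not LLVM API).
ImmSel selectImmMaterialization(int64_t Imm) {
  if (Imm >= -1 && Imm <= 126)              // assumed li16-encodable range
    return ImmSel::LI16;                    // LI16_MM (16-bit encoding)
  if (Imm >= INT16_MIN && Imm <= INT16_MAX)
    return ImmSel::ADDiu;                   // ADDiu_MM $dst, $zero, imm
  if (Imm >= 0 && Imm <= UINT16_MAX)
    return ImmSel::ORi;                     // ORi_MM $dst, $zero, imm
  return ImmSel::Pair;                      // needs a LUi/ORi pair (not shown)
}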
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index d512d6589c40..87f1b0440132 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_MIPS_H
-#define TARGET_MIPS_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS_H
+#define LLVM_LIB_TARGET_MIPS_MIPS_H
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,8 +26,6 @@ namespace llvm {
FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
- FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 93706c2c3fcc..6070276529e3 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -36,7 +36,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -84,7 +84,7 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -154,7 +154,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount = -Amount;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
}
@@ -174,7 +174,7 @@ void Mips16FrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
const MipsRegisterInfo &RI = TII.getRegisterInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
bool SaveS2 = Reserved[Mips::S2];
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 1fb7eda0e915..012d558d61a5 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16_FRAMEINFO_H
-#define MIPS16_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16FRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16FRAMELOWERING_H
#include "MipsFrameLowering.h"
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 14055d608e11..32dc90af2ef4 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -247,12 +247,12 @@ static void swapFPIntParams
// Having called needsFPHelperFromSig
//
static void assureFPCallStub(Function &F, Module *M,
- const MipsSubtarget &Subtarget) {
+ const MipsTargetMachine &TM) {
// for now we only need them for static relocation
- if (Subtarget.getRelocationModel() == Reloc::PIC_)
+ if (TM.getRelocationModel() == Reloc::PIC_)
return;
LLVMContext &Context = M->getContext();
- bool LE = Subtarget.isLittle();
+ bool LE = TM.isLittleEndian();
std::string Name = F.getName();
std::string SectionName = ".mips16.call.fp." + Name;
std::string StubName = "__call_stub_fp_" + Name;
@@ -362,8 +362,8 @@ static bool isIntrinsicInline(Function *F) {
// Returns of float, double and complex need to be handled with a helper
// function.
//
-static bool fixupFPReturnAndCall
- (Function &F, Module *M, const MipsSubtarget &Subtarget) {
+static bool fixupFPReturnAndCall(Function &F, Module *M,
+ const MipsTargetMachine &TM) {
bool Modified = false;
LLVMContext &C = M->getContext();
Type *MyVoid = Type::getVoidTy(C);
@@ -403,7 +403,7 @@ static bool fixupFPReturnAndCall
Attribute::ReadNone);
A = A.addAttribute(C, AttributeSet::FunctionIndex,
Attribute::NoInline);
- Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL));
+ Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
const Value* V = CI->getCalledValue();
@@ -426,9 +426,9 @@ static bool fixupFPReturnAndCall
Modified=true;
F.addFnAttr("saveS2");
}
- if (Subtarget.getRelocationModel() != Reloc::PIC_ ) {
+ if (TM.getRelocationModel() != Reloc::PIC_ ) {
if (needsFPHelperFromSig(*F_)) {
- assureFPCallStub(*F_, M, Subtarget);
+ assureFPCallStub(*F_, M, TM);
Modified=true;
}
}
@@ -439,9 +439,9 @@ static bool fixupFPReturnAndCall
}
static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
- const MipsSubtarget &Subtarget ) {
- bool PicMode = Subtarget.getRelocationModel() == Reloc::PIC_;
- bool LE = Subtarget.isLittle();
+ const MipsTargetMachine &TM) {
+ bool PicMode = TM.getRelocationModel() == Reloc::PIC_;
+ bool LE = TM.isLittleEndian();
LLVMContext &Context = M->getContext();
std::string Name = F->getName();
std::string SectionName = ".mips16.fn." + Name;
@@ -458,7 +458,6 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
FStub->setSection(SectionName);
BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub);
InlineAsmHelper IAH(Context, BB);
- IAH.Out(" .set macro");
if (PicMode) {
IAH.Out(".set noreorder");
IAH.Out(".cpload $$25");
@@ -467,7 +466,6 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
IAH.Out("la $$25," + LocalName);
}
else {
- IAH.Out(".set reorder");
IAH.Out("la $$25," + Name);
}
swapFPIntParams(PV, M, IAH, LE, false);
@@ -522,11 +520,11 @@ bool Mips16HardFloat::runOnModule(Module &M) {
}
if (F->isDeclaration() || F->hasFnAttribute("mips16_fp_stub") ||
F->hasFnAttribute("nomips16")) continue;
- Modified |= fixupFPReturnAndCall(*F, &M, Subtarget);
+ Modified |= fixupFPReturnAndCall(*F, &M, TM);
FPParamVariant V = whichFPParamVariantNeeded(*F);
if (V != NoSig) {
Modified = true;
- createFPFnStub(F, &M, V, Subtarget);
+ createFPFnStub(F, &M, V, TM);
}
}
return Modified;
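
A note on the refactor above: fixupFPReturnAndCall, assureFPCallStub, and createFPFnStub now take the MipsTargetMachine rather than a cached MipsSubtarget, since a module pass cannot assume a single subtarget for the whole module, while relocation model and endianness genuinely are module-wide. A rough stand-alone sketch of the resulting shape, with illustrative names that are not LLVM's API:

// Module-wide facts live on the target machine; per-function facts would
// come from a per-function subtarget instead (illustrative types).
struct TargetMachineLike {
  bool PIC;          // relocation model, fixed for the whole module
  bool LittleEndian; // endianness, fixed for the whole module
};

struct HardFloatPassLike {
  const TargetMachineLike &TM; // replaces the cached Subtarget reference
  explicit HardFloatPassLike(const TargetMachineLike &TM) : TM(TM) {}
  // Mirrors assureFPCallStub's early-out: stubs only for static relocation.
  bool wantsCallStub() const { return !TM.PIC; }
};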
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
index 826887ea5ebe..586cc252353b 100644
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ b/lib/Target/Mips/Mips16HardFloat.h
@@ -12,40 +12,29 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
+
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetMachine.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-
-#ifndef MIPS16HARDFLOAT_H
-#define MIPS16HARDFLOAT_H
-
using namespace llvm;
namespace llvm {
class Mips16HardFloat : public ModulePass {
-
public:
static char ID;
- Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID),
- TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {
- }
-
- const char *getPassName() const override {
- return "MIPS16 Hard Float Pass";
- }
+ Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
+ const char *getPassName() const override { return "MIPS16 Hard Float Pass"; }
bool runOnModule(Module &M) override;
protected:
- /// Keep a pointer to the MipsSubtarget around so that we can make the right
- /// decision when generating code for different targets.
- const TargetMachine &TM;
- const MipsSubtarget &Subtarget;
-
+ const MipsTargetMachine &TM;
};
ModulePass *createMips16HardFloat(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 02444d98c140..7295c287576d 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16HARDFLOATINFO_H
-#define MIPS16HARDFLOATINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOATINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOATINFO_H
namespace llvm {
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 7b0584255542..36b3ac9c8043 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -72,11 +72,10 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC =
- (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
@@ -103,7 +102,7 @@ void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
@@ -135,8 +134,9 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
+ AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ ? AliasFPReg
+ : getMips16SPAliasReg();
return;
}
break;
@@ -146,8 +146,9 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
+ AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ ? AliasFPReg
+ : getMips16SPAliasReg();
return;
}
break;
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index e653b3910306..ae0e61e19d9d 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16ISELDAGTODAG_H
-#define MIPS16ISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16ISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16ISELDAGTODAG_H
#include "MipsISelDAGToDAG.h"
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 9576fd48a215..d4852c4ece40 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "Mips16ISelLowering.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16HardFloatInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/StringRef.h"
@@ -27,7 +29,7 @@ using namespace llvm;
static cl::opt<bool> DontExpandCondPseudos16(
"mips16-dont-expand-cond-pseudo",
cl::init(false),
- cl::desc("Dont expand conditional move related "
+ cl::desc("Don't expand conditional move related "
"pseudos for Mips 16"),
cl::Hidden);
@@ -118,7 +120,7 @@ static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
{"truncf", "__mips16_call_stub_sf_1"},
};
-Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM,
+Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
: MipsTargetLowering(TM, STI) {
@@ -151,15 +153,16 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM,
}
const MipsTargetLowering *
-llvm::createMips16TargetLowering(MipsTargetMachine &TM,
+llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI) {
return new Mips16TargetLowering(TM, STI);
}
bool
-Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
return false;
}
@@ -318,7 +321,7 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
}
//
-// prefixs are attached to stub numbers depending on the return type .
+// Prefixes are attached to stub numbers depending on the return type.
// return type: float sf_
// double df_
// single complex sc_
@@ -329,17 +332,16 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
// The full name of a helper function is __mips16_call_stub +
// return type dependent prefix + stub number
//
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently but my main purpose is to not waste runtime
+// FIXME: This is something that probably should be in a different source file
+// and perhaps done differently but my main purpose is to not waste runtime
// on something that we can enumerate in the source. Another possibility is
// to have a Python script to generate these mapping tables. This will do
// for now. There is a whole series of helper function mapping arrays, one
// for each return type class as outlined above. There are 11 possible
-// entries. Ones with 0 are ones which should never be selected
+// entries. Ones with 0 are ones which should never be selected.
//
// All the arrays are similar except for ones which return neither
-// sf, df, sc, dc, in which only care about ones which have sf or df as a
+// sf, df, sc, dc, in which we only care about ones which have sf or df as a
// first parameter.
//
#define P_ "__mips16_call_stub_"
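
The naming scheme described above composes to P_ plus a return-type tag plus the stub number; a small sketch of the composition (an illustrative helper, not part of the patch):

#include <string>

// RetTag is "sf_", "df_", "sc_", "dc_", or "" for the remaining class.
std::string mips16StubName(const std::string &RetTag, unsigned StubNum) {
  return "__mips16_call_stub_" + RetTag + std::to_string(StubNum);
}
// e.g. mips16StubName("sf_", 1) == "__mips16_call_stub_sf_1", matching the
// Mips16IntrinsicHelper entry for truncf earlier in this file.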
@@ -421,7 +423,8 @@ void Mips16TargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
@@ -511,14 +514,16 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
Ops.push_back(JumpTarget);
MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
- InternalLinkage, CLI, Callee, Chain);
+ InternalLinkage, IsCallReloc, CLI, Callee,
+ Chain);
}
MachineBasicBlock *Mips16TargetLowering::
emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -580,7 +585,8 @@ MachineBasicBlock *Mips16TargetLowering::emitSelT16
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -644,7 +650,8 @@ MachineBasicBlock *Mips16TargetLowering::emitSeliT16
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -709,7 +716,8 @@ MachineBasicBlock
MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
unsigned regY = MI->getOperand(1).getReg();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -725,7 +733,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
int64_t imm = MI->getOperand(1).getImm();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -759,7 +768,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
unsigned regY = MI->getOperand(2).getReg();
@@ -776,7 +786,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
MachineInstr *MI, MachineBasicBlock *BB )const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
int64_t Imm = MI->getOperand(2).getImm();
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 1649fe68b770..d3b9f750f347 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -11,19 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16ISELLOWERING_H
-#define MIPS16ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16ISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16ISELLOWERING_H
#include "MipsISelLowering.h"
namespace llvm {
class Mips16TargetLowering : public MipsTargetLowering {
public:
- explicit Mips16TargetLowering(MipsTargetMachine &TM,
+ explicit Mips16TargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -46,7 +47,7 @@ namespace llvm {
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee,
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
SDValue Chain) const override;
MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
@@ -78,4 +79,4 @@ namespace llvm {
};
}
-#endif // Mips16ISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index da3a1f114af3..4ff68bef957e 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -591,7 +591,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
bits<3> funct;
let funct = _funct;
- let I8 = 0b0110;
+ let I8 = 0b00110;
let Inst{26-21} = imm16{10-5};
let Inst{20-16} = imm16{15-11};
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 4dd9af24968d..976beccfed9d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -144,7 +144,6 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
/// opcode, e.g. turning BEQ to BNE.
unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
case Mips::BeqzRxImm16: return Mips::BnezRxImm16;
@@ -166,8 +165,7 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16;
case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16;
}
- assert(false && "Implement this function.");
- return 0;
+ llvm_unreachable("Illegal opcode!");
}
static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
@@ -288,7 +286,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- assert(false && "adjust stack pointer amount exceeded");
+ llvm_unreachable("adjust stack pointer amount exceeded");
}
/// Adjust SP by Amount bytes.
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index a004c567e6c9..e7d0c07f36c5 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16INSTRUCTIONINFO_H
-#define MIPS16INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16INSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16INSTRINFO_H
#include "Mips16RegisterInfo.h"
#include "MipsInstrInfo.h"
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 5e4eebb62c12..10fff03b7240 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -502,7 +502,7 @@ class ArithLogic16Defs<bit isCom=0> {
bits<5> shamt = 0;
bit isCommutable = isCom;
bit isReMaterializable = 1;
- bit neverHasSideEffects = 1;
+ bit hasSideEffects = 0;
}
class branch16 {
@@ -879,7 +879,7 @@ def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
//
def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
let Uses = [HI0];
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
//
@@ -889,7 +889,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
//
def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
let Uses = [LO0];
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
//
@@ -897,13 +897,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
//
def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
let isCommutable = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
let isCommutable = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
@@ -914,7 +914,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
//
def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
let isCommutable = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
@@ -925,7 +925,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
//
def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
let isCommutable = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
@@ -1771,9 +1771,9 @@ def: Mips16Pat
//
// For constants, llvm transforms this to:
-// x > (k -1) and then reverses the operands to use setlt. So this pattern
+// x > (k - 1) and then reverses the operands to use setlt. So this pattern
// is not used now by the compiler. (Presumably checking that k-1 does not
-// overflow). The compiler never uses this at a the current time, due to
+// overflow). The compiler never uses this at the current time, due to
// other optimizations.
//
//def: Mips16Pat
@@ -1910,7 +1910,7 @@ def cpinst_operand : Operand<i32> {
// is the index into the MachineConstantPool that this is, the third is the
// size in bytes of this constant pool entry.
//
-let neverHasSideEffects = 1, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
MipsPseudo16<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), "foo", []>;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index dbee774637dd..0bb452a3f7d5 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -65,7 +65,7 @@ bool Mips16RegisterInfo::saveScavengerRegister
const TargetRegisterClass *RC,
unsigned Reg) const {
DebugLoc DL;
- const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
return true;
@@ -106,7 +106,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Mips::SP;
else {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
FrameReg = Mips::S0;
}
@@ -140,8 +140,8 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned NewImm;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(
- MBB.getParent()->getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
Offset = SignExtend64<16>(NewImm);
IsKill = true;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index f59f1a7c1098..3cdf836134a8 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16REGISTERINFO_H
-#define MIPS16REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16REGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16REGISTERINFO_H
#include "MipsRegisterInfo.h"
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index e4ec96a92f5b..e9a4289ec247 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -403,7 +403,7 @@ class JMP_IDX_COMPACT_FM<bits<6> funct> : MipsR6Inst {
bits<32> Inst;
let Inst{31-26} = funct;
- let Inst{25-21} = 0b000000;
+ let Inst{25-21} = 0b00000;
let Inst{20-16} = rt;
let Inst{15-0} = offset;
}
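
The fix above matters because Inst{25-21} is a five-bit slice, so its constant must be written five bits wide. A stand-alone C++ analogue of the JMP_IDX_COMPACT_FM layout, with field masks inferred from the record above:

#include <cstdint>

uint32_t encodeJmpIdxCompact(uint32_t Funct, uint32_t Rt, uint32_t Offset) {
  return ((Funct & 0x3F) << 26)   // Inst{31-26} = funct
       | (0u << 21)               // Inst{25-21} = 0b00000 (five bits)
       | ((Rt & 0x1F) << 16)      // Inst{20-16} = rt
       | (Offset & 0xFFFF);       // Inst{15-0}  = offset
}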
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index f2072a6c06e3..bfe2ba011957 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -290,7 +290,8 @@ class ExtsCins<string opstr, SDPatternOperator Op = null_frag>:
class SetCC64_R<string opstr, PatFrag cond_op> :
InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set GPR64Opnd:$rd, (cond_op GPR64Opnd:$rs, GPR64Opnd:$rt))],
+ [(set GPR64Opnd:$rd, (zext (cond_op GPR64Opnd:$rs,
+ GPR64Opnd:$rt)))],
II_SEQ_SNE, FrmR, opstr> {
let TwoOperandAliasConstraint = "$rd = $rs";
}
@@ -298,7 +299,8 @@ class SetCC64_R<string opstr, PatFrag cond_op> :
class SetCC64_I<string opstr, PatFrag cond_op>:
InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, simm10_64:$imm10),
!strconcat(opstr, "\t$rt, $rs, $imm10"),
- [(set GPR64Opnd:$rt, (cond_op GPR64Opnd:$rs, immSExt10_64:$imm10))],
+ [(set GPR64Opnd:$rt, (zext (cond_op GPR64Opnd:$rs,
+ immSExt10_64:$imm10)))],
II_SEQI_SNEI, FrmI, opstr> {
let TwoOperandAliasConstraint = "$rt = $rs";
}
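
The (zext ...) wrapper added above makes explicit that the comparison defines the full 64-bit destination with a zero-extended 0/1. Modeled in plain C++, illustrative only:

#include <cstdint>

uint64_t seq(uint64_t Rs, uint64_t Rt) { return Rs == Rt ? 1 : 0; }
uint64_t sne(uint64_t Rs, uint64_t Rt) { return Rs != Rt ? 1 : 0; }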
@@ -446,28 +448,22 @@ def : MipsInstAlias<"move $dst, $src",
GPR_64;
def : MipsInstAlias<"daddu $rs, $rt, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dadd $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dadd $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>, ISA_MIPS3_NOT_32R6_64R6;
-def : MipsInstAlias<"add $rs, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-def : MipsInstAlias<"addu $rs, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
def : MipsInstAlias<"dsll $rd, $rt, $rs",
(DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
def : MipsInstAlias<"dsubu $rt, $rs, $imm",
(DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
- InvertedImOperand64:$imm), 0>;
+ InvertedImOperand64:$imm), 0>, ISA_MIPS3;
def : MipsInstAlias<"dsubi $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
InvertedImOperand64:$imm),
@@ -487,7 +483,7 @@ def : MipsInstAlias<"dsub $rs, $imm",
def : MipsInstAlias<"dsubu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dsra $rd, $rt, $rs",
(DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
@@ -514,3 +510,9 @@ def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0
def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+let Predicates = [HasMips64, HasCnMips] in {
+def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>;
+def : MipsInstAlias<"syncs", (SYNC 0x6), 0>;
+def : MipsInstAlias<"syncw", (SYNC 0x4), 0>;
+def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
+}
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 31a9b7d63983..161345d2a845 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -72,7 +72,8 @@ void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
if (Imm & 0x8000) {
InstSeqLs SeqLsORi;
GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
- SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+ SeqLs.append(std::make_move_iterator(SeqLsORi.begin()),
+ std::make_move_iterator(SeqLsORi.end()));
}
}
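
The change above moves the freshly built sequences into SeqLs instead of copying them; SmallVector::append accepts the same iterator pair. A minimal stand-alone analogue with std::vector:

#include <iterator>
#include <string>
#include <vector>

void appendMoved(std::vector<std::string> &Dst,
                 std::vector<std::string> &Src) {
  Dst.insert(Dst.end(), std::make_move_iterator(Src.begin()),
             std::make_move_iterator(Src.end())); // elements moved, not copied
}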
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index cc09034a9c39..ae3c38ced80b 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -6,8 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_ANALYZE_IMMEDIATE_H
-#define MIPS_ANALYZE_IMMEDIATE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSANALYZEIMMEDIATE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSANALYZEIMMEDIATE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 14463fce0dc3..50c0441e0dd4 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -53,7 +53,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-asm-printer"
-MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() {
+MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
return static_cast<MipsTargetStreamer &>(*OutStreamer.getTargetStreamer());
}
@@ -131,7 +131,7 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
- TS.setCanHaveModuleDir(false);
+ TS.forbidModuleDirective();
if (MI->isDebugValue()) {
SmallString<128> Str;
@@ -266,7 +266,8 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
if (Mips::GPR32RegClass.contains(Reg))
break;
- unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum =
+ TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -281,7 +282,8 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum =
+ TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -306,7 +308,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
/// Frame Directive
void MipsAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
@@ -558,7 +560,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
bool closeP = false;
@@ -643,6 +645,18 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
// For a PIC target, the call target is loaded with the
// pattern lw $25,%call16($28)
+
+  // opNum can be invalid if the instruction has a register list as an
+  // operand; the memory operand (base + offset) is always the last pair.
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ opNum = MI->getNumOperands() - 2;
+ break;
+ }
+
printOperand(MI, opNum+1, O);
O << "(";
printOperand(MI, opNum, O);
@@ -666,6 +680,14 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
+void MipsAsmPrinter::
+printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != opNum) O << ", ";
+ printOperand(MI, i, O);
+ }
+}
+
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
bool IsABICalls = Subtarget->isABICalls();
if (IsABICalls) {
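
On the reglist handling above: in SWM32_MM/LWM32_MM the register list occupies the leading operands and the memory operand is always the trailing base+offset pair, so its index is NumOperands - 2 whatever the list length. A tiny illustrative model, not LLVM API:

#include <cstddef>
#include <string>
#include <vector>

// e.g. {"$16", "$17", "$ra", "$sp", "8"}: the base register "$sp" sits at
// index 3 == size() - 2, and the offset "8" follows it.
std::size_t memOperandIndex(const std::vector<std::string> &Operands) {
  return Operands.size() - 2;
}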
@@ -721,6 +743,29 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
Subtarget->isABI_O32());
}
+void MipsAsmPrinter::emitInlineAsmStart(
+ const MCSubtargetInfo &StartInfo) const {
+ MipsTargetStreamer &TS = getTargetStreamer();
+
+ // GCC's choice of assembler options for inline assembly code ('at', 'macro'
+ // and 'reorder') is different from LLVM's choice for generated code ('noat',
+ // 'nomacro' and 'noreorder').
+ // In order to maintain compatibility with inline assembly code which depends
+ // on GCC's assembler options being used, we have to switch to those options
+ // for the duration of the inline assembly block and then switch back.
+ TS.emitDirectiveSetPush();
+ TS.emitDirectiveSetAt();
+ TS.emitDirectiveSetMacro();
+ TS.emitDirectiveSetReorder();
+ OutStreamer.AddBlankLine();
+}
+
+void MipsAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const {
+ OutStreamer.AddBlankLine();
+ getTargetStreamer().emitDirectiveSetPop();
+}
+
void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) {
MCInst I;
I.setOpcode(Mips::JAL);
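
Reconstructed from the streamer calls in emitInlineAsmStart/emitInlineAsmEnd above, the bracketing around each inline-assembly block should come out roughly as follows (a sketch of the expected output, not captured from the assembler):

.set push
.set at
.set macro
.set reorder
# ... user inline assembly ...
.set pop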
@@ -929,7 +974,7 @@ void MipsAsmPrinter::EmitFPCallStub(
// called otherwise. when the full stub generation is moved here
// we need to deal with pic.
//
- if (Subtarget->getRelocationModel() == Reloc::PIC_)
+ if (TM.getRelocationModel() == Reloc::PIC_)
llvm_unreachable("should not be here if we are compiling pic");
TS.emitDirectiveSetReorder();
//
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index abbd39b571a5..723155f5dd60 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSASMPRINTER_H
-#define MIPSASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSASMPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSASMPRINTER_H
#include "Mips16HardFloatInfo.h"
#include "MipsMCInstLower.h"
@@ -31,7 +31,7 @@ class Module;
class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
- MipsTargetStreamer &getTargetStreamer();
+ MipsTargetStreamer &getTargetStreamer() const;
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
@@ -60,6 +60,11 @@ private:
std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
+ void emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const override;
+
+ void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const override;
+
void EmitJal(MCSymbol *Symbol);
void EmitInstrReg(unsigned Opcode, unsigned Reg);
@@ -134,6 +139,7 @@ public:
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
+ void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O);
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 026b7b5e3275..9a08adef3823 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -10,9 +10,9 @@
#ifndef MIPSCCSTATE_H
#define MIPSCCSTATE_H
+#include "MipsISelLowering.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "MipsISelLowering.h"
namespace llvm {
class SDNode;
@@ -65,10 +65,9 @@ private:
public:
MipsCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
SpecialCallingConvType SpecialCC = NoSpecialCallingConv)
- : CCState(CC, isVarArg, MF, TM, locs, C), SpecialCallingConv(SpecialCC) {}
+ : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {}
void
AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 76297dcbbcf1..90bf9f3f5835 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -12,14 +12,37 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A, string Invert = "">
: CCIf<!strconcat(Invert,
- "State.getMachineFunction().getTarget()."
- "getSubtarget<const MipsSubtarget>().",
+ "static_cast<const MipsSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
F),
A>;
// The inverse of CCIfSubtarget
class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+/// Match if the original argument (before lowering) was a float.
+/// For example, this is true for i32's that were lowered from soft-float.
+class CCIfOrigArgWasNotFloat<CCAction A>
+ : CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+ A>;
+
+/// Match if the original argument (before lowering) was a 128-bit float (i.e.
+/// long double).
+class CCIfOrigArgWasF128<CCAction A>
+ : CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)", A>;
+
+/// Match if this specific argument is a vararg.
+/// This is slightly different from CCIfIsVarArg, which matches if any
+/// argument is a vararg.
+class CCIfArgIsVarArg<CCAction A>
+ : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
+
+
+/// Match if the special calling conv is the specified value.
+class CCIfSpecialCallingConv<string CC, CCAction A>
+ : CCIf<"static_cast<MipsCCState *>(&State)->getSpecialCallingConv() == "
+ "MipsCCState::" # CC, A>;
+
// For soft-float, f128 values are returned in A0_64 rather than V1_64.
def RetCC_F128SoftFloat : CallingConv<[
CCAssignToReg<[V0_64, A0_64]>
@@ -51,6 +74,19 @@ def RetCC_F128 : CallingConv<[
// Mips O32 Calling Convention
//===----------------------------------------------------------------------===//
+def CC_MipsO32 : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // i32 and f32 values get stored in stack slots that are 4 bytes in
+  // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // f64 values get stored in stack slots that are 8 bytes in
+  // size and 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
@@ -92,9 +128,7 @@ def CC_MipsN : CallingConv<[
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
// All integers (except soft-float integers) are promoted to 64-bit.
- CCIfType<[i8, i16, i32],
- CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
- CCPromoteToType<i64>>>,
+ CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat<CCPromoteToType<i64>>>,
// The only i32's we have left are soft-float arguments.
CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
@@ -149,9 +183,7 @@ def RetCC_MipsN : CallingConv<[
//
// f128 should only occur for the N64 ABI where long double is 128-bit. On
// N32, long double is equivalent to double.
- CCIfType<[i64],
- CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
- CCDelegateTo<RetCC_F128>>>,
+ CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>,
// Aggregate returns are positioned at the lowest address in the slot for
// both little and big-endian targets. When passing in registers, this
@@ -317,8 +349,7 @@ def CC_Mips16RetHelper : CallingConv<[
def CC_Mips_FixedArg : CallingConv<[
// Mips16 needs special handling on some functions.
CCIf<"State.getCallingConv() != CallingConv::Fast",
- CCIf<"static_cast<MipsCCState *>(&State)->getSpecialCallingConv() == "
- "MipsCCState::Mips16RetHelperConv",
+ CCIfSpecialCallingConv<"Mips16RetHelperConv",
CCDelegateTo<CC_Mips16RetHelper>>>,
CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
@@ -335,8 +366,7 @@ def CC_Mips_FixedArg : CallingConv<[
// N32, long double is equivalent to double.
CCIfType<[i64],
CCIfSubtargetNot<"abiUsesSoftFloat()",
- CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
- CCBitConvertToType<f64>>>>,
+ CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
@@ -356,9 +386,7 @@ def CC_Mips_VarArg : CallingConv<[
]>;
def CC_Mips : CallingConv<[
- CCIfVarArg<
- CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)",
- CCDelegateTo<CC_Mips_VarArg>>>,
+ CCIfVarArg<CCIfArgIsVarArg<CCDelegateTo<CC_Mips_VarArg>>>,
CCDelegateTo<CC_Mips_FixedArg>
]>;
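
The CCIfSubtarget rewrite above splices its string into TableGen-generated C++; after this change the generated condition reads roughly as follows (an approximation of the generated code, using isABI_O32() as an example feature query):

if (static_cast<const MipsSubtarget &>(
        State.getMachineFunction().getSubtarget()).isABI_O32()) {
  // ... apply the guarded CCAction ...
}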
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
deleted file mode 100644
index 794c71898fa3..000000000000
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ /dev/null
@@ -1,481 +0,0 @@
-//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Mips machine instructions
-// into relocatable machine code.
-//
-//===---------------------------------------------------------------------===//
-
-#include "Mips.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsInstrInfo.h"
-#include "MipsRelocations.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#ifndef NDEBUG
-#include <iomanip>
-#endif
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
-class MipsCodeEmitter : public MachineFunctionPass {
- MipsJITInfo *JTI;
- const MipsInstrInfo *II;
- const DataLayout *TD;
- const MipsSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- const std::vector<MachineJumpTableEntry> *MJTEs;
- bool IsPIC;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo> ();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
-public:
- MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
- TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "Mips Machine Code Emitter";
- }
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- void emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB);
-
-private:
-
- void emitWord(unsigned Word);
-
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getBranchTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const;
-
- unsigned getBranchTarget21OpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getBranchTarget26OpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const;
-
- unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMSAMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const;
-
- /// Expand pseudo instructions with accumulator register operands.
- void expandACCInstr(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB, unsigned Opc) const;
-
- void expandPseudoIndirectBranch(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB) const;
-
- /// \brief Expand pseudo instruction. Return true if MI was expanded.
- bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
- MachineBasicBlock &MBB) const;
-};
-}
-
-char MipsCodeEmitter::ID = 0;
-
-bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
- const_cast<TargetMachine &>(MF.getTarget()));
-
- JTI = Target.getJITInfo();
- II = Target.getInstrInfo();
- TD = Target.getDataLayout();
- Subtarget = &TM.getSubtarget<MipsSubtarget> ();
- MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = nullptr;
- if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
- JTI->Initialize(MF, IsPIC, Subtarget->isLittle());
- MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB){
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E;)
- emitInstruction(*I++, *MBB);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const {
- // NOTE: This relocations are for static.
- uint64_t TSFlags = MI.getDesc().TSFlags;
- uint64_t Form = TSFlags & MipsII::FormMask;
- if (Form == MipsII::FrmJ)
- return Mips::reloc_mips_26;
- if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.isBranch())
- return Mips::reloc_mips_pc16;
- if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
- return Mips::reloc_mips_hi;
- return Mips::reloc_mips_lo;
-}
-
-unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- MachineOperand MO = MI.getOperand(OpNo);
- if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unexpected jump target operand kind.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- MachineOperand MO = MI.getOperand(OpNo);
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- return 0;
-}
-
-unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
- assert(MI.getOperand(OpNo).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16;
- return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
-}
-
-unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getMSAMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // size is encoded as size-1.
- return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
-}
-
-unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // size is encoded as pos+size-1.
- return getMachineOpValue(MI, MI.getOperand(OpNo-1)) +
- getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
-}
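// Worked example for the two encodings above, following the MIPS32
// EXT/INS field conventions: "ext $t0, $t1, 3, 5" encodes its size field
// as size-1 = 4, while "ins $t0, $t1, 3, 5" encodes pos+size-1 = 7.
static unsigned encodeExtSize(unsigned Size) { return Size - 1; }
static unsigned encodeInsMsb(unsigned Pos, unsigned Size) {
  return Pos + Size - 1;
}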
-
-unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-
-void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0,
- MayNeedFarStub));
-}
-
-void MipsCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, 0, 0));
-}
-
-void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, false));
-}
-
-void MipsCodeEmitter::
-emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTIndex, 0, false));
-}
-
-void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB));
-}
-
-void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
-
- // Expand pseudo instruction. Skip if MI was not expanded.
- if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
- !expandPseudos(MI, MBB))
- return;
-
- MCE.processDebugLoc(MI->getDebugLoc(), true);
-
- emitWord(getBinaryCodeForInstr(*MI));
- ++NumEmitted; // Keep track of the # of mi's emitted
-
- MCE.processDebugLoc(MI->getDebugLoc(), false);
-}
-
-void MipsCodeEmitter::emitWord(unsigned Word) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Word) << "\n");
- if (Subtarget->isLittle())
- MCE.emitWordLE(Word);
- else
- MCE.emitWordBE(Word);
-}
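// The endianness split above is the only byte-order-sensitive step in
// the emitter. A self-contained sketch of the two layouts:
#include <cstdint>
static void writeWord(uint8_t Buf[4], uint32_t Word, bool IsLittle) {
  for (int I = 0; I != 4; ++I) {
    unsigned Shift = IsLittle ? 8 * I : 8 * (3 - I);
    Buf[I] = static_cast<uint8_t>(Word >> Shift); // LSB first iff little
  }
}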
-
-void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB,
- unsigned Opc) const {
- // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
- .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
-}
-
-void MipsCodeEmitter::expandPseudoIndirectBranch(
- MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB) const {
- // This logic is duplicated from MipsAsmPrinter::emitPseudoIndirectBranch()
- bool HasLinkReg = false;
- unsigned Opcode = 0;
-
- if (Subtarget->hasMips64r6()) {
- // MIPS64r6 should use (JALR64 ZERO_64, $rs)
- Opcode = Mips::JALR64;
- HasLinkReg = true;
- } else if (Subtarget->hasMips32r6()) {
- // MIPS32r6 should use (JALR ZERO, $rs)
- Opcode = Mips::JALR;
- HasLinkReg = true;
- } else if (Subtarget->inMicroMipsMode())
- // microMIPS should use (JR_MM $rs)
- Opcode = Mips::JR_MM;
- else {
- // Everything else should use (JR $rs)
- Opcode = Mips::JR;
- }
-
- auto MIB = BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opcode));
-
- if (HasLinkReg) {
- unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
- MIB.addReg(ZeroReg);
- }
-
- MIB.addReg(MI->getOperand(0).getReg());
-}
-
-bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
- MachineBasicBlock &MBB) const {
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unhandled pseudo");
- return false;
- case Mips::NOP:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
- .addReg(Mips::ZERO).addImm(0);
- break;
- case Mips::B:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO)
- .addReg(Mips::ZERO).addOperand(MI->getOperand(0));
- break;
- case Mips::TRAP:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0)
- .addImm(0);
- break;
- case Mips::JALRPseudo:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
- .addReg(MI->getOperand(0).getReg());
- break;
- case Mips::PseudoMULT:
- expandACCInstr(MI, MBB, Mips::MULT);
- break;
- case Mips::PseudoMULTu:
- expandACCInstr(MI, MBB, Mips::MULTu);
- break;
- case Mips::PseudoSDIV:
- expandACCInstr(MI, MBB, Mips::SDIV);
- break;
- case Mips::PseudoUDIV:
- expandACCInstr(MI, MBB, Mips::UDIV);
- break;
- case Mips::PseudoMADD:
- expandACCInstr(MI, MBB, Mips::MADD);
- break;
- case Mips::PseudoMADDU:
- expandACCInstr(MI, MBB, Mips::MADDU);
- break;
- case Mips::PseudoMSUB:
- expandACCInstr(MI, MBB, Mips::MSUB);
- break;
- case Mips::PseudoMSUBU:
- expandACCInstr(MI, MBB, Mips::MSUBU);
- break;
- case Mips::PseudoReturn:
- case Mips::PseudoReturn64:
- case Mips::PseudoIndirectBranch:
- case Mips::PseudoIndirectBranch64:
- expandPseudoIndirectBranch(MI, MBB);
- break;
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- // Do nothing
- return false;
- }
-
- (MI--)->eraseFromBundle();
- return true;
-}
-
-/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
-/// code to the specified MCE object.
-FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new MipsCodeEmitter(TM, JCE);
-}
-
-#include "MipsGenCodeEmitter.inc"
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 690f62608504..af10cd4e576c 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -263,3 +263,40 @@ defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
FGR_64;
defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
FGR_64;
+
+// For targets that don't have conditional-move instructions
+// we have to match SELECT nodes with pseudo instructions.
+let usesCustomInserter = 1 in {
+ class Select_Pseudo<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F),
+ [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>,
+ ISA_MIPS1_NOT_4_32;
+
+ class SelectFP_Pseudo_T<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F),
+ [(set RC:$dst, (MipsCMovFP_T RC:$T, GPR32Opnd:$cond, RC:$F))]>,
+ ISA_MIPS1_NOT_4_32;
+
+ class SelectFP_Pseudo_F<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F),
+ [(set RC:$dst, (MipsCMovFP_F RC:$T, GPR32Opnd:$cond, RC:$F))]>,
+ ISA_MIPS1_NOT_4_32;
+}
+
+def PseudoSELECT_I : Select_Pseudo<GPR32Opnd>;
+def PseudoSELECT_I64 : Select_Pseudo<GPR64Opnd>;
+def PseudoSELECT_S : Select_Pseudo<FGR32Opnd>;
+def PseudoSELECT_D32 : Select_Pseudo<AFGR64Opnd>, FGR_32;
+def PseudoSELECT_D64 : Select_Pseudo<FGR64Opnd>, FGR_64;
+
+def PseudoSELECTFP_T_I : SelectFP_Pseudo_T<GPR32Opnd>;
+def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T<GPR64Opnd>;
+def PseudoSELECTFP_T_S : SelectFP_Pseudo_T<FGR32Opnd>;
+def PseudoSELECTFP_T_D32 : SelectFP_Pseudo_T<AFGR64Opnd>, FGR_32;
+def PseudoSELECTFP_T_D64 : SelectFP_Pseudo_T<FGR64Opnd>, FGR_64;
+
+def PseudoSELECTFP_F_I : SelectFP_Pseudo_F<GPR32Opnd>;
+def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F<GPR64Opnd>;
+def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
+def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
+def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
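// What each Select_Pseudo above computes, modeled in plain C++ (a sketch
// of the semantics only, not of the expansion): since pre-MIPS4/32
// targets lack conditional moves, the usesCustomInserter flag routes
// these pseudos to the custom inserter, which is expected to lower them
// as a compare-and-branch diamond.
static int selectModel(int Cond, int TrueVal, int FalseVal) {
  return Cond != 0 ? TrueVal : FalseVal;
}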
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 76313b1d1a9d..c4e5ac0745d3 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -28,6 +28,7 @@
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -452,7 +453,9 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) {
return false;
}
- TII = (const Mips16InstrInfo*)MF->getTarget().getInstrInfo();
+ TII = (const Mips16InstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
MFI = MF->getInfo<MipsFunctionInfo>();
DEBUG(dbgs() << "constant island processing " << "\n");
//
@@ -559,7 +562,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = *MF->getSubtarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -585,9 +588,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
if (InsPoint[a] == InsAt)
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
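// The emplace_back above constructs the one-element inner vector in
// place; it is equivalent to the removed sequence:
//   std::vector<CPEntry> CPEs;
//   CPEs.push_back(CPEntry(CPEMI, i));
//   CPEntries.push_back(CPEs);
// but avoids copying the temporary vector.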
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index bcfbc12df01c..d5027b9c3ca3 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -196,6 +196,9 @@ namespace {
private:
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ Iter replaceWithCompactBranch(MachineBasicBlock &MBB,
+ Iter Branch, DebugLoc DL);
+
/// This function checks if it is valid to move Candidate to the delay slot
/// and returns true if it isn't. It also updates memory and register
/// dependence information.
@@ -207,7 +210,7 @@ namespace {
template<typename IterTy>
bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
RegDefsUses &RegDU, InspectMemInstr &IM,
- IterTy &Filler) const;
+ IterTy &Filler, Iter Slot) const;
/// This function searches in the backward direction for an instruction that
/// can be moved to the delay slot. Returns true on success.
@@ -275,7 +278,11 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
#ifndef NDEBUG
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ assert(MF.getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getAllocatableSet(MF)
+ .test(R) &&
"Shouldn't move an instruction with unallocatable registers across "
"basic block boundaries.");
#endif
@@ -286,8 +293,8 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
}
RegDefsUses::RegDefsUses(TargetMachine &TM)
- : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
- Uses(TRI.getNumRegs(), false) {}
+ : TRI(*TM.getSubtargetImpl()->getRegisterInfo()),
+ Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
void RegDefsUses::init(const MachineInstr &MI) {
// Add all register operands which are explicit and non-variadic.
@@ -451,7 +458,8 @@ bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
if (MayStore)
- return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+ return !Defs.insert(V).second || Uses.count(V) || SeenNoObjStore ||
+ SeenNoObjLoad;
Uses.insert(V);
return Defs.count(V) || SeenNoObjStore;
@@ -489,10 +497,55 @@ getUnderlyingObjects(const MachineInstr &MI,
return true;
}
+// Replace Branch with the compact branch instruction.
+Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB,
+ Iter Branch, DebugLoc DL) {
+ const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+
+ unsigned NewOpcode =
+ (((unsigned) Branch->getOpcode()) == Mips::BEQ) ? Mips::BEQZC_MM
+ : Mips::BNEZC_MM;
+
+ const MCInstrDesc &NewDesc = TII->get(NewOpcode);
+ MachineInstrBuilder MIB = BuildMI(MBB, Branch, DL, NewDesc);
+
+ MIB.addReg(Branch->getOperand(0).getReg());
+ MIB.addMBB(Branch->getOperand(2).getMBB());
+
+ Iter tmpIter = Branch;
+ Branch = std::prev(Branch);
+ MBB.erase(tmpIter);
+
+ return Branch;
+}
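// Worked example, assuming the ZERO operand is in operand slot 1 as
// checked at the call site below: a microMIPS
//   beq $t0, $zero, L
// whose delay slot would otherwise need a nop becomes the compact,
// delay-slot-free
//   beqzc $t0, L
// and bne with $zero likewise becomes bnezc.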
+
+// For a given call opcode, returns the opcode of the corresponding
+// microMIPS instruction with a short (16-bit) delay slot.
+static int getEquivalentCallShort(int Opcode) {
+ switch (Opcode) {
+ case Mips::BGEZAL:
+ return Mips::BGEZALS_MM;
+ case Mips::BLTZAL:
+ return Mips::BLTZALS_MM;
+ case Mips::JAL:
+ return Mips::JALS_MM;
+ case Mips::JALR:
+ return Mips::JALRS_MM;
+ case Mips::JALR16_MM:
+ return Mips::JALRS16_MM;
+ default:
+ llvm_unreachable("Unexpected call instruction for microMIPS.");
+ }
+}
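// Usage sketch, mirroring the call site added below in
// runOnMachineBasicBlock: when a 16-bit instruction lands in a call's
// delay slot, the call itself is rewritten to its short-delay-slot form:
//   DSI->setDesc(TII->get(getEquivalentCallShort(Mips::JAL))); // JALS_MM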
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool InMicroMipsMode = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
if (!hasUnoccupiedSlot(&*I))
@@ -503,22 +556,48 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Delay slot filling is disabled at -O0.
if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
- if (searchBackward(MBB, I))
- continue;
-
- if (I->isTerminator()) {
- if (searchSuccBBs(MBB, I))
- continue;
+ bool Filled = false;
+
+ if (searchBackward(MBB, I)) {
+ Filled = true;
+ } else if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I)) {
+ Filled = true;
+ }
} else if (searchForward(MBB, I)) {
+ Filled = true;
+ }
+
+ if (Filled) {
+ // Get instruction with delay slot.
+ MachineBasicBlock::instr_iterator DSI(I);
+
+ if (InMicroMipsMode && TII->GetInstSizeInBytes(std::next(DSI)) == 2 &&
+ DSI->isCall()) {
+ // If the instruction in the delay slot is 16 bits wide, change the
+ // call's opcode to the corresponding variant with a short delay slot.
+ DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode())));
+ }
+
continue;
}
}
- // Bundle the NOP to the instruction with the delay slot.
- const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
- BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
- MIBundleBuilder(MBB, I, std::next(I, 2));
+ // If the instruction is a BEQ or BNE with one ZERO-register operand,
+ // then instead of adding a NOP, replace it with the corresponding
+ // compact branch instruction, i.e. BEQZC or BNEZC.
+ unsigned Opcode = I->getOpcode();
+ if (InMicroMipsMode &&
+ (Opcode == Mips::BEQ || Opcode == Mips::BNE) &&
+ ((unsigned) I->getOperand(1).getReg()) == Mips::ZERO) {
+
+ I = replaceWithCompactBranch(MBB, I, I->getDebugLoc());
+
+ } else {
+ // Bundle the NOP to the instruction with the delay slot.
+ BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ MIBundleBuilder(MBB, I, std::next(I, 2));
+ }
}
return Changed;
@@ -533,7 +612,7 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
RegDefsUses &RegDU, InspectMemInstr& IM,
- IterTy &Filler) const {
+ IterTy &Filler, Iter Slot) const {
for (IterTy I = Begin; I != End; ++I) {
// skip debug value
if (I->isDebugValue())
@@ -554,12 +633,22 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
// branches are not checked because non-NaCl targets never put them in
// delay slots.
unsigned AddrIdx;
- if ((isBasePlusOffsetMemoryAccess(I->getOpcode(), &AddrIdx)
- && baseRegNeedsLoadStoreMask(I->getOperand(AddrIdx).getReg()))
- || I->modifiesRegister(Mips::SP, TM.getRegisterInfo()))
+ if ((isBasePlusOffsetMemoryAccess(I->getOpcode(), &AddrIdx) &&
+ baseRegNeedsLoadStoreMask(I->getOperand(AddrIdx).getReg())) ||
+ I->modifiesRegister(Mips::SP,
+ TM.getSubtargetImpl()->getRegisterInfo()))
continue;
}
+ bool InMicroMipsMode = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
+ const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ unsigned Opcode = (*Slot).getOpcode();
+ if (InMicroMipsMode && TII->GetInstSizeInBytes(&(*I)) == 2 &&
+ (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
+ Opcode == Mips::PseudoReturn))
+ continue;
+
Filler = I;
return true;
}
@@ -577,7 +666,8 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.init(*Slot);
- if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler))
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler,
+ Slot))
return false;
MBB.splice(std::next(Slot), &MBB, std::next(Filler).base());
@@ -597,7 +687,7 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.setCallerSaved(*Slot);
- if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler))
+ if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler, Slot))
return false;
MBB.splice(std::next(Slot), &MBB, Filler);
@@ -640,7 +730,8 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
IM.reset(new MemDefsUses(MFI));
}
- if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler,
+ Slot))
return false;
insertDelayFiller(Filler, BrMap);
@@ -667,7 +758,7 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
SmallVector<MachineInstr*, 2> BranchInstrs;
SmallVector<MachineOperand, 2> Cond;
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 617801ba8c67..4588f40b84d5 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -2,42 +2,63 @@
//---------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "MipsCCState.h"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "MipsRegisterInfo.h"
-#include "MipsISelLowering.h"
-#include "MipsMachineFunction.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
using namespace llvm;
namespace {
-// All possible address modes.
-typedef struct Address {
- enum { RegBase, FrameIndexBase } BaseType;
+class MipsFastISel final : public FastISel {
- union {
- unsigned Reg;
- int FI;
- } Base;
+ // All possible address modes.
+ class Address {
+ public:
+ typedef enum { RegBase, FrameIndexBase } BaseKind;
- int64_t Offset;
+ private:
+ BaseKind Kind;
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
- // Innocuous defaults for our address.
- Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
-} Address;
+ int64_t Offset;
-class MipsFastISel final : public FastISel {
+ const GlobalValue *GV;
+
+ public:
+ // Innocuous defaults for our address.
+ Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
+ void setKind(BaseKind K) { Kind = K; }
+ BaseKind getKind() const { return Kind; }
+ bool isRegBase() const { return Kind == RegBase; }
+ void setReg(unsigned Reg) {
+ assert(isRegBase() && "Invalid base register access!");
+ Base.Reg = Reg;
+ }
+ unsigned getReg() const {
+ assert(isRegBase() && "Invalid base register access!");
+ return Base.Reg;
+ }
+ void setOffset(int64_t Offset_) { Offset = Offset_; }
+ int64_t getOffset() const { return Offset; }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
+ };
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
- Module &M;
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
@@ -47,74 +68,265 @@ class MipsFastISel final : public FastISel {
// Convenience variables to avoid some queries.
LLVMContext *Context;
- bool TargetSupported;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
-public:
- explicit MipsFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo),
- M(const_cast<Module &>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
- MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
- Context = &funcInfo.Fn->getContext();
- TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) &&
- (Subtarget->hasMips32r2() && (Subtarget->isABI_O32())));
- }
+ bool TargetSupported;
+ bool UnsupportedFPMode; // Lets fast-isel proceed while simply declining
+ // to handle floating point, rather than rejecting
+ // fast-isel altogether in such situations.
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
+private:
+ // Selection routines.
+ bool selectLoad(const Instruction *I);
+ bool selectStore(const Instruction *I);
+ bool selectBranch(const Instruction *I);
+ bool selectCmp(const Instruction *I);
+ bool selectFPExt(const Instruction *I);
+ bool selectFPTrunc(const Instruction *I);
+ bool selectFPToInt(const Instruction *I, bool IsSigned);
+ bool selectRet(const Instruction *I);
+ bool selectTrunc(const Instruction *I);
+ bool selectIntExt(const Instruction *I);
- bool ComputeAddress(const Value *Obj, Address &Addr);
+ // Utility helper routines.
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool computeAddress(const Value *Obj, Address &Addr);
+ bool computeCallAddress(const Value *V, Address &Addr);
-private:
- bool EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ // Emit helper routines.
+ bool emitCmp(unsigned DestReg, const CmpInst *CI);
+ bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0);
- bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO = nullptr);
+ bool emitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
- bool SelectLoad(const Instruction *I);
- bool SelectRet(const Instruction *I);
- bool SelectStore(const Instruction *I);
+ unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg,
- bool isTypeLegal(Type *Ty, MVT &VT);
- bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool IsZExt);
+ bool emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg);
+
+ bool emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg);
+ bool emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg);
+ bool emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg);
+
+ unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned);
- unsigned MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned MaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned MaterializeInt(const Constant *C, MVT VT);
- unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+ unsigned materializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned materializeGV(const GlobalValue *GV, MVT VT);
+ unsigned materializeInt(const Constant *C, MVT VT);
+ unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+ MachineInstrBuilder emitInst(unsigned Opc) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ }
+ MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ DstReg);
+ }
+ MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+ }
+ MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+ }
// for some reason, this default is not generated by tablegen
// so we explicitly generate it here.
//
- unsigned FastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
+ unsigned fastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill, uint64_t imm1,
uint64_t imm2, unsigned Op3, bool Op3IsKill) {
return 0;
}
- MachineInstrBuilder EmitInst(unsigned Opc) {
- return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ // Call handling routines.
+private:
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
+ bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
+ unsigned &NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+
+public:
+ // Backend specific FastISel code.
+ explicit MipsFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
+ MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
+ Context = &funcInfo.Fn->getContext();
+ TargetSupported = ((TM.getRelocationModel() == Reloc::PIC_) &&
+ ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
+ (Subtarget->isABI_O32())));
+ UnsupportedFPMode = Subtarget->isFP64bit();
}
- MachineInstrBuilder EmitInst(unsigned Opc, unsigned DstReg) {
- return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
- DstReg);
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+
+#include "MipsGenFastISel.inc"
+};
+} // end anonymous namespace.
+
+static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) LLVM_ATTRIBUTE_UNUSED;
+
+static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ llvm_unreachable("should not be called");
+}
+
+bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+ llvm_unreachable("should not be called");
+}
+
+#include "MipsGenCallingConv.inc"
+
+CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const {
+ return CC_MipsO32;
+}
+
+unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ int64_t Imm;
+ if ((VT != MVT::i1) && CI->isNegative())
+ Imm = CI->getSExtValue();
+ else
+ Imm = CI->getZExtValue();
+ return materialize32BitInt(Imm, RC);
+}
+
+unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned ResultReg = createResultReg(RC);
+
+ if (isInt<16>(Imm)) {
+ unsigned Opc = Mips::ADDiu;
+ emitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
+ return ResultReg;
+ } else if (isUInt<16>(Imm)) {
+ emitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
+ return ResultReg;
}
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+ if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ emitInst(Mips::LUi, TmpReg).addImm(Hi);
+ emitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
+ } else {
+ emitInst(Mips::LUi, ResultReg).addImm(Hi);
+ }
+ return ResultReg;
+}
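// Worked example for the fall-through path above: 0x12345678 fits
// neither 16-bit form, so materialize32BitInt emits the classic pair
//   lui $tmp, 0x1234
//   ori $res, $tmp, 0x5678
// while 0x00005678 takes the single-ori path and -8 the addiu path.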
- MachineInstrBuilder EmitInstStore(unsigned Opc, unsigned SrcReg,
- unsigned MemReg, int64_t MemOffset) {
- return EmitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+unsigned MipsFastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
+ if (UnsupportedFPMode)
+ return 0;
+ int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VT == MVT::f32) {
+ const TargetRegisterClass *RC = &Mips::FGR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg = materialize32BitInt(Imm, &Mips::GPR32RegClass);
+ emitInst(Mips::MTC1, DestReg).addReg(TempReg);
+ return DestReg;
+ } else if (VT == MVT::f64) {
+ const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg1 = materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
+ unsigned TempReg2 =
+ materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
+ emitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
+ return DestReg;
}
+ return 0;
+}
- MachineInstrBuilder EmitInstLoad(unsigned Opc, unsigned DstReg,
- unsigned MemReg, int64_t MemOffset) {
- return EmitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32)
+ return 0;
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool IsThreadLocal = GVar && GVar->isThreadLocal();
+ // TLS not supported at this time.
+ if (IsThreadLocal)
+ return 0;
+ emitInst(Mips::LW, DestReg)
+ .addReg(MFI->getGlobalBaseReg())
+ .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+ if ((GV->hasInternalLinkage() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV)))) {
+ unsigned TempReg = createResultReg(RC);
+ emitInst(Mips::ADDiu, TempReg)
+ .addReg(DestReg)
+ .addGlobalAddress(GV, 0, MipsII::MO_ABS_LO);
+ DestReg = TempReg;
}
+ return DestReg;
+}
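// The sequence above is the O32 PIC form, roughly:
//   lw    $dst, %got(sym)($gp)    ; GOT load via the global base register
//   addiu $dst, $dst, %lo(sym)    ; only for internal/local non-functions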
-#include "MipsGenFastISel.inc"
-};
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return (UnsupportedFPMode) ? 0 : materializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return materializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return materializeInt(C, VT);
+
+ return 0;
+}
+
+bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
+ // This construct looks a bit awkward, but it is how other ports handle
+ // this, and as this function is fleshed out, the cases that currently
+ // return false will gain additional code.
+ //
+ if (isa<Instruction>(Obj))
+ return false;
+ else if (isa<ConstantExpr>(Obj))
+ return false;
+ Addr.setReg(getRegForValue(Obj));
+ return Addr.getReg() != 0;
+}
+
+bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
+ const GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ if (!GV)
+ return false;
+ if (isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
+ return false;
+ Addr.setGlobalValue(GV);
+ return true;
+}
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
@@ -138,21 +350,134 @@ bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
return true;
return false;
}
-
-bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) {
- // This construct looks a big awkward but it is how other ports handle this
- // and as this function is more fully completed, these cases which
- // return false will have additional code in them.
- //
- if (isa<Instruction>(Obj))
+// Because of how emitCmp is called from fast-isel, you can end up with
+// redundant "andi" instructions after the sequences emitted below.
+// We should try to solve this issue in the future.
+//
+bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
+ const Value *Left = CI->getOperand(0), *Right = CI->getOperand(1);
+ bool IsUnsigned = CI->isUnsigned();
+ unsigned LeftReg = getRegEnsuringSimpleIntegerWidening(Left, IsUnsigned);
+ if (LeftReg == 0)
return false;
- else if (isa<ConstantExpr>(Obj))
+ unsigned RightReg = getRegEnsuringSimpleIntegerWidening(Right, IsUnsigned);
+ if (RightReg == 0)
return false;
- Addr.Base.Reg = getRegForValue(Obj);
- return Addr.Base.Reg != 0;
-}
+ CmpInst::Predicate P = CI->getPredicate();
-bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ switch (P) {
+ default:
+ return false;
+ case CmpInst::ICMP_EQ: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_NE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
+ break;
+ }
+ case CmpInst::ICMP_UGT: {
+ emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg);
+ break;
+ }
+ case CmpInst::ICMP_ULT: {
+ emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
+ break;
+ }
+ case CmpInst::ICMP_UGE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_ULE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_SGT: {
+ emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg);
+ break;
+ }
+ case CmpInst::ICMP_SLT: {
+ emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
+ break;
+ }
+ case CmpInst::ICMP_SGE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_SLE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: {
+ if (UnsupportedFPMode)
+ return false;
+ bool IsFloat = Left->getType()->isFloatTy();
+ bool IsDouble = Left->getType()->isDoubleTy();
+ if (!IsFloat && !IsDouble)
+ return false;
+ unsigned Opc, CondMovOpc;
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_UNE:
+ Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ case CmpInst::FCMP_OLT:
+ Opc = IsFloat ? Mips::C_OLT_S : Mips::C_OLT_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_OLE:
+ Opc = IsFloat ? Mips::C_OLE_S : Mips::C_OLE_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_OGT:
+ Opc = IsFloat ? Mips::C_ULE_S : Mips::C_ULE_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ case CmpInst::FCMP_OGE:
+ Opc = IsFloat ? Mips::C_ULT_S : Mips::C_ULT_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ default:
+ llvm_unreachable("Only switching of a subset of CCs.");
+ }
+ unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass);
+ unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
+ emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
+ emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
+ Mips::FCC0, RegState::ImplicitDefine);
+ MachineInstrBuilder MI = emitInst(CondMovOpc, ResultReg)
+ .addReg(RegWithOne)
+ .addReg(Mips::FCC0)
+ .addReg(RegWithZero, RegState::Implicit);
+ MI->tieOperands(0, 3);
+ break;
+ }
+ }
+ return true;
+}
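// Worked example for the integer path above: (icmp uge a, b) has no
// direct MIPS instruction, so it is emitted as the negation of ult:
//   sltu $tmp, $a, $b    ; tmp = (a < b)
//   xori $res, $tmp, 1   ; res = !(a < b), i.e. (a >= b)
// The FP path instead sets FCC0 with c.cond.fmt and then selects 0 or 1
// with movt/movf.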
+bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment) {
//
// more cases will be handled here in following patches.
@@ -175,11 +500,15 @@ bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
break;
}
case MVT::f32: {
+ if (UnsupportedFPMode)
+ return false;
ResultReg = createResultReg(&Mips::FGR32RegClass);
Opc = Mips::LWC1;
break;
}
case MVT::f64: {
+ if (UnsupportedFPMode)
+ return false;
ResultReg = createResultReg(&Mips::AFGR64RegClass);
Opc = Mips::LDC1;
break;
@@ -187,31 +516,11 @@ bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
default:
return false;
}
- EmitInstLoad(Opc, ResultReg, Addr.Base.Reg, Addr.Offset);
+ emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset());
return true;
}
-// Materialize a constant into a register, and return the register
-// number (or zero if we failed to handle it).
-unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
-
- // Only handle simple types.
- if (!CEVT.isSimple())
- return 0;
- MVT VT = CEVT.getSimpleVT();
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return MaterializeFP(CFP, VT);
- else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return MaterializeGV(GV, VT);
- else if (isa<ConstantInt>(C))
- return MaterializeInt(C, VT);
-
- return 0;
-}
-
-bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
//
// more cases will be handled here in following patches.
@@ -228,19 +537,23 @@ bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
Opc = Mips::SW;
break;
case MVT::f32:
+ if (UnsupportedFPMode)
+ return false;
Opc = Mips::SWC1;
break;
case MVT::f64:
+ if (UnsupportedFPMode)
+ return false;
Opc = Mips::SDC1;
break;
default:
return false;
}
- EmitInstStore(Opc, SrcReg, Addr.Base.Reg, Addr.Offset);
+ emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset());
return true;
}
-bool MipsFastISel::SelectLoad(const Instruction *I) {
+bool MipsFastISel::selectLoad(const Instruction *I) {
// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
return false;
@@ -252,17 +565,17 @@ bool MipsFastISel::SelectLoad(const Instruction *I) {
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr))
+ if (!computeAddress(I->getOperand(0), Addr))
return false;
unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ if (!emitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool MipsFastISel::SelectStore(const Instruction *I) {
+bool MipsFastISel::selectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
@@ -282,15 +595,394 @@ bool MipsFastISel::SelectStore(const Instruction *I) {
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr))
+ if (!computeAddress(I->getOperand(1), Addr))
return false;
- if (!EmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!emitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
return false;
return true;
}
-bool MipsFastISel::SelectRet(const Instruction *I) {
+//
+// This can cause a redundant sltiu to be generated.
+// FIXME: try to eliminate this in a future patch.
+//
+bool MipsFastISel::selectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ //
+ // TBB is the basic block for the case where the comparison is true.
+ // FBB is the basic block for the case where the comparison is false.
+ // if (cond) goto TBB
+ // goto FBB
+ // TBB:
+ //
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ unsigned CondReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(CondReg, CI))
+ return false;
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+ .addReg(CondReg)
+ .addMBB(TBB);
+ fastEmitBranch(FBB, DbgLoc);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ return false;
+}
+
+bool MipsFastISel::selectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(ResultReg, CI))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point extend instruction.
+bool MipsFastISel::selectFPExt(const Instruction *I) {
+ if (UnsupportedFPMode)
+ return false;
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg =
+ getRegForValue(Src); // This must be a 32-bit floating-point register
+ // class; maybe we should handle this differently.
+ if (!SrcReg)
+ return false;
+
+ unsigned DestReg = createResultReg(&Mips::AFGR64RegClass);
+ emitInst(Mips::CVT_D32_S, DestReg).addReg(SrcReg);
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool MipsFastISel::selectFPTrunc(const Instruction *I) {
+ if (UnsupportedFPMode)
+ return false;
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ unsigned DestReg = createResultReg(&Mips::FGR32RegClass);
+ if (!DestReg)
+ return false;
+
+ emitInst(Mips::CVT_S_D32, DestReg).addReg(SrcReg);
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) {
+ if (UnsupportedFPMode)
+ return false;
+ MVT DstVT, SrcVT;
+ if (!IsSigned)
+ return false; // We don't handle this case yet. There is no native
+ // instruction for this but it can be synthesized.
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ unsigned TempReg = createResultReg(&Mips::FGR32RegClass);
+ unsigned Opc;
+
+ if (SrcVT == MVT::f32)
+ Opc = Mips::TRUNC_W_S;
+ else
+ Opc = Mips::TRUNC_W_D32;
+
+ // Generate the convert.
+ emitInst(Opc, TempReg).addReg(SrcReg);
+
+ emitInst(Mips::MFC1, DestReg).addReg(TempReg);
+
+ updateValueMap(I, DestReg);
+ return true;
+}
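// Worked example for the signed path above, converting float to i32:
//   trunc.w.s $f_tmp, $f_src   ; round toward zero inside the FPU
//   mfc1      $dst, $f_tmp     ; move the 32-bit result to a GPR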
+//
+bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
+ SmallVectorImpl<MVT> &OutVTs,
+ unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+ // This is the minimum argument area used for A0-A3.
+ if (NumBytes < 16)
+ NumBytes = 16;
+
+ emitInst(Mips::ADJCALLSTACKDOWN).addImm(16);
+ // Process the args.
+ MVT firstMVT;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ if (i == 0) {
+ firstMVT = ArgVT;
+ if (ArgVT == MVT::f32) {
+ VA.convertToReg(Mips::F12);
+ } else if (ArgVT == MVT::f64) {
+ VA.convertToReg(Mips::D6);
+ }
+ } else if (i == 1) {
+ if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) {
+ if (ArgVT == MVT::f32) {
+ VA.convertToReg(Mips::F14);
+ } else if (ArgVT == MVT::f64) {
+ VA.convertToReg(Mips::D7);
+ }
+ }
+ }
+ if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) {
+ switch (VA.getLocMemOffset()) {
+ case 0:
+ VA.convertToReg(Mips::A0);
+ break;
+ case 4:
+ VA.convertToReg(Mips::A1);
+ break;
+ case 8:
+ VA.convertToReg(Mips::A2);
+ break;
+ case 12:
+ VA.convertToReg(Mips::A3);
+ break;
+ default:
+ break;
+ }
+ }
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
+
+ // Handle arg promotion: SExt, ZExt, AExt.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::AExt:
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ MVT SrcVT = ArgVT;
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+ if (!ArgReg)
+ return false;
+ break;
+ }
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ MVT SrcVT = ArgVT;
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+ if (!ArgReg)
+ return false;
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ CLI.OutRegs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ llvm_unreachable("Mips does not use custom args.");
+ return false;
+ } else {
+ //
+ // FIXME: This path will currently return false. It was copied
+ // from the AArch64 port and should be essentially fine for Mips too.
+ // The work to finish up this path will be done in a follow-on patch.
+ //
+ assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
+ // Need to store on the stack.
+ // FIXME: This alignment is incorrect but this path is disabled
+ // for now (will return false). We need to determine the right alignment
+ // based on the normal alignment for the underlying machine type.
+ //
+ unsigned ArgSize = RoundUpToAlignment(ArgVT.getSizeInBits(), 4);
+
+ unsigned BEAlign = 0;
+ if (ArgSize < 8 && !Subtarget->isLittle())
+ BEAlign = 8 - ArgSize;
+
+ Address Addr;
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(Mips::SP);
+ Addr.setOffset(VA.getLocMemOffset() + BEAlign);
+
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+ (void)(MMO);
+ // if (!emitStore(ArgVT, ArgReg, Addr, MMO))
+ return false; // can't store on the stack yet.
+ }
+ }
+
+ return true;
+}
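// The convertToReg calls above hand-implement the O32 rule that the
// first two arguments may be passed in FPRs: for f(float, float) they
// land in $f12/$f14, for f(double, double) in $d6/$d7, and an i32/f32
// assigned stack offset 0/4/8/12 is redirected to $a0-$a3, while the
// 16-byte home area is still reserved by ADJCALLSTACKDOWN.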
+
+bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
+ unsigned NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+ emitInst(Mips::ADJCALLSTACKUP).addImm(16);
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips);
+
+ // Only handle a single return value.
+ if (RVLocs.size() != 1)
+ return false;
+ // Copy all of the result registers out of their specified physreg.
+ MVT CopyVT = RVLocs[0].getValVT();
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ CLI.InRegs.push_back(RVLocs[0].getLocReg());
+
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
+ }
+ return true;
+}
+
+bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ // const char *SymName = CLI.SymName;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (IsVarArg)
+ return false;
+
+ // FIXME: Only handle *simple* calls for now.
+ MVT RetVT;
+ if (CLI.RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(CLI.RetTy, RetVT))
+ return false;
+
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<MVT, 16> OutVTs;
+ OutVTs.reserve(CLI.OutVals.size());
+
+ for (auto *Val : CLI.OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT) &&
+ !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
+ return false;
+
+ // We don't handle vector parameters yet.
+ if (VT.isVector() || VT.getSizeInBits() > 64)
+ return false;
+
+ OutVTs.push_back(VT);
+ }
+
+ Address Addr;
+ if (!computeCallAddress(Callee, Addr))
+ return false;
+
+ // Handle the arguments now that we've gotten them.
+ unsigned NumBytes;
+ if (!processCallArgs(CLI, OutVTs, NumBytes))
+ return false;
+
+ // Issue the call.
+ unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
+ emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
+ Mips::RA).addReg(Mips::T9);
+
+ // Add implicit physical register uses to the call.
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ CLI.Call = MIB;
+
+ // Finish off the call including any return values.
+ return finishCall(CLI, RetVT, NumBytes);
+}
+
+bool MipsFastISel::selectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
if (!FuncInfo.CanLowerReturn)
@@ -298,98 +990,181 @@ bool MipsFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
return false;
}
- EmitInst(Mips::RetRA);
+ emitInst(Mips::RetRA);
return true;
}
-bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
- if (!TargetSupported)
+bool MipsFastISel::selectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- switch (I->getOpcode()) {
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg)
+ return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ updateValueMap(I, SrcReg);
+ return true;
+}
+bool MipsFastISel::selectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+
+ if (!emitIntExt(SrcVT, SrcReg, DestVT, ResultReg, isZExt))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ unsigned ShiftAmt;
+ switch (SrcVT.SimpleTy) {
default:
+ return false;
+ case MVT::i8:
+ ShiftAmt = 24;
+ break;
+ case MVT::i16:
+ ShiftAmt = 16;
break;
- case Instruction::Load:
- return SelectLoad(I);
- case Instruction::Store:
- return SelectStore(I);
- case Instruction::Ret:
- return SelectRet(I);
}
- return false;
-}
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLL, TempReg).addReg(SrcReg).addImm(ShiftAmt);
+ emitInst(Mips::SRA, DestReg).addReg(TempReg).addImm(ShiftAmt);
+ return true;
}
-unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
- int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (VT == MVT::f32) {
- const TargetRegisterClass *RC = &Mips::FGR32RegClass;
- unsigned DestReg = createResultReg(RC);
- unsigned TempReg = Materialize32BitInt(Imm, &Mips::GPR32RegClass);
- EmitInst(Mips::MTC1, DestReg).addReg(TempReg);
- return DestReg;
- } else if (VT == MVT::f64) {
- const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
- unsigned DestReg = createResultReg(RC);
- unsigned TempReg1 = Materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
- unsigned TempReg2 =
- Materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
- EmitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
- return DestReg;
+bool MipsFastISel::emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ switch (SrcVT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ emitInst(Mips::SEB, DestReg).addReg(SrcReg);
+ break;
+ case MVT::i16:
+ emitInst(Mips::SEH, DestReg).addReg(SrcReg);
+ break;
}
- return 0;
+ return true;
}
-unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32)
- return 0;
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- unsigned DestReg = createResultReg(RC);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- bool IsThreadLocal = GVar && GVar->isThreadLocal();
- // TLS not supported at this time.
- if (IsThreadLocal)
- return 0;
- EmitInst(Mips::LW, DestReg).addReg(MFI->getGlobalBaseReg()).addGlobalAddress(
- GV, 0, MipsII::MO_GOT);
- return DestReg;
+bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ if ((DestVT != MVT::i32) && (DestVT != MVT::i16))
+ return false;
+ if (Subtarget->hasMips32r2())
+ return emitIntSExt32r2(SrcVT, SrcReg, DestVT, DestReg);
+ return emitIntSExt32r1(SrcVT, SrcReg, DestVT, DestReg);
}
-unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
- if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
- return 0;
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- const ConstantInt *CI = cast<ConstantInt>(C);
- int64_t Imm;
- if (CI->isNegative())
- Imm = CI->getSExtValue();
- else
- Imm = CI->getZExtValue();
- return Materialize32BitInt(Imm, RC);
+
+bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ switch (SrcVT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1);
+ break;
+ case MVT::i8:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff);
+ break;
+ case MVT::i16:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff);
+ break;
+ }
+ return true;
}
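
Likewise, a minimal sketch of what the ANDi forms above compute (illustrative
helper, not part of the patch):

  #include <cstdint>
  // Masking with an all-ones constant of the source width zero-extends i1,
  // i8, or i16 (masks 1, 0xff, 0xffff) into a 32-bit value.
  static uint32_t zeroExtendViaMask(uint32_t Val, unsigned SrcBits) {
    return Val & ((1u << SrcBits) - 1u); // valid for SrcBits in {1, 8, 16}
  }
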
-unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
- const TargetRegisterClass *RC) {
- unsigned ResultReg = createResultReg(RC);
+bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (IsZExt)
+ return emitIntZExt(SrcVT, SrcReg, DestVT, DestReg);
+ return emitIntSExt(SrcVT, SrcReg, DestVT, DestReg);
+}
- if (isInt<16>(Imm)) {
- unsigned Opc = Mips::ADDiu;
- EmitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
- return ResultReg;
- } else if (isUInt<16>(Imm)) {
- EmitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
- return ResultReg;
+unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool isZExt) {
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ return emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt);
+}
+
+bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
+ if (!TargetSupported)
+ return false;
+ switch (I->getOpcode()) {
+ default:
+ break;
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::Br:
+ return selectBranch(I);
+ case Instruction::Ret:
+ return selectRet(I);
+ case Instruction::Trunc:
+ return selectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return selectIntExt(I);
+ case Instruction::FPTrunc:
+ return selectFPTrunc(I);
+ case Instruction::FPExt:
+ return selectFPExt(I);
+ case Instruction::FPToSI:
+ return selectFPToInt(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return selectFPToInt(I, /*isSigned*/ false);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return selectCmp(I);
}
- unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
- if (Lo) {
- // Both Lo and Hi have nonzero bits.
- unsigned TmpReg = createResultReg(RC);
- EmitInst(Mips::LUi, TmpReg).addImm(Hi);
- EmitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
- } else {
- EmitInst(Mips::LUi, ResultReg).addImm(Hi);
+ return false;
+}
+
+unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
+ bool IsUnsigned) {
+ unsigned VReg = getRegForValue(V);
+ if (VReg == 0)
+ return 0;
+ MVT VMVT = TLI.getValueType(V->getType(), true).getSimpleVT();
+ if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned))
+ return 0;
+ VReg = TempReg;
}
- return ResultReg;
+ return VReg;
}
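
In value terms, getRegEnsuringSimpleIntegerWidening guarantees that i8/i16
inputs reach their users as full i32 values; a hypothetical C++ equivalent of
that widening (names are illustrative, not from the patch):

  #include <cstdint>
  static int32_t widenToI32(uint32_t Val, unsigned Bits, bool IsUnsigned) {
    uint32_t Mask = (1u << Bits) - 1u;
    uint32_t U = Val & Mask;          // zero-extend to 32 bits
    if (IsUnsigned)
      return (int32_t)U;
    uint32_t Sign = 1u << (Bits - 1); // sign-extend via the XOR trick
    return (int32_t)((U ^ Sign) - Sign);
  }
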
namespace llvm {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 61afe179df57..3014a0d95342 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -100,7 +100,7 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
int64_t Offset = 0;
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 9d593091e018..90a8d2af63b2 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_FRAMEINFO_H
-#define MIPS_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H
#include "Mips.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 52f4c0d3b9ea..ff8760da3fa6 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSISELDAGTODAG_H
-#define MIPSISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSISELDAGTODAG_H
#include "Mips.h"
#include "MipsSubtarget.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8e40668b2fa7..99fd739c3ed0 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -200,9 +201,9 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
+MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
- : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(STI) {
+ : TargetLowering(TM), Subtarget(STI) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
@@ -214,12 +215,15 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
ZeroOrNegativeOneBooleanContent);
  // Load extended operations for i1 types must be promoted
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ }
// MIPS doesn't have extending float->double load/store
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Used by legalize types to correctly generate the setcc result.
@@ -367,9 +371,9 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
if (Subtarget.isGP64bit()) {
- setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i32, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom);
setTruncStoreAction(MVT::i64, MVT::i32, Custom);
}
@@ -400,7 +404,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
isMicroMips = Subtarget.inMicroMipsMode();
}
-const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM,
+const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM,
const MipsSubtarget &STI) {
if (STI.inMips16Mode())
return llvm::createMips16TargetLowering(TM, STI);
@@ -932,18 +936,37 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::DIVU:
case Mips::MOD:
case Mips::MODU:
- return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
- false);
+ return insertDivByZeroTrap(
+ MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), false);
case Mips::PseudoDSDIV:
case Mips::PseudoDUDIV:
case Mips::DDIV:
case Mips::DDIVU:
case Mips::DMOD:
case Mips::DMODU:
- return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
- true);
+ return insertDivByZeroTrap(
+ MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), true);
case Mips::SEL_D:
return emitSEL_D(MI, BB);
+
+ case Mips::PseudoSELECT_I:
+ case Mips::PseudoSELECT_I64:
+ case Mips::PseudoSELECT_S:
+ case Mips::PseudoSELECT_D32:
+ case Mips::PseudoSELECT_D64:
+ return emitPseudoSELECT(MI, BB, false, Mips::BNE);
+ case Mips::PseudoSELECTFP_F_I:
+ case Mips::PseudoSELECTFP_F_I64:
+ case Mips::PseudoSELECTFP_F_S:
+ case Mips::PseudoSELECTFP_F_D32:
+ case Mips::PseudoSELECTFP_F_D64:
+ return emitPseudoSELECT(MI, BB, true, Mips::BC1F);
+ case Mips::PseudoSELECTFP_T_I:
+ case Mips::PseudoSELECTFP_T_I64:
+ case Mips::PseudoSELECTFP_T_S:
+ case Mips::PseudoSELECTFP_T_D32:
+ case Mips::PseudoSELECTFP_T_D64:
+ return emitPseudoSELECT(MI, BB, true, Mips::BC1T);
}
}
@@ -958,7 +981,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
@@ -1041,7 +1065,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
unsigned SrcReg) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
if (Subtarget.hasMips32r2() && Size == 1) {
@@ -1077,7 +1102,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1174,7 +1200,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ unsigned LL = isMicroMips ? Mips::LL_MM : Mips::LL;
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
@@ -1197,7 +1224,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, DL, TII->get(Mips::SC), Success)
+ unsigned SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
@@ -1227,7 +1255,8 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
@@ -1309,7 +1338,8 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1406,7 +1436,8 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ unsigned LL = isMicroMips ? Mips::LL_MM : Mips::LL;
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, DL, TII->get(Mips::BNE))
@@ -1422,7 +1453,8 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, DL, TII->get(Mips::SC), Success)
+ unsigned SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
@@ -1444,8 +1476,10 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock::iterator II(MI);
@@ -1486,7 +1520,7 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(), MemVT, false, false,
- 0);
+ false, 0);
Chain = Addr.getValue(1);
if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ||
@@ -1568,8 +1602,6 @@ SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- // FIXME there isn't actually debug info here
- SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
@@ -1579,15 +1611,9 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
const MipsTargetObjectFile &TLOF =
(const MipsTargetObjectFile&)getObjFileLowering();
- // %gp_rel relocation
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
- MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
- DAG.getVTList(MVT::i32), GA);
- SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
- return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
- }
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
+ // %gp_rel relocation
+ return getAddrGPRel(N, Ty, DAG);
// %hi/%lo relocation
return getAddrNonPIC(N, Ty, DAG);
@@ -1718,21 +1744,20 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
- // gp_rel relocation
- // FIXME: we should reference the constant pool using small data sections,
- // but the asm printer currently doesn't support this feature without
- // hacking it. This feature should come soon so we can uncomment the
- // stuff below.
- //if (IsInSmallSection(C->getType())) {
- // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
- // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
- // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget.isABI_N64())
+ !Subtarget.isABI_N64()) {
+ const MipsTargetObjectFile &TLOF =
+ (const MipsTargetObjectFile&)getObjFileLowering();
+
+ if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
+ // %gp_rel relocation
+ return getAddrGPRel(N, Ty, DAG);
+
return getAddrNonPIC(N, Ty, DAG);
+ }
return getAddrLocal(N, Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
@@ -2259,7 +2284,7 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// an argument. Otherwise, passed in A1, A2, A3 and stack.
// f64 - Only passed in two aliased f32 registers if no int reg has been used
// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
-// not used, it must be shadowed. If only A3 is avaiable, shadow it and
+// not used, it must be shadowed. If only A3 is available, shadow it and
// go to stack.
//
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
@@ -2373,6 +2398,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
}
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) LLVM_ATTRIBUTE_UNUSED;
+
#include "MipsGenCallingConv.inc"
//===----------------------------------------------------------------------===//
@@ -2407,13 +2436,19 @@ void MipsTargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
// Insert node "GP copy globalreg" before call to function.
//
// R_MIPS_CALL* operators (emitted when non-internal functions are called
// in PIC mode) allow symbols to be resolved via lazy binding.
// The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
+  // Note that we don't need GP to point to the GOT for indirect calls (when
+  // R_MIPS_CALL* is not used for the call), because the Mips linker generates
+  // a lazy binding stub for a function only when R_MIPS_CALL* are the only
+  // relocs used for it (that is, no lazy binding stub is generated for a
+  // function whose address is taken in the program).
+ if (IsPICCall && !InternalLinkage && IsCallReloc) {
unsigned GPReg = Subtarget.isABI_N64() ? Mips::GP_64 : Mips::GP;
EVT Ty = Subtarget.isABI_N64() ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
@@ -2438,7 +2473,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
if (Subtarget.inMips16HardFloat()) {
@@ -2474,15 +2510,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(
- CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(),
+ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(),
MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget));
// Allocate the reserved argument area. It seems strange to do this from the
@@ -2636,7 +2671,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsPICCall =
(Subtarget.isABI_N64() || IsPIC); // true if calls are translated to
// jalr $25
- bool GlobalOrExternal = false, InternalLinkage = false;
+ bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
@@ -2648,13 +2683,16 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (InternalLinkage)
Callee = getAddrLocal(G, Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
- else if (LargeGOT)
+ else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
- else
+ IsCallReloc = true;
+ } else {
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Val));
+ IsCallReloc = true;
+ }
} else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
@@ -2666,13 +2704,16 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
- else if (LargeGOT)
+ else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
- else // N64 || PIC
+ IsCallReloc = true;
+ } else { // N64 || PIC
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
+ IsCallReloc = true;
+ }
GlobalOrExternal = true;
}
@@ -2681,7 +2722,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
- CLI, Callee, Chain);
+ IsCallReloc, CLI, Callee, Chain);
if (IsTailCall)
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
@@ -2709,8 +2750,8 @@ SDValue MipsTargetLowering::LowerCallResult(
TargetLowering::CallLoweringInfo &CLI) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI);
// Copy all of the result registers out of their specified physreg.
@@ -2843,8 +2884,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
const MipsABIInfo &ABI = Subtarget.getABI();
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
Function::const_arg_iterator FuncArg =
@@ -2982,8 +3023,7 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- MipsCCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
@@ -2999,8 +3039,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- MipsCCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
- *DAG.getContext());
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
@@ -3181,7 +3220,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
/// that is returned indicates whether parsing was successful. The second flag
/// is true if the numeric part exists.
static std::pair<bool, bool>
-parsePhysicalReg(const StringRef &C, std::string &Prefix,
+parsePhysicalReg(StringRef C, std::string &Prefix,
unsigned long long &Reg) {
if (C.front() != '{' || C.back() != '}')
return std::make_pair(false, false);
@@ -3202,8 +3241,9 @@ parsePhysicalReg(const StringRef &C, std::string &Prefix,
}
std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
-parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC;
std::string Prefix;
unsigned long long Reg;
@@ -3585,7 +3625,8 @@ void MipsTargetLowering::passByValArg(
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue LoadVal = DAG.getExtLoad(
ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
- MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment);
+ MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, false,
+ Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// Shift the loaded value.
@@ -3679,7 +3720,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
MachineFunction &MF = State->getMachineFunction();
- const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
assert(Size && "Byval argument's size shouldn't be 0.");
@@ -3721,3 +3762,102 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
}
+
+MachineBasicBlock *
+MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB,
+ bool isFPCmp, unsigned Opc) const {
+ assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) &&
+         "Subtarget already supports SELECT nodes with the use of "
+         "conditional-move instructions.");
+
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ if (isFPCmp) {
+ // bc1[tf] cc, sinkMBB
+ BuildMI(BB, DL, TII->get(Opc))
+ .addReg(MI->getOperand(1).getReg())
+ .addMBB(sinkMBB);
+ } else {
+ // bne rs, $0, sinkMBB
+ BuildMI(BB, DL, TII->get(Opc))
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(Mips::ZERO)
+ .addMBB(sinkMBB);
+ }
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return BB;
+}
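
The diamond built above implements an ordinary conditional in control flow; as
a sketch of the semantics only (this helper is not in the patch):

  // Dst = Cond ? TrueVal : FalseVal, expanded with an explicit branch because
  // the target lacks conditional-move instructions (pre-MIPS4/MIPS32).
  static int selectViaBranch(bool Cond, int TrueVal, int FalseVal) {
    if (Cond)          // bne/bc1t branches directly to sinkMBB
      return TrueVal;
    return FalseVal;   // copy0MBB falls through into sinkMBB
  }
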
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned MipsTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+  // Named registers are expected to be fairly rare. For now, just support $28
+  // since the Linux kernel uses it.
+ if (Subtarget.isGP64bit()) {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("$28", Mips::GP_64)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ } else {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("$28", Mips::GP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ }
+  report_fatal_error("Invalid register name for a global variable");
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 23163b73e0a4..71f140b7a656 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MipsISELLOWERING_H
-#define MipsISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
@@ -215,10 +215,10 @@ namespace llvm {
class MipsTargetLowering : public TargetLowering {
bool isMicroMips;
public:
- explicit MipsTargetLowering(MipsTargetMachine &TM,
+ explicit MipsTargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI);
- static const MipsTargetLowering *create(MipsTargetMachine &TM,
+ static const MipsTargetLowering *create(const MipsTargetMachine &TM,
const MipsSubtarget &STI);
/// createFastISel - This method returns a target specific FastISel object,
@@ -262,6 +262,8 @@ namespace llvm {
void HandleByVal(CCState *, unsigned &, unsigned) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
@@ -332,6 +334,21 @@ namespace llvm {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address using gp-relative addressing:
+ //
+ // (add $gp, %gp_rel(sym))
+ template<class NodeTy>
+ SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ assert(Ty == MVT::i32);
+ SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getRegister(Mips::GP, Ty),
+ DAG.getNode(MipsISD::GPRel, DL, DAG.getVTList(Ty),
+ GPRel));
+ }
+
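
The net address arithmetic of getAddrGPRel, sketched as plain C++ (assuming
the symbol lives in the gp-addressable small-data area; names illustrative):

  #include <cstdint>
  // (add $gp, %gp_rel(sym)): the run-time $gp value plus the linker-resolved
  // signed 16-bit offset of the symbol from _gp.
  static uintptr_t gpRelAddress(uintptr_t GPValue, int32_t GpRelOffset) {
    return GPValue + (intptr_t)GpRelOffset;
  }
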
/// This function fills Ops, which is the list of operands that will later
/// be used when a function call node is created. It also generates
/// copyToReg nodes to set up argument registers.
@@ -339,7 +356,8 @@ namespace llvm {
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const;
protected:
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -470,7 +488,7 @@ namespace llvm {
/// This function parses registers that appear in inline-asm constraints.
/// It returns pair (0, 0) on failure.
std::pair<unsigned, const TargetRegisterClass *>
- parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const;
+ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -518,13 +536,18 @@ namespace llvm {
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitSEL_D(MachineInstr *MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitPseudoSELECT(MachineInstr *MI,
+ MachineBasicBlock *BB, bool isFPCmp,
+ unsigned Opc) const;
};
/// Create MipsTargetLowering objects.
const MipsTargetLowering *
- createMips16TargetLowering(MipsTargetMachine &TM, const MipsSubtarget &STI);
+ createMips16TargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
const MipsTargetLowering *
- createMipsSETargetLowering(MipsTargetMachine &TM, const MipsSubtarget &STI);
+ createMipsSETargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
namespace Mips {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -532,4 +555,4 @@ namespace llvm {
}
}
-#endif // MipsISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 1100e1e0323a..2aa83289a106 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -178,38 +178,6 @@ class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
let mayStore = 1;
}
-class SW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
- SDPatternOperator OpNode= null_frag> :
- InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
- let DecoderMethod = "DecodeFMem2";
- let mayStore = 1;
-}
-
-class LW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
- SDPatternOperator OpNode= null_frag> :
- InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
- let DecoderMethod = "DecodeFMem2";
- let mayLoad = 1;
-}
-
-class SW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
- SDPatternOperator OpNode= null_frag> :
- InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
- let DecoderMethod = "DecodeFMem3";
- let mayStore = 1;
-}
-
-class LW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
- SDPatternOperator OpNode= null_frag> :
- InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
- let DecoderMethod = "DecodeFMem3";
- let mayLoad = 1;
-}
-
class MADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode = null_frag> :
InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
@@ -243,14 +211,14 @@ class SWXC1_FT<string opstr, RegisterOperand DRC,
}
class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
- SDPatternOperator Op = null_frag> :
+ SDPatternOperator Op = null_frag, bit DelaySlot = 1> :
InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset),
!strconcat(opstr, "\t$fcc, $offset"),
[(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin,
FrmFI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
@@ -436,30 +404,6 @@ def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>,
def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
ISA_MIPS2, FGR_32;
-// Cop2 Memory Instructions
-// FIXME: These aren't really FPU instructions and as such don't belong in this
-// file
-def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
- ISA_MIPS1_NOT_32R6_64R6;
-def SWC2 : SW_FT2<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
- ISA_MIPS1_NOT_32R6_64R6;
-def LDC2 : LW_FT2<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
- ISA_MIPS2_NOT_32R6_64R6;
-def SDC2 : SW_FT2<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
- ISA_MIPS2_NOT_32R6_64R6;
-
-// Cop3 Memory Instructions
-// FIXME: These aren't really FPU instructions and as such don't belong in this
-// file
-let DecoderNamespace = "COP3_" in {
- def LWC3 : LW_FT3<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
- def SWC3 : SW_FT3<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
- def LDC3 : LW_FT3<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
- ISA_MIPS2;
- def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
- ISA_MIPS2;
-}
-
// Indexed loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
@@ -562,8 +506,12 @@ def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
+def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>,
+ BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
+def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>,
+ BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -629,8 +577,12 @@ def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
+ ISA_MIPS2_NOT_32R6_64R6;
def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
+ ISA_MIPS2_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 6a01ae560f3a..d8dae25aca9f 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -411,6 +411,20 @@ class SYNC_FM : StdArch {
let Inst{5-0} = 0xf;
}
+class SYNCI_FM : StdArch {
+  // mem_simm16 addresses are encoded as reg << 16 | imm (see getMemEncoding).
+ bits<21> addr;
+ bits<5> rs = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000001;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0b11111;
+ let Inst{15-0} = offset;
+}
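
A standalone sketch of how the SYNCI_FM field layout above packs into a 32-bit
instruction word (the encoder function is hypothetical, not part of the patch):

  #include <cstdint>
  static uint32_t encodeSYNCI(unsigned Rs, uint16_t Offset) {
    uint32_t Inst = 0;
    Inst |= 0b000001u << 26;    // Inst{31-26}: REGIMM major opcode
    Inst |= (Rs & 0x1Fu) << 21; // Inst{25-21}: base register rs
    Inst |= 0b11111u << 16;     // Inst{20-16}: selects SYNCI within REGIMM
    Inst |= Offset;             // Inst{15-0}:  16-bit offset
    return Inst;
  }
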
+
class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
bits<5> rs;
bits<5> rt;
@@ -440,7 +454,7 @@ class EXT_FM<bits<6> funct> : StdArch {
let Inst{5-0} = funct;
}
-class RDHWR_FM {
+class RDHWR_FM : StdArch {
bits<5> rt;
bits<5> rd;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index dcc0e24e080b..0839147984b5 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -15,7 +15,7 @@
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
-#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index bdf2fd37ed8c..db149d4dc9d0 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,8 +15,8 @@
// size in bytes; MipsLongBranch only expects it to be the correct upper bound.
//===----------------------------------------------------------------------===//
-#ifndef MIPSINSTRUCTIONINFO_H
-#define MIPSINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSINSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSINSTRINFO_H
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 26ecfbf6f4e6..aef103956978 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -156,6 +156,8 @@ def HasMips3 : Predicate<"Subtarget->hasMips3()">,
AssemblerPredicate<"FeatureMips3">;
def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
+def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">,
+                   AssemblerPredicate<"!FeatureMips4_32">;
def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
AssemblerPredicate<"FeatureMips4_32r2">;
def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
@@ -180,8 +182,6 @@ def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
AssemblerPredicate<"FeatureMips64r6">;
def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
-def IsN64 : Predicate<"Subtarget->isABI_N64()">,
- AssemblerPredicate<"FeatureN64">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
@@ -220,6 +220,9 @@ class GPR_64 { list<Predicate> GPRPredicates = [IsGP64bit]; }
// subtractive predicate will hopefully keep us under the 32 predicate
// limit long enough to develop an alternative way to handle P1||P2
// predicates.
+class ISA_MIPS1_NOT_4_32 {
+ list<Predicate> InsnPredicates = [NotMips4_32];
+}
class ISA_MIPS1_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6];
}
@@ -316,7 +319,7 @@ class IsAsCheapAsAMove {
}
class NeverHasSideEffects {
- bit neverHasSideEffects = 1;
+ bit hasSideEffects = 0;
}
//===----------------------------------------------------------------------===//
@@ -331,7 +334,7 @@ include "MipsInstrFormats.td"
def MipsJumpTargetAsmOperand : AsmOperandClass {
let Name = "JumpTarget";
- let ParserMethod = "ParseJumpTarget";
+ let ParserMethod = "parseJumpTarget";
let PredicateMethod = "isImm";
let RenderMethod = "addImmOperands";
}
@@ -425,7 +428,14 @@ def MipsMemSimm11AsmOperand : AsmOperandClass {
let RenderMethod = "addMemOperands";
let ParserMethod = "parseMemOperand";
let PredicateMethod = "isMemWithSimmOffset<11>";
- //let DiagnosticType = "Simm11";
+}
+
+def MipsMemSimm16AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm16";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffset<16>";
}
def MipsInvertedImmoperand : AsmOperandClass {
@@ -470,6 +480,12 @@ def mem_simm11 : mem_generic {
let ParserMatchClass = MipsMemSimm11AsmOperand;
}
+def mem_simm16 : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm16);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm16AsmOperand;
+}
+
def mem_ea : Operand<iPTR> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops ptr_rc, simm16);
@@ -632,7 +648,7 @@ class shift_rotate_reg<string opstr, RegisterOperand RO, InstrItinClass itin,
class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
[], II_LUI, FrmI, opstr>, IsAsCheapAsAMove {
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -672,28 +688,62 @@ class StoreLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
let DecoderMethod = "DecodeMem";
}
+// COP2 Load/Store
+class LW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayLoad = 1;
+}
+
+class SW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayStore = 1;
+}
+
+// COP3 Load/Store
+class LW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayLoad = 1;
+}
+
+class SW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayStore = 1;
+}
+
// Conditional Branch
class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
[(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
[(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
@@ -765,9 +815,12 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
[], IIBranch, FrmR>;
- class BGEZAL_FT<string opstr, DAGOperand opnd, RegisterOperand RO> :
+ class BGEZAL_FT<string opstr, DAGOperand opnd,
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> {
+ let hasDelaySlot = DelaySlot;
+ }
}
@@ -823,6 +876,13 @@ class SYNC_FT<string opstr> :
InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)],
NoItinerary, FrmOther, opstr>;
+class SYNCI_FT<string opstr> :
+ InstSE<(outs), (ins mem_simm16:$addr), !strconcat(opstr, "\t$addr"), [],
+ NoItinerary, FrmOther, opstr> {
+ let hasSideEffects = 1;
+ let DecoderMethod = "DecodeSyncI";
+}
+
let hasSideEffects = 1 in
class TEQ_FT<string opstr, RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, RO:$rt, uimm16:$code_),
@@ -839,7 +899,7 @@ class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
itin, FrmR, opstr> {
let isCommutable = 1;
let Defs = DefRegs;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
// Pseudo multiply/divide instruction with explicit accumulator register
@@ -885,7 +945,7 @@ class MoveFromLOHI<string opstr, RegisterOperand RO, Register UseReg>:
InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], II_MFHI_MFLO,
FrmR, opstr> {
let Uses = [UseReg];
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
class PseudoMTLOHI<RegisterClass DstRC, RegisterClass SrcRC>
@@ -897,7 +957,7 @@ class MoveToLOHI<string opstr, RegisterOperand RO, list<Register> DefRegs>:
InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], II_MTHI_MTLO,
FrmR, opstr> {
let Defs = DefRegs;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
class EffectiveAddress<string opstr, RegisterOperand RO> :
@@ -927,13 +987,13 @@ class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
class SubwordSwap<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
NoItinerary, FrmR, opstr> {
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
}
// Read Hardware
class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
InstSE<(outs CPURegOperand:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
- II_RDHWR, FrmR>;
+ II_RDHWR, FrmR, "rdhwr">;
// Ext and Ins
class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
@@ -1059,18 +1119,20 @@ def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
+let AdditionalPredicates = [NotInMicroMips] in {
def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16,
- add>,
- ADDI_FM<0x9>, IsAsCheapAsAMove;
+ add>, ADDI_FM<0x9>, IsAsCheapAsAMove;
+}
def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>,
ISA_MIPS1_NOT_32R6_64R6;
def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xa>;
def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xb>;
+let AdditionalPredicates = [NotInMicroMips] in {
def ANDi : MMRel, ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, immZExt16,
- and>,
- ADDI_FM<0xc>;
+ and>, ADDI_FM<0xc>;
+}
def ORi : MMRel, ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16,
or>,
ADDI_FM<0xd>;
@@ -1100,10 +1162,12 @@ def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
+let AdditionalPredicates = [NotInMicroMips] in {
def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl,
immZExt5>, SRA_FM<0, 0>;
def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl,
immZExt5>, SRA_FM<2, 0>;
+}
def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra,
immZExt5>, SRA_FM<3, 0>;
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>,
@@ -1147,13 +1211,35 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
ISA_MIPS1_NOT_32R6_64R6;
}
+// COP2 Memory Instructions
+def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWC2 : SW_FT2<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def LDC2 : LW_FT2<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def SDC2 : SW_FT2<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+ ISA_MIPS2_NOT_32R6_64R6;
+
+// COP3 Memory Instructions
+let DecoderNamespace = "COP3_" in {
+ def LWC3 : LW_FT3<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+ def SWC3 : SW_FT3<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+ def LDC3 : LW_FT3<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+ ISA_MIPS2;
+ def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+ ISA_MIPS2;
+}
+
def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
-def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
-def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
-def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
-def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>;
-def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>;
-def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>;
+def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
+
+def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
+def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
+def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
+def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
+def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
+def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>,
ISA_MIPS2_NOT_32R6_64R6;
@@ -1171,7 +1257,7 @@ def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
-def SDBBP : SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
+def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
@@ -1193,15 +1279,27 @@ def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
AdditionalRequires<[RelocStatic]>, IsBranch;
def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>;
def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>;
+def BEQL : MMRel, CBranch<"beql", brtarget, seteq, GPR32Opnd, 0>,
+ BEQ_FM<20>, ISA_MIPS2_NOT_32R6_64R6;
def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>;
+def BNEL : MMRel, CBranch<"bnel", brtarget, setne, GPR32Opnd, 0>,
+ BEQ_FM<21>, ISA_MIPS2_NOT_32R6_64R6;
def BGEZ : MMRel, CBranchZero<"bgez", brtarget, setge, GPR32Opnd>,
BGEZ_FM<1, 1>;
+def BGEZL : MMRel, CBranchZero<"bgezl", brtarget, setge, GPR32Opnd, 0>,
+ BGEZ_FM<1, 3>, ISA_MIPS2_NOT_32R6_64R6;
def BGTZ : MMRel, CBranchZero<"bgtz", brtarget, setgt, GPR32Opnd>,
BGEZ_FM<7, 0>;
+def BGTZL : MMRel, CBranchZero<"bgtzl", brtarget, setgt, GPR32Opnd, 0>,
+ BGEZ_FM<23, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BLEZ : MMRel, CBranchZero<"blez", brtarget, setle, GPR32Opnd>,
BGEZ_FM<6, 0>;
+def BLEZL : MMRel, CBranchZero<"blezl", brtarget, setle, GPR32Opnd, 0>,
+ BGEZ_FM<22, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>,
BGEZ_FM<1, 0>;
+def BLTZL : MMRel, CBranchZero<"bltzl", brtarget, setlt, GPR32Opnd, 0>,
+ BGEZ_FM<1, 2>, ISA_MIPS2_NOT_32R6_64R6;
def B : UncondBranch<BEQ>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
@@ -1214,8 +1312,12 @@ let AdditionalPredicates = [NotInMicroMips] in {
def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6;
def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>,
ISA_MIPS1_NOT_32R6_64R6;
+def BGEZALL : MMRel, BGEZAL_FT<"bgezall", brtarget, GPR32Opnd, 0>,
+ BGEZAL_FM<0x13>, ISA_MIPS2_NOT_32R6_64R6;
def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>,
ISA_MIPS1_NOT_32R6_64R6;
+def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>,
+ BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
def TAILCALL : TailCall<J>;
def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
@@ -1350,7 +1452,7 @@ def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
-def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
+def RDHWR : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
@@ -1362,10 +1464,10 @@ def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
- FrmOther>;
-def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>;
-def EHB : Barrier<"ehb">, BARRIER_FM<3>;
-def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+ FrmOther, asmstr>;
+def SSNOP : MMRel, Barrier<"ssnop">, BARRIER_FM<1>;
+def EHB : MMRel, Barrier<"ehb">, BARRIER_FM<3>;
+def PAUSE : MMRel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
// JR_HB and JALR_HB are defined here using the new style naming
// scheme because some of this code is shared with Mips32r6InstrInfo.td
@@ -1408,21 +1510,22 @@ def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
- FrmOther>;
-def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>;
-def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
-def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
-def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
+ FrmOther, asmstr>;
+def TLBP : MMRel, TLB<"tlbp">, COP0_TLB_FM<0x08>;
+def TLBR : MMRel, TLB<"tlbr">, COP0_TLB_FM<0x01>;
+def TLBWI : MMRel, TLB<"tlbwi">, COP0_TLB_FM<0x02>;
+def TLBWR : MMRel, TLB<"tlbwr">, COP0_TLB_FM<0x06>;
class CacheOp<string instr_asm, Operand MemOpnd> :
InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint),
- !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther> {
+ !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther,
+ instr_asm> {
let DecoderMethod = "DecodeCacheOp";
}
-def CACHE : CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
+def CACHE : MMRel, CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
INSN_MIPS3_32_NOT_32R6_64R6;
-def PREF : CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
+def PREF : MMRel, CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
INSN_MIPS3_32_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
@@ -1437,8 +1540,14 @@ def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"addu $rs, $rt, $imm",
(ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"addu $rs, $imm",
+ (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"add $rs, $rt, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"add $rs, $imm",
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"and $rs, $rt, $imm",
(ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"and $rs, $imm",
@@ -1482,25 +1591,30 @@ def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
-def : MipsInstAlias<"ei", (EI ZERO), 1>;
-def : MipsInstAlias<"di", (DI ZERO), 1>;
-
-def : MipsInstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
- 1>;
-def : MipsInstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
- 1>;
-def : MipsInstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
+
+def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tge $rs, $rt",
+ (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tne $rs, $rt",
+ (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sub, $rd, $rs, $imm",
(ADDi GPR32Opnd:$rd, GPR32Opnd:$rs,
- InvertedImOperand:$imm), 0>;
+ InvertedImOperand:$imm), 0>, ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"sub $rs, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, InvertedImOperand:$imm),
- 0>;
+ 0>, ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"subu, $rd, $rs, $imm",
(ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs,
InvertedImOperand:$imm), 0>;
@@ -1544,10 +1658,12 @@ class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
// Small immediates
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsPat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
def : MipsPat<(i32 immZExt16:$in),
(ORi ZERO, imm:$in)>;
+}
def : MipsPat<(i32 immLow16Zero:$in),
(LUi (HI16 imm:$in))>;
@@ -1565,6 +1681,12 @@ let AdditionalPredicates = [NotDSP] in {
(ADDiu GPR32:$src, imm:$imm)>;
}
+// Support multiplication for pre-Mips32 targets that don't have
+// the MUL instruction.
+def : MipsPat<(mul GPR32:$lhs, GPR32:$rhs),
+ (PseudoMFLO (PseudoMULT GPR32:$lhs, GPR32:$rhs))>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
// SYNC
def : MipsPat<(MipsSync (i32 immz)),
(SYNC 0)>, ISA_MIPS2;
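The new ISA_MIPS1_NOT_32R6_64R6 pattern just above selects integer multiplication on targets that predate the MIPS32 MUL instruction by pairing PseudoMULT with PseudoMFLO: the full 64-bit product lands in HI:LO, and the 32-bit result is read back from LO. A minimal host-side sketch of that semantics (the helper name is ours, not from the patch):

#include <cstdint>
#include <cstdio>

// What the pattern selects on pre-MIPS32 targets: MULT forms the 64-bit
// product in HI:LO, MFLO reads the low word.
uint32_t mulViaMultMflo(int32_t A, int32_t B) {
  int64_t HiLo = (int64_t)A * (int64_t)B; // PseudoMULT: HI:LO = A * B
  return (uint32_t)(HiLo & 0xffffffff);   // PseudoMFLO: result = LO
}

int main() {
  printf("%u\n", mulViaMultMflo(123456, -789)); // low 32 bits of the product
}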
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
deleted file mode 100644
index 2072488206aa..000000000000
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Mips target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsJITInfo.h"
-#include "MipsInstrInfo.h"
-#include "MipsRelocations.h"
-#include "MipsSubtarget.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-
-void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- unsigned NewAddr = (intptr_t)New;
- unsigned OldAddr = (intptr_t)Old;
- const unsigned NopInstr = 0x0;
-
- // If the functions are in the same memory segment, insert PC-region branch.
- if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
- unsigned *OldInstruction = (unsigned *)Old;
- *OldInstruction = 0x08000000;
- unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
-
- JTargetAddr >>= 2;
- *OldInstruction |= JTargetAddr;
-
- // Insert a NOP.
- OldInstruction++;
- *OldInstruction = NopInstr;
-
- sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
- } else {
- // We need to clear hint bits from the instruction, in case it is 'jr ra'.
- const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
- unsigned* CurrentInstr = (unsigned*)Old;
- unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
- unsigned* NextInstr = CurrentInstr + 1;
- unsigned NextInstrHintClear = (*NextInstr) & HintMask;
-
- // Do absolute jump if there are 2 or more instructions before return from
- // the old function.
- if ((CurrInstrHintClear != ReturnSequence) &&
- (NextInstrHintClear != ReturnSequence)) {
- const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
- const unsigned JrT0Instr = 0x01000008;
- // lui t0, high 16 bit of the NewAddr
- (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
- // addiu t0, t0, low 16 bit of the NewAddr
- (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
- // jr t0
- (*(CurrentInstr++)) = JrT0Instr;
- (*CurrentInstr) = NopInstr;
-
- sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
- } else {
- // Unsupported case
- report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
- }
- }
-}
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring. This code saves registers, calls
-// MipsCompilationCallbackC and restores registers.
-extern "C" {
-#if defined (__mips__)
-void MipsCompilationCallback();
-
- asm(
- ".text\n"
- ".align 2\n"
- ".globl " ASMPREFIX "MipsCompilationCallback\n"
- ASMPREFIX "MipsCompilationCallback:\n"
- ".ent " ASMPREFIX "MipsCompilationCallback\n"
- ".frame $sp, 32, $ra\n"
- ".set noreorder\n"
- ".cpload $t9\n"
-
- "addiu $sp, $sp, -64\n"
- ".cprestore 16\n"
-
- // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain
- // stuff for the real target function right now. We have to act as if this
- // whole compilation callback doesn't exist as far as the caller is
- // concerned. We also need to save the ra register since it contains the
- // original return address, and t8 register since it contains the address
- // of the end of function stub.
- "sw $a0, 20($sp)\n"
- "sw $a1, 24($sp)\n"
- "sw $a2, 28($sp)\n"
- "sw $a3, 32($sp)\n"
- "sw $ra, 36($sp)\n"
- "sw $t8, 40($sp)\n"
- "sdc1 $f12, 48($sp)\n"
- "sdc1 $f14, 56($sp)\n"
-
- // t8 points at the end of function stub. Pass the beginning of the stub
- // to the MipsCompilationCallbackC.
- "addiu $a0, $t8, -16\n"
- "jal " ASMPREFIX "MipsCompilationCallbackC\n"
- "nop\n"
-
- // Restore registers.
- "lw $a0, 20($sp)\n"
- "lw $a1, 24($sp)\n"
- "lw $a2, 28($sp)\n"
- "lw $a3, 32($sp)\n"
- "lw $ra, 36($sp)\n"
- "lw $t8, 40($sp)\n"
- "ldc1 $f12, 48($sp)\n"
- "ldc1 $f14, 56($sp)\n"
- "addiu $sp, $sp, 64\n"
-
- // Jump to the (newly modified) stub to invoke the real function.
- "addiu $t8, $t8, -16\n"
- "jr $t8\n"
- "nop\n"
-
- ".set reorder\n"
- ".end " ASMPREFIX "MipsCompilationCallback\n"
- );
-#else // host != Mips
- void MipsCompilationCallback() {
- llvm_unreachable(
- "Cannot call MipsCompilationCallback() on a non-Mips arch!");
- }
-#endif
-}
-
-/// MipsCompilationCallbackC - This is the target-specific function invoked
-/// by the function stub when we did not know the real target of a call.
-/// This function must locate the start of the stub or call site and pass
-/// it into the JIT compiler function.
-extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
-
- // Rewrite the function stub so that we don't end up here every time we
- // execute the call. We're replacing the first four instructions of the
- // stub with code that jumps to the compiled function:
- // lui $t9, %hi(NewVal)
- // addiu $t9, $t9, %lo(NewVal)
- // jr $t9
- // nop
-
- int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
- if ((NewVal & 0x8000) != 0)
- Hi++;
- int Lo = (int)(NewVal & 0xffff);
-
- *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
- *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
- *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
- *(intptr_t *)(StubAddr + 12) = 0;
-
- sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
-}
-
-TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
- JITCompilerFn F) {
- JITCompilerFunction = F;
- return MipsCompilationCallback;
-}
-
-TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
- // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
- // emitFunctionStub for details.
- StubLayout Result = { 4*4, 4 };
- return Result;
-}
-
-void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) {
- JCE.emitAlignment(4);
- void *Addr = (void*) (JCE.getCurrentPCValue());
- if (!sys::Memory::setRangeWritable(Addr, 16))
- llvm_unreachable("ERROR: Unable to mark stub writable.");
-
- intptr_t EmittedAddr;
- if (Fn != (void*)(intptr_t)MipsCompilationCallback)
- EmittedAddr = (intptr_t)Fn;
- else
- EmittedAddr = (intptr_t)MipsCompilationCallback;
-
-
- int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
- if ((EmittedAddr & 0x8000) != 0)
- Hi++;
- int Lo = (int)(EmittedAddr & 0xffff);
-
- // lui $t9, %hi(EmittedAddr)
- // addiu $t9, $t9, %lo(EmittedAddr)
- // jalr $t8, $t9
- // nop
- if (IsLittleEndian) {
- JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
- JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
- JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
- JCE.emitWordLE(0);
- } else {
- JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi);
- JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
- JCE.emitWordBE(25 << 21 | 24 << 11 | 9);
- JCE.emitWordBE(0);
- }
-
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16))
- llvm_unreachable("ERROR: Unable to mark stub executable.");
-
- return Addr;
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
-
- void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
-
- switch ((Mips::RelocationType) MR->getRelocationType()) {
- case Mips::reloc_mips_pc16:
- ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_26:
- ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_hi:
- ResultPtr = ResultPtr >> 16;
- if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
- ResultPtr += 1;
- }
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_lo: {
- // Addend is needed for unaligned load/store instructions, where offset
- // for the second load/store in the expanded instruction sequence must
- // be modified by +1 or +3. Otherwise, Addend is 0.
- int Addend = *((unsigned*) RelocPos) & 0xffff;
- ResultPtr = (ResultPtr + Addend) & 0xffff;
- *((unsigned*) RelocPos) &= 0xffff0000;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
- }
- }
- }
-}
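The Hi/Lo arithmetic in the deleted emitFunctionStub and MipsCompilationCallbackC is the standard %hi/%lo split: because addiu sign-extends its 16-bit immediate, the lui half must be bumped by one whenever bit 15 of the address is set. A small self-contained sketch of that computation (the function name is ours):

#include <cstdint>
#include <cstdio>

// If bit 15 is set, the addiu immediate sign-extends negative, so the lui
// half gets +1 to compensate.
static void splitHiLo(uint32_t Addr, uint32_t &Hi, int16_t &Lo) {
  Hi = Addr >> 16;
  Lo = (int16_t)(Addr & 0xffff);
  if (Addr & 0x8000)
    ++Hi;
}

int main() {
  uint32_t Hi;
  int16_t Lo;
  splitHiLo(0x12348000, Hi, Lo);
  // lui reg, Hi ; addiu reg, reg, Lo reconstructs the original address:
  printf("0x%08x\n", (Hi << 16) + (uint32_t)(int32_t)Lo); // 0x12348000
}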
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
deleted file mode 100644
index c9dfd831d2df..000000000000
--- a/lib/Target/Mips/MipsJITInfo.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the MipsJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSJITINFO_H
-#define MIPSJITINFO_H
-
-#include "MipsMachineFunction.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class MipsTargetMachine;
-
-class MipsJITInfo : public TargetJITInfo {
-
- bool IsPIC;
- bool IsLittleEndian;
-
- public:
- explicit MipsJITInfo() :
- IsPIC(false), IsLittleEndian(true) {}
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on Mips.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) override;
-
- /// Initialize - Initialize internal stage for the function being JITted.
- void Initialize(const MachineFunction &MF, bool isPIC,
- bool isLittleEndian) {
- IsPIC = isPIC;
- IsLittleEndian = isLittleEndian;
- }
-
-};
-}
-
-#endif
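Taken together, the two deleted files implemented lazy compilation for the old JIT: a four-instruction stub initially routes into MipsCompilationCallback, which invokes the JIT compiler and then rewrites the stub so later calls jump straight to the compiled code. A toy model of that control flow, with a function pointer standing in for the patchable lui/addiu/jr sequence (all names here are illustrative):

#include <cstdio>

typedef int (*FnPtr)();

static FnPtr StubTarget; // stands in for the patchable stub

static int compiledFunction() { return 42; }

static int resolver() {
  StubTarget = compiledFunction; // the stub rewrite done by the callback
  return StubTarget();
}

int main() {
  StubTarget = resolver;                         // first call: resolve
  printf("%d %d\n", StubTarget(), StubTarget()); // 42 42; later calls direct
}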
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 5790ed6053c8..aae0922e79eb 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -16,6 +16,7 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
+#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -109,8 +110,7 @@ static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
return MO.getMBB();
}
- assert(false && "This instruction does not have an MBB operand.");
- return nullptr;
+ llvm_unreachable("This instruction does not have an MBB operand.");
}
// Traverse the list of instructions backwards until a non-debug instruction is
@@ -170,7 +170,7 @@ void MipsLongBranch::initMBBInfo() {
MBBInfos.resize(MF->size());
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(I);
@@ -217,7 +217,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
DebugLoc DL, MachineBasicBlock *MBBOpnd) {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode());
const MCInstrDesc &NewDesc = TII->get(NewOpc);
@@ -236,15 +236,21 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
MIB.addMBB(MBBOpnd);
- // Bundle the instruction in the delay slot to the newly created branch
- // and erase the original branch.
- assert(Br->isBundledWithSucc());
- MachineBasicBlock::instr_iterator II(Br);
- MIBundleBuilder(&*MIB).append((++II)->removeFromBundle());
+ if (Br->hasDelaySlot()) {
+ // Bundle the instruction in the delay slot to the newly created branch
+ // and erase the original branch.
+ assert(Br->isBundledWithSucc());
+ MachineBasicBlock::instr_iterator II(Br);
+ MIBundleBuilder(&*MIB).append((++II)->removeFromBundle());
+ }
Br->eraseFromParent();
}
// Expand branch instructions to long branches.
+// TODO: This function has to be fixed for beqz16 and bnez16, because it
+// currently assumes that all branches have 16-bit offsets, and will produce
+// wrong code if branches whose allowed offsets are [-128, -126, ..., 126]
+// are present.
void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
MachineBasicBlock::iterator Pos;
MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br);
@@ -254,7 +260,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB);
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
MF->insert(FallThroughMBB, LongBrMBB);
MBB->removeSuccessor(TgtMBB);
@@ -447,7 +453,7 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
if (STI.inMips16Mode() || !STI.enableLongBranchPass())
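The TODO added above notes that the microMIPS beqz16/bnez16 offsets span only the even values in [-128, 126], not the 16-bit range the pass currently assumes. A sketch of the fitness check such a fix would need (a hypothetical helper, not part of the patch):

#include <cstdint>
#include <cstdio>

// Range check for microMIPS 16-bit compact branches: the offset field covers
// only even displacements in [-128, 126].
static bool fitsBeqz16Offset(int64_t Off) {
  return Off >= -128 && Off <= 126 && (Off % 2) == 0;
}

int main() {
  printf("%d %d\n", fitsBeqz16Offset(126), fitsBeqz16Offset(128)); // 1 0
}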
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 269190ffc065..1ce27e401850 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCINSTLOWER_H
-#define MIPSMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMCINSTLOWER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMCINSTLOWER_H
#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 285bb146cdb6..68230e6957a8 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -69,7 +69,7 @@ def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
// as the encoded value should be subtracted by one.
def uimm2LSAAsmOperand : AsmOperandClass {
let Name = "LSAImm";
- let ParserMethod = "ParseLSAImm";
+ let ParserMethod = "parseLSAImm";
let RenderMethod = "addImmOperands";
}
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index bc896be4e1de..19babc720ce0 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -24,7 +24,7 @@ FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
// class MipsCallEntry.
-MipsCallEntry::MipsCallEntry(const StringRef &N) {
+MipsCallEntry::MipsCallEntry(StringRef N) {
#ifndef NDEBUG
Name = N;
Val = nullptr;
@@ -80,13 +80,10 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
- const TargetRegisterClass *RC;
- if (ST.inMips16Mode())
- RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = ST.isABI_N64() ?
- (const TargetRegisterClass*)&Mips::GPR64RegClass :
- (const TargetRegisterClass*)&Mips::GPR32RegClass;
+ const TargetRegisterClass *RC =
+ ST.inMips16Mode() ? &Mips::CPU16RegsRegClass
+ : ST.isABI_N64() ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
}
@@ -98,8 +95,7 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() {
if (Mips16SPAliasReg)
return Mips16SPAliasReg;
- const TargetRegisterClass *RC;
- RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC);
}
@@ -119,7 +115,7 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const StringRef &Name) {
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
const MipsCallEntry *&E = ExternalCallEntries[Name];
if (!E)
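The signature change from const StringRef & to StringRef follows the usual LLVM convention: a StringRef is just a pointer plus a length, so passing it by value is as cheap as passing a reference and removes an indirection at each use. A toy stand-in to make the point self-contained (StringRefLike is ours, not llvm::StringRef):

#include <cstddef>
#include <cstdio>
#include <cstring>

// Two-word string reference: copying it is copying a pointer and a length.
struct StringRefLike {
  const char *Data;
  std::size_t Length;
  StringRefLike(const char *S) : Data(S), Length(strlen(S)) {}
};

// By-value, matching the new callPtrInfo(StringRef Name) style.
static void printName(StringRefLike N) {
  printf("%.*s\n", (int)N.Length, N.Data);
}

int main() { printName("memcpy"); }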
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 61260e578159..217f30734b29 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_MACHINE_FUNCTION_INFO_H
-#define MIPS_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
#include "llvm/ADT/StringMap.h"
@@ -34,7 +34,7 @@ namespace llvm {
/// resolved by lazy-binding.
class MipsCallEntry : public PseudoSourceValue {
public:
- explicit MipsCallEntry(const StringRef &N);
+ explicit MipsCallEntry(StringRef N);
explicit MipsCallEntry(const GlobalValue *V);
bool isConstant(const MachineFrameInfo *) const override;
bool isAliased(const MachineFrameInfo *) const override;
@@ -88,7 +88,7 @@ public:
/// \brief Create a MachinePointerInfo that has a MipsCallEntry object
/// representing a GOT entry for an external function.
- MachinePointerInfo callPtrInfo(const StringRef &Name);
+ MachinePointerInfo callPtrInfo(StringRef Name);
/// \brief Create a MachinePointerInfo that has a MipsCallEntry object
/// representing a GOT entry for a global function.
@@ -150,4 +150,4 @@ private:
} // end of namespace llvm
-#endif // MIPS_MACHINE_FUNCTION_INFO_H
+#endif
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
index f7a03104880b..85bae47aa08e 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMODULEISELDAGTODAG_H
-#define MIPSMODULEISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
#include "Mips.h"
#include "MipsSubtarget.h"
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index c234049ed125..22c524ea4370 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -130,7 +130,7 @@ static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) {
static void setCallTargetReg(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I) {
MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned SrcReg = I->getOperand(0).getReg();
unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64;
BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg)
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index c0abce3eaddb..f82544ae43ae 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -17,19 +17,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSOPTIONRECORD_H
-#define MIPSOPTIONRECORD_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
+#define LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
-#include "MipsMCTargetDesc.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegisterInfo.h"
-using namespace llvm;
-
namespace llvm {
class MipsELFStreamer;
class MCSubtargetInfo;
-}
class MipsOptionRecord {
public:
@@ -58,7 +55,7 @@ public:
}
~MipsRegInfoRecord() {}
- void EmitMipsOptionRecord();
+ void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
private:
@@ -77,4 +74,5 @@ private:
uint32_t ri_cprmask[4];
int64_t ri_gp_value;
};
+} // namespace llvm
#endif
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
index 55e5a81daf99..77183ece7b9f 100644
--- a/lib/Target/Mips/MipsOs16.h
+++ b/lib/Target/Mips/MipsOs16.h
@@ -11,16 +11,14 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H
+#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H
+
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetMachine.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-
-
-#ifndef MIPSOS16_H
-#define MIPSOS16_H
-
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index a6169705fe01..20ef3f387dcc 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -62,7 +62,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
case Mips::DSPRRegClassID: {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
case Mips::FGR32RegClassID:
@@ -167,7 +167,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*Reg);
}
// Reserve FP if this function should have a dedicated frame pointer register.
- if (MF.getTarget().getFrameLowering()->hasFP(MF)) {
+ if (MF.getSubtarget().getFrameLowering()->hasFP(MF)) {
if (Subtarget.inMips16Mode())
Reserved.set(Mips::S0);
else {
@@ -256,7 +256,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
bool IsN64 = Subtarget.isABI_N64();
if (Subtarget.inMips16Mode())
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b34496fd8e12..9ec4a38862dd 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSREGISTERINFO_H
-#define MIPSREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSREGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSREGISTERINFO_H
#include "Mips.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 74dfa4fe7d9c..2b3b6c1e5242 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -212,8 +212,13 @@ let Namespace = "Mips" in {
// PC register
def PC : Register<"pc">;
- // Hardware register $29
- foreach I = 0-31 in
+ // Hardware registers
+ def HWR0 : MipsReg<0, "hwr_cpunum">;
+ def HWR1 : MipsReg<1, "hwr_synci_step">;
+ def HWR2 : MipsReg<2, "hwr_cc">;
+ def HWR3 : MipsReg<3, "hwr_ccres">;
+
+ foreach I = 4-31 in
def HWR#I : MipsReg<#I, ""#I>;
// Accum registers
@@ -283,6 +288,20 @@ class GPR32Class<list<ValueType> regTypes> :
def GPR32 : GPR32Class<[i32]>;
def DSPR : GPR32Class<[v4i8, v2i16]>;
+def GPRMM16 : RegisterClass<"Mips", [i32], 32, (add
+ // Callee save
+ S0, S1,
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3)>;
+
+def GPRMM16Zero : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ ZERO,
+ // Callee save
+ S1,
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -417,7 +436,7 @@ def OCTEON_P : RegisterClass<"Mips", [i64], 64, (add P0, P1, P2)>,
// Register Operands.
class MipsAsmRegOperand : AsmOperandClass {
- let ParserMethod = "ParseAnyRegister";
+ let ParserMethod = "parseAnyRegister";
}
def GPR64AsmOperand : MipsAsmRegOperand {
@@ -430,6 +449,16 @@ def GPR32AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isGPRAsmReg";
}
+def GPRMM16AsmOperand : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmReg";
+ let PredicateMethod = "isMM16AsmReg";
+}
+
+def GPRMM16AsmOperandZero : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegZero";
+ let PredicateMethod = "isMM16AsmRegZero";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -485,6 +514,14 @@ def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
+def GPRMM16Opnd : RegisterOperand<GPRMM16> {
+ let ParserMatchClass = GPRMM16AsmOperand;
+}
+
+def GPRMM16OpndZero : RegisterOperand<GPRMM16Zero> {
+ let ParserMatchClass = GPRMM16AsmOperandZero;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
@@ -578,4 +615,3 @@ def MSA128DOpnd : RegisterOperand<MSA128D> {
def MSA128CROpnd : RegisterOperand<MSACtrl> {
let ParserMatchClass = MSACtrlAsmOperand;
}
-
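The new GPRMM16 class enumerates eight GPRs, with GPRMM16Zero swapping S0 for ZERO. Matching an eight-entry class to a 3-bit register field is our reading of the 16-bit microMIPS encodings, not something the patch states. A sketch of a predicate over those register names (the helper is illustrative):

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <string>

// The eight GPRs named by the new GPRMM16 class above.
static const char *GPRMM16Regs[] = {"s0", "s1", "v0", "v1",
                                    "a0", "a1", "a2", "a3"};

static bool isMM16Reg(const std::string &Name) {
  return std::find(std::begin(GPRMM16Regs), std::end(GPRMM16Regs), Name) !=
         std::end(GPRMM16Regs);
}

int main() {
  printf("%d %d\n", isMM16Reg("a0"), isMM16Reg("t0")); // 1 0
}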
diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h
deleted file mode 100644
index 0787ed399d5f..000000000000
--- a/lib/Target/Mips/MipsRelocations.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Mips target-specific relocation types
-// (for relocation-model=static).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSRELOCATIONS_H_
-#define MIPSRELOCATIONS_H_
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace Mips{
- enum RelocationType {
- // reloc_mips_pc16 - pc relative relocation for branches. The lower 18
- // bits of the difference between the branch target and the branch
- // instruction, shifted right by 2.
- reloc_mips_pc16 = 1,
-
- // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
- // lower 16 bits of the address is negative).
- reloc_mips_hi = 2,
-
- // reloc_mips_lo - lower 16 bits of the address.
- reloc_mips_lo = 3,
-
- // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
- reloc_mips_26 = 4
- };
- }
-}
-
-#endif /* MIPSRELOCATIONS_H_ */
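The deleted relocate() applied reloc_mips_pc16 as ((Target - RelocPos) - 4) >> 2, masked to 16 bits: a word offset measured from the delay slot, exactly as the removed comment describes. A compact sketch of that encoding (the function name is ours):

#include <cstdint>
#include <cstdio>

// reloc_mips_pc16 from the removed JIT relocator: the branch field holds
// the word offset from the delay slot to the target.
static uint16_t encodePC16(uint32_t RelocPos, uint32_t Target) {
  return (uint16_t)((((int32_t)(Target - RelocPos) - 4) >> 2) & 0xffff);
}

int main() {
  printf("0x%04x\n", encodePC16(0x1000, 0x1010)); // (0x10 - 4) >> 2 = 0x3
}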
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 4825e1e569ee..97d9edfe6a5c 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -147,9 +147,9 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
@@ -167,9 +167,9 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
@@ -190,9 +190,9 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
@@ -220,9 +220,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
@@ -255,9 +255,9 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
// copy dst_hi, $vr1
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
@@ -303,10 +303,10 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(TM.getInstrInfo());
- const MipsRegisterInfo &TRI =
- *static_cast<const MipsRegisterInfo*>(TM.getRegisterInfo());
+ const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg();
@@ -363,10 +363,10 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(TM.getInstrInfo());
- const MipsRegisterInfo &TRI =
- *static_cast<const MipsRegisterInfo *>(TM.getRegisterInfo());
+ const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
@@ -415,9 +415,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -550,9 +550,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
DebugLoc dl = MBBI->getDebugLoc();
unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
@@ -605,7 +605,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
MachineBasicBlock *EntryBlock = MF->begin();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
@@ -646,7 +646,7 @@ void MipsSEFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
int64_t Amount = I->getOperand(0).getImm();
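The change repeated throughout these hunks, MF.getTarget().getInstrInfo() becoming MF.getSubtarget().getInstrInfo(), reflects the move of codegen tables from the TargetMachine onto the per-function subtarget, the plumbing that enables per-function code generation. A toy model of the new lookup path (all types here are stand-ins for the LLVM classes, not their real definitions):

#include <cstdio>

struct TargetInstrInfo {
  const char *Name;
};

struct Subtarget {
  TargetInstrInfo TII;
  const TargetInstrInfo *getInstrInfo() const { return &TII; }
};

struct MachineFunction {
  Subtarget ST;
  const Subtarget &getSubtarget() const { return ST; } // new lookup path
};

int main() {
  MachineFunction MF = {{{"MipsSEInstrInfo"}}};
  printf("%s\n", MF.getSubtarget().getInstrInfo()->Name);
}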
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index e832848754dc..0eca1df12b0f 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSE_FRAMEINFO_H
-#define MIPSSE_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
#include "MipsFrameLowering.h"
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 47e193191c3e..70963821690c 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -130,15 +130,12 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
- if (Subtarget->isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::GPR64RegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::GPR32RegClass;
+ RC = (Subtarget->isABI_N64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 57328d2f9270..2e11fa7282a7 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEISELDAGTODAG_H
-#define MIPSSEISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEISELDAGTODAG_H
#include "MipsISelDAGToDAG.h"
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index d7452e281e36..29aac2e276b0 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -34,7 +35,7 @@ static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
"stores to their single precision "
"counterparts"));
-MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM,
+MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
: MipsTargetLowering(TM, STI) {
// Set up the register classes
@@ -45,17 +46,13 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM,
if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
// Expand all truncating stores and extending loads.
- unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-
- for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
- for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
- setTruncStoreAction((MVT::SimpleValueType)VT0,
- (MVT::SimpleValueType)VT1, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ for (MVT VT0 : MVT::vector_valuetypes()) {
+ for (MVT VT1 : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT0, VT1, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
+ }
}
}
@@ -127,6 +124,8 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM,
if (Subtarget.isGP64bit()) {
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
@@ -134,8 +133,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
@@ -227,7 +224,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM,
}
const MipsTargetLowering *
-llvm::createMipsSETargetLowering(MipsTargetMachine &TM,
+llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI) {
return new MipsSETargetLowering(TM, STI);
}
@@ -329,9 +326,10 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
}
bool
-MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
if (Subtarget.systemSupportsUnalignedAccess()) {
@@ -577,10 +575,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
Log2 == ExtendTySize) {
SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
- DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
- Op0->getVTList(),
- makeArrayRef(Ops, Op0->getNumOperands()));
- return Op0;
+ return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
+ Op0->getVTList(),
+ makeArrayRef(Ops, Op0->getNumOperands()));
}
}
@@ -922,10 +919,9 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
TotalBits <= 32)) {
SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
Op0Op0->getOperand(2) };
- DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
- Op0Op0->getVTList(),
- makeArrayRef(Ops, Op0Op0->getNumOperands()));
- return Op0Op0;
+ return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
+ Op0Op0->getVTList(),
+ makeArrayRef(Ops, Op0Op0->getNumOperands()));
}
}
}
@@ -1186,10 +1182,12 @@ void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
Ops.push_back(Callee);
MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
- InternalLinkage, CLI, Callee, Chain);
+ InternalLinkage, IsCallReloc, CLI, Callee,
+ Chain);
}
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
@@ -1245,13 +1243,13 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// i32 store to lower address.
Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
- Nd.getTBAAInfo());
+ Nd.getAAInfo());
// i32 store to higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(),
- std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
+ std::min(Nd.getAlignment(), 4U), Nd.getAAInfo());
}
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
@@ -2745,7 +2743,8 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
// $vr0 = phi($vr2, $fbb, $vr1, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2810,7 +2809,8 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// $rd = phi($rd1, $fbb, $rd2, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2871,7 +2871,8 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Fd = MI->getOperand(0).getReg();
@@ -2905,7 +2906,8 @@ MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
unsigned Fd = MI->getOperand(0).getReg();
unsigned Ws = MI->getOperand(1).getReg();
@@ -2934,7 +2936,8 @@ emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -2968,7 +2971,8 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3016,7 +3020,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned EltSizeInBytes,
bool IsFP) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3126,7 +3131,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3157,7 +3163,8 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3185,7 +3192,8 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
@@ -3214,7 +3222,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
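The lowerSTORE hunk above splits a 64-bit store into two i32 stores, the second at Ptr+4 with its alignment capped at 4 (the getAAInfo() change is just the rename from getTBAAInfo()). A host-side sketch of the same splitting, assuming a little-endian layout; the real lowering picks Lo/Hi per the target's endianness:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Two i32 stores in place of one i64 store, second at offset +4.
static void storeI64AsTwoI32(uint8_t *Ptr, uint64_t V) {
  uint32_t Lo = (uint32_t)(V & 0xffffffff);
  uint32_t Hi = (uint32_t)(V >> 32);
  memcpy(Ptr, &Lo, 4);     // i32 store to lower address
  memcpy(Ptr + 4, &Hi, 4); // i32 store to higher address
}

int main() {
  uint8_t Buf[8];
  storeI64AsTwoI32(Buf, 0x1122334455667788ULL);
  uint64_t Out;
  memcpy(&Out, Buf, 8);
  printf("0x%llx\n", (unsigned long long)Out); // 0x1122334455667788 on LE hosts
}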
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 2121b3bcaa51..d44f8d82ec3e 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEISELLOWERING_H
-#define MIPSSEISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEISELLOWERING_H
#include "MipsISelLowering.h"
#include "MipsRegisterInfo.h"
@@ -20,7 +20,7 @@
namespace llvm {
class MipsSETargetLowering : public MipsTargetLowering {
public:
- explicit MipsSETargetLowering(MipsTargetMachine &TM,
+ explicit MipsSETargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI);
/// \brief Enable MSA support for the given integer type and Register
@@ -31,8 +31,9 @@ namespace llvm {
void addMSAFloatType(MVT::SimpleValueType Ty,
const TargetRegisterClass *RC);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -58,7 +59,7 @@ namespace llvm {
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee,
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
SDValue Chain) const override;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -113,4 +114,4 @@ namespace llvm {
};
}
-#endif // MipsSEISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 69cb74cb1e49..74f291f609fd 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
: MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? Mips::B
: Mips::J),
- RI(STI), IsN64(STI.isABI_N64()) {}
+ RI(STI) {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
@@ -38,9 +38,8 @@ const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
-unsigned MipsSEInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
+unsigned MipsSEInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
unsigned Opc = MI->getOpcode();
if ((Opc == Mips::LW) || (Opc == Mips::LD) ||
@@ -61,9 +60,8 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
-unsigned MipsSEInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
+unsigned MipsSEInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
unsigned Opc = MI->getOpcode();
if ((Opc == Mips::SW) || (Opc == Mips::SD) ||
@@ -352,6 +350,8 @@ unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
case Mips::BLEZ64: return Mips::BGTZ64;
case Mips::BC1T: return Mips::BC1F;
case Mips::BC1F: return Mips::BC1T;
+ case Mips::BEQZC_MM: return Mips::BNEZC_MM;
+ case Mips::BNEZC_MM: return Mips::BEQZC_MM;
}
}
@@ -422,7 +422,7 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
- Opc == Mips::J) ?
+ Opc == Mips::J || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM) ?
Opc : 0;
}
@@ -620,15 +620,13 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
// jr $ra (via RetRA)
const TargetMachine &TM = MBB.getParent()->getTarget();
if (TM.getRelocationModel() == Reloc::PIC_)
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9)
+ BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), T9)
.addReg(TargetReg)
.addReg(ZERO);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), RA)
+ BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), RA)
.addReg(TargetReg)
.addReg(ZERO);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
- .addReg(SP)
- .addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), SP).addReg(SP).addReg(OffsetReg);
expandRetRA(MBB, I);
}
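
The new BEQZC_MM/BNEZC_MM cases above feed the generic branch-reversal machinery: reversing a conditional branch is only possible when every analyzable opcode has a known negation, which is why getAnalyzableBrOpc() and getOppositeBranchOpc() are extended together. A minimal sketch of how such a table is consumed (hypothetical standalone enum and helper, not part of this patch):

    #include <cstdint>
    #include <stdexcept>

    enum Opcode : uint32_t { BEQ, BNE, BEQZC_MM, BNEZC_MM };

    // Mirrors the switch in getOppositeBranchOpc(): every analyzable
    // conditional branch maps to its logical negation, including the
    // microMIPS compact branches added by this patch.
    Opcode getOppositeBranch(Opcode Opc) {
      switch (Opc) {
      case BEQ:      return BNE;
      case BNE:      return BEQ;
      case BEQZC_MM: return BNEZC_MM; // "branch if zero" <-> "if nonzero"
      case BNEZC_MM: return BEQZC_MM;
      }
      throw std::logic_error("unanalyzable branch cannot be reversed");
    }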
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 9576fef1bd92..d16fab2e92fa 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEINSTRUCTIONINFO_H
-#define MIPSSEINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEINSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEINSTRINFO_H
#include "MipsInstrInfo.h"
#include "MipsSERegisterInfo.h"
@@ -21,7 +21,6 @@ namespace llvm {
class MipsSEInstrInfo : public MipsInstrInfo {
const MipsSERegisterInfo RI;
- bool IsN64;
public:
explicit MipsSEInstrInfo(const MipsSubtarget &STI);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 0af1a6b9e5c3..55c6638b45da 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -172,7 +172,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned Reg = RegInfo.createVirtualRegister(RC);
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
- MBB.getParent()->getTarget().getInstrInfo());
+ MBB.getParent()->getSubtarget().getInstrInfo());
BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
FrameReg = Reg;
@@ -187,7 +187,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned NewImm = 0;
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
- MBB.getParent()->getTarget().getInstrInfo());
+ MBB.getParent()->getSubtarget().getInstrInfo());
unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
OffsetBitSize == 16 ? &NewImm : nullptr);
BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
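
Both hunks above only change where eliminateFI() fetches its MipsSEInstrInfo from; the surrounding logic, which decides whether a frame offset fits an instruction's immediate field or must be materialized into a scratch register, is untouched. A reduced sketch of that decision, under the assumption that a signed 16-bit immediate is the limit:

    #include <cstdint>

    // Sketch (assumed semantics, not the actual MIPS lowering): small
    // offsets fold into the memory operand; larger ones go through a
    // scratch register that is added to the frame register first.
    inline bool fitsInInt16(int64_t V) { return V >= -32768 && V <= 32767; }

    struct LoweredOffset {
      bool NeedsScratchReg; // true -> "scratch = Offset; base += scratch"
      int64_t FoldedImm;    // immediate left on the load/store itself
    };

    LoweredOffset lowerFrameOffset(int64_t Offset) {
      if (fitsInInt16(Offset))
        return {false, Offset};
      return {true, 0}; // whole offset is carried by the scratch register
    }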
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index f2f3a7e75fd8..6b70d07b20dc 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEREGISTERINFO_H
-#define MIPSSEREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEREGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEREGISTERINFO_H
#include "MipsRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 2b3d527fe6ff..061423fbeb86 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSELECTIONDAGINFO_H
-#define MIPSSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 9a1c40902e8a..d035ebec4a54 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -58,6 +58,10 @@ Mips16ConstantIslands(
cl::desc("MIPS: mips16 constant islands enable."),
cl::init(true));
+static cl::opt<bool>
+GPOpt("mgpopt", cl::Hidden,
+ cl::desc("MIPS: Enable gp-relative addressing of small data items"));
+
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsMCTargetDesc.cpp
static StringRef selectMipsCPU(Triple TT, StringRef CPU) {
@@ -104,34 +108,30 @@ static std::string computeDataLayout(const MipsSubtarget &ST) {
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- MipsTargetMachine *_TM)
- : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
+ const MipsTargetMachine &TM)
+ : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
ABI(MipsABIInfo::Unknown()), IsLittle(little), IsSingleFloat(false),
IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
- IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
- HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
- InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
- InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), TM(_TM), TargetTriple(TT),
+ HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
+ HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
+ InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
+ HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ HasMSA(false), TM(TM), TargetTriple(TT),
DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
- TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*this)),
+ TSInfo(DL), InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this)),
- TLInfo(MipsTargetLowering::create(*TM, *this)) {
+ TLInfo(MipsTargetLowering::create(TM, *this)) {
PreviousInMips16Mode = InMips16Mode;
- // Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and
- // MIPS-V. They have not been tested and currently exist for the integrated
- // assembler only.
+ if (MipsArchVersion == MipsDefault)
+ MipsArchVersion = Mips32;
+
+ // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not
+ // been tested and currently exist for the integrated assembler only.
if (MipsArchVersion == Mips1)
report_fatal_error("Code generation for MIPS-I is not implemented", false);
- if (MipsArchVersion == Mips2)
- report_fatal_error("Code generation for MIPS-II is not implemented", false);
- if (MipsArchVersion == Mips3)
- report_fatal_error("Code generation for MIPS-III is not implemented",
- false);
if (MipsArchVersion == Mips5)
report_fatal_error("Code generation for MIPS-V is not implemented", false);
@@ -167,14 +167,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
report_fatal_error(ISA + " is not compatible with the DSP ASE", false);
}
- // Is the target system Linux ?
- if (TT.find("linux") == std::string::npos)
- IsLinux = false;
+ if (NoABICalls && TM.getRelocationModel() == Reloc::PIC_)
+ report_fatal_error("position-independent code requires '-mabicalls'");
// Set UseSmallSection.
- // TODO: Investigate the IsLinux check. I suspect it's really checking for
- // bare-metal.
- UseSmallSection = !IsLinux && (TM->getRelocationModel() == Reloc::Static);
+ UseSmallSection = GPOpt;
+ if (!NoABICalls && GPOpt) {
+ errs() << "warning: cannot use small-data accesses for '-mabicalls'"
+ << "\n";
+ UseSmallSection = false;
+ }
}
/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
@@ -192,7 +194,7 @@ CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
MipsSubtarget &
MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
- const TargetMachine *TM) {
+ const TargetMachine &TM) {
std::string CPUName = selectMipsCPU(TargetTriple, CPU);
// Parse features string.
@@ -200,14 +202,14 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- if (InMips16Mode && !TM->Options.UseSoftFloat)
+ if (InMips16Mode && !TM.Options.UseSoftFloat)
InMips16HardFloat = true;
return *this;
}
bool MipsSubtarget::abiUsesSoftFloat() const {
- return TM->Options.UseSoftFloat && !InMips16HardFloat;
+ return TM.Options.UseSoftFloat && !InMips16HardFloat;
}
bool MipsSubtarget::useConstantIslands() {
@@ -216,5 +218,5 @@ bool MipsSubtarget::useConstantIslands() {
}
Reloc::Model MipsSubtarget::getRelocationModel() const {
- return TM->getRelocationModel();
+ return TM.getRelocationModel();
}
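
The -mgpopt handling added above boils down to a two-input predicate: gp-relative small-data addressing is opt-in, and abicalls (the default) always wins over it. A sketch with the same truth table, with the flags flattened into plain parameters rather than cl::opt plumbing:

    #include <cstdio>

    // Sketch of the UseSmallSection computation in the constructor above.
    bool computeUseSmallSection(bool GPOpt, bool NoABICalls) {
      if (!GPOpt)
        return false; // small sections are requested via -mgpopt
      if (!NoABICalls) {
        // -mabicalls is incompatible with gp-relative small data.
        std::fprintf(stderr,
                     "warning: cannot use small-data accesses for "
                     "'-mabicalls'\n");
        return false;
      }
      return true;
    }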
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index c5b9a3e754d6..aca69a6839b3 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -11,19 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSUBTARGET_H
-#define MIPSSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSUBTARGET_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSUBTARGET_H
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsFrameLowering.h"
#include "MipsISelLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "MipsABIInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -38,6 +37,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
virtual void anchor();
enum MipsArchEnum {
+ MipsDefault,
Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
Mips64r2, Mips64r6
};
@@ -136,13 +136,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// as from the command line
enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode;
- MipsTargetMachine *TM;
+ const MipsTargetMachine &TM;
Triple TargetTriple;
const DataLayout DL; // Calculates type size & alignment
const MipsSelectionDAGInfo TSInfo;
- MipsJITInfo JITInfo;
std::unique_ptr<const MipsInstrInfo> InstrInfo;
std::unique_ptr<const MipsFrameLowering> FrameLowering;
std::unique_ptr<const MipsTargetLowering> TLInfo;
@@ -164,7 +163,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little, MipsTargetMachine *TM);
+ const std::string &FS, bool little,
+ const MipsTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -223,7 +223,6 @@ public:
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasMSA() const { return HasMSA; }
- bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
@@ -238,7 +237,6 @@ public:
bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); }
bool hasMTHC1() const { return hasMips32r2(); }
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
bool allowMixed16_32() const { return inMips16ModeDefault() |
AllowMixed16_32;}
@@ -256,7 +254,7 @@ public:
Reloc::Model getRelocationModel() const;
MipsSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
- const TargetMachine *TM);
+ const TargetMachine &TM);
/// Does the system support unaligned memory access.
///
@@ -269,17 +267,23 @@ public:
void setHelperClassesMips16();
void setHelperClassesMipsSE();
- MipsJITInfo *getJITInfo() { return &JITInfo; }
- const MipsSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
- const TargetFrameLowering *getFrameLowering() const {
+ const MipsSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); }
+ const TargetFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
- const MipsRegisterInfo *getRegisterInfo() const {
+ const MipsRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
- const MipsTargetLowering *getTargetLowering() const { return TLInfo.get(); }
+ const MipsTargetLowering *getTargetLowering() const override {
+ return TLInfo.get();
+ }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index bb1870ebe605..426b71ddeafd 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -26,6 +26,7 @@
#include "MipsSEISelDAGToDAG.h"
#include "MipsSEISelLowering.h"
#include "MipsSEInstrInfo.h"
+#include "MipsTargetObjectFile.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
@@ -56,15 +57,20 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, this),
+ isLittle(isLittle),
+ TLOF(make_unique<MipsTargetObjectFile>()),
+ Subtarget(nullptr),
+ DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
- isLittle, this),
+ isLittle, *this),
Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
- isLittle, this) {
+ isLittle, *this) {
Subtarget = &DefaultSubtarget;
initAsmInfo();
}
+MipsTargetMachine::~MipsTargetMachine() {}
+
void MipsebTargetMachine::anchor() { }
MipsebTargetMachine::
@@ -83,20 +89,60 @@ MipselTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+const MipsSubtarget *
+MipsTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+ bool hasMips16Attr =
+ !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "mips16")
+ .hasAttribute(Attribute::None);
+ bool hasNoMips16Attr =
+ !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "nomips16")
+ .hasAttribute(Attribute::None);
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool softFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ if (hasMips16Attr)
+ FS += FS.empty() ? "+mips16" : ",+mips16";
+ else if (hasNoMips16Attr)
+ FS += FS.empty() ? "-mips16" : ",-mips16";
+
+ auto &I = SubtargetMap[CPU + FS + (softFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<MipsSubtarget>(TargetTriple, CPU, FS, isLittle, *this);
+ }
+ return I.get();
+}
+
void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
DEBUG(dbgs() << "resetSubtarget\n");
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- bool Mips16Attr = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, "mips16");
- bool NoMips16Attr =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, "nomips16");
- assert(!(Mips16Attr && NoMips16Attr) &&
- "mips16 and nomips16 specified on the same function");
- if (Mips16Attr)
- Subtarget = &Mips16Subtarget;
- else if (NoMips16Attr)
- Subtarget = &NoMips16Subtarget;
- else
- Subtarget = &DefaultSubtarget;
+
+ Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(*MF->getFunction()));
+ MF->setSubtarget(Subtarget);
return;
}
@@ -124,9 +170,9 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addMachineSSAOptimization() override;
- bool addPreEmitPass() override;
+ void addPreEmitPass() override;
- bool addPreRegAlloc() override;
+ void addPreRegAlloc() override;
};
} // namespace
@@ -137,11 +183,11 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
+ addPass(createAtomicExpandPass(&getMipsTargetMachine()));
if (getMipsSubtarget().os16())
addPass(createMipsOs16(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
addPass(createMips16HardFloat(getMipsTargetMachine()));
- addPass(createPartiallyInlineLibCallsPass());
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -157,13 +203,9 @@ void MipsPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
}
-bool MipsPassConfig::addPreRegAlloc() {
- if (getOptLevel() == CodeGenOpt::None) {
+void MipsPassConfig::addPreRegAlloc() {
+ if (getOptLevel() == CodeGenOpt::None)
addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
- return true;
- }
- else
- return false;
}
void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
@@ -182,17 +224,9 @@ void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
-bool MipsPassConfig::addPreEmitPass() {
+void MipsPassConfig::addPreEmitPass() {
MipsTargetMachine &TM = getMipsTargetMachine();
addPass(createMipsDelaySlotFillerPass(TM));
addPass(createMipsLongBranchPass(TM));
addPass(createMipsConstantIslandPass(TM));
- return true;
-}
-
-bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for Mips.
- PM.add(createMipsJITCodeEmitterPass(*this, JCE));
- return false;
}
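
The new getSubtargetImpl(const Function &) keys its cache on CPU, feature string, and the soft-float flag, so two functions that differ only in those attributes get distinct subtargets while identical ones share one. A reduced sketch of the memoization pattern (simplified types and hypothetical names, not the LLVM API):

    #include <map>
    #include <memory>
    #include <string>

    struct Subtarget { // stand-in for MipsSubtarget
      std::string Key;
      explicit Subtarget(std::string K) : Key(std::move(K)) {}
    };

    class TargetMachineSketch {
      // mutable so a const getSubtargetImpl() can still fill the cache.
      mutable std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;

    public:
      const Subtarget *getSubtargetImpl(const std::string &CPU,
                                        const std::string &FS,
                                        bool SoftFloat) const {
        std::string Key = CPU + FS +
                          (SoftFloat ? "use-soft-float=true"
                                     : "use-soft-float=false");
        auto &Slot = SubtargetMap[Key];
        if (!Slot) // first function seen with this combination
          Slot = std::make_unique<Subtarget>(Key);
        return Slot.get();
      }
    };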
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index bcf411f9cd6f..4b097a2cf02a 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETMACHINE_H
-#define MIPSTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
#include "MipsSubtarget.h"
#include "llvm/CodeGen/Passes.h"
@@ -25,57 +25,42 @@ class formatted_raw_ostream;
class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
+ bool isLittle;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
MipsSubtarget *Subtarget;
MipsSubtarget DefaultSubtarget;
MipsSubtarget NoMips16Subtarget;
MipsSubtarget Mips16Subtarget;
+ mutable StringMap<std::unique_ptr<MipsSubtarget>> SubtargetMap;
+
public:
MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
-
- virtual ~MipsTargetMachine() {}
+ ~MipsTargetMachine() override;
void addAnalysisPasses(PassManagerBase &PM) override;
- const MipsInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
const MipsSubtarget *getSubtargetImpl() const override {
if (Subtarget)
return Subtarget;
return &DefaultSubtarget;
}
- const InstrItineraryData *getInstrItineraryData() const override {
- return Subtarget->inMips16Mode()
- ? nullptr
- : &getSubtargetImpl()->getInstrItineraryData();
- }
- MipsJITInfo *getJITInfo() override {
- return Subtarget->getJITInfo();
- }
- const MipsRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const MipsTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const MipsSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+
+ const MipsSubtarget *getSubtargetImpl(const Function &F) const override;
+
/// \brief Reset the subtarget for the Mips target.
void resetSubtarget(MachineFunction *MF);
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ bool isLittleEndian() const { return isLittle; }
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 13f9408f4e6e..b56c39bf496c 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -24,6 +24,17 @@ SSThreshold("mips-ssection-threshold", cl::Hidden,
cl::desc("Small data and bss section threshold size (default=8)"),
cl::init(8));
+static cl::opt<bool>
+LocalSData("mlocal-sdata", cl::Hidden,
+ cl::desc("MIPS: Use gp_rel for object-local data."),
+ cl::init(true));
+
+static cl::opt<bool>
+ExternSData("mextern-sdata", cl::Hidden,
+ cl::desc("MIPS: Use gp_rel for data that is not defined by the "
+ "current object."),
+ cl::init(true));
+
void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
@@ -37,29 +48,46 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
+ this->TM = &TM;
}
// An address must be loaded from a small section if its size is less than the
// small section size threshold. Data in this section must be addressed using
// the gp_rel operator.

static bool IsInSmallSection(uint64_t Size) {
+ // gcc has traditionally not treated zero-sized objects as small data, so this
+ // is effectively part of the ABI.
return Size > 0 && Size <= SSThreshold;
}
-bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM) const {
+/// Return true if this global address should be placed into small data/bss
+/// section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ // We first check the case where global is a declaration, because finding
+ // section kind using getKindForGlobal() is only allowed for global
+ // definitions.
if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
- return false;
+ return IsGlobalInSmallSectionImpl(GV, TM);
return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
}
-/// IsGlobalInSmallSection - Return true if this global address should be
-/// placed into small data/bss section.
+/// Return true if this global address should be placed into small data/bss
+/// section.
bool MipsTargetObjectFile::
IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
SectionKind Kind) const {
+ return (IsGlobalInSmallSectionImpl(GV, TM) &&
+ (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon()));
+}
+/// Return true if this global address should be placed into small data/bss
+/// section. This method does all the work, except for checking the section
+/// kind.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSectionImpl(const GlobalValue *GV,
+ const TargetMachine &TM) const {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
// Return if small section is not available.
@@ -71,21 +99,20 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (!GVA)
return false;
- // We can only do this for datarel or BSS objects for now.
- if (!Kind.isBSS() && !Kind.isDataRel())
+ // Enforce -mlocal-sdata.
+ if (!LocalSData && GV->hasLocalLinkage())
return false;
- // If this is a internal constant string, there is a special
- // section for it, but not in small data/bss.
- if (Kind.isMergeable1ByteCString())
+ // Enforce -mextern-sdata.
+ if (!ExternSData && ((GV->hasExternalLinkage() && GV->isDeclaration()) ||
+ GV->hasCommonLinkage()))
return false;
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
}
-
-
const MCSection *MipsTargetObjectFile::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
@@ -95,9 +122,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
}
+
+/// Return true if this constant should be placed into small data section.
+bool MipsTargetObjectFile::
+IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
+ return (TM.getSubtarget<MipsSubtarget>().useSmallSection() &&
+ LocalSData &&
+ IsInSmallSection(TM.getSubtargetImpl()->getDataLayout()
+ ->getTypeAllocSize(CN->getType())));
+}
+
+const MCSection *MipsTargetObjectFile::
+getSectionForConstant(SectionKind Kind, const Constant *C) const {
+ if (IsConstantInSmallSection(C, *TM))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C);
+}
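
The small-data classification above composes three independent gates: a size threshold that deliberately excludes zero-sized objects, the -mlocal-sdata/-mextern-sdata overrides, and the section kind. A sketch of the first two gates, with the inputs flattened into plain parameters instead of cl::opt flags and GlobalValue queries:

    #include <cstdint>

    constexpr uint64_t SSThreshold = 8; // -mips-ssection-threshold default

    bool isInSmallSection(uint64_t Size) {
      // Zero-sized objects are excluded; see the ABI note above.
      return Size > 0 && Size <= SSThreshold;
    }

    bool isGlobalInSmallData(uint64_t Size, bool HasLocalLinkage,
                             bool IsExternDeclOrCommon, bool LocalSData,
                             bool ExternSData) {
      if (!LocalSData && HasLocalLinkage)
        return false; // -mno-local-sdata
      if (!ExternSData && IsExternDeclOrCommon)
        return false; // -mno-extern-sdata
      return isInSmallSection(Size);
    }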
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 2bf5a75be90c..3a2b298db329 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
-#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -17,21 +17,30 @@ namespace llvm {
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
+ const TargetMachine *TM;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
- /// IsGlobalInSmallSection - Return true if this global address should be
- /// placed into small data/bss section.
- bool IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM, SectionKind Kind)const;
+ /// Return true if this global address should be placed into small data/bss
+ /// section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const;
bool IsGlobalInSmallSection(const GlobalValue *GV,
const TargetMachine &TM) const;
+ bool IsGlobalInSmallSectionImpl(const GlobalValue *GV,
+ const TargetMachine &TM) const;
const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const override;
+
+ /// Return true if this constant should be placed into small data section.
+ bool IsConstantInSmallSection(const Constant *CN,
+ const TargetMachine &TM) const;
+
+ const MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 99f7d4c92cfb..6dcd15acef05 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETSTREAMER_H
-#define MIPSTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETSTREAMER_H
+#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
-#include "MCTargetDesc/MipsABIFlagsSection.h"
namespace llvm {
@@ -30,6 +31,8 @@ public:
virtual void emitDirectiveSetNoReorder();
virtual void emitDirectiveSetMacro();
virtual void emitDirectiveSetNoMacro();
+ virtual void emitDirectiveSetMsa();
+ virtual void emitDirectiveSetNoMsa();
virtual void emitDirectiveSetAt();
virtual void emitDirectiveSetNoAt();
virtual void emitDirectiveEnd(StringRef Name);
@@ -45,13 +48,26 @@ public:
virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
+ virtual void emitDirectiveSetArch(StringRef Arch);
+ virtual void emitDirectiveSetMips0();
+ virtual void emitDirectiveSetMips1();
+ virtual void emitDirectiveSetMips2();
+ virtual void emitDirectiveSetMips3();
+ virtual void emitDirectiveSetMips4();
+ virtual void emitDirectiveSetMips5();
+ virtual void emitDirectiveSetMips32();
virtual void emitDirectiveSetMips32R2();
+ virtual void emitDirectiveSetMips32R6();
virtual void emitDirectiveSetMips64();
virtual void emitDirectiveSetMips64R2();
+ virtual void emitDirectiveSetMips64R6();
virtual void emitDirectiveSetDsp();
+ virtual void emitDirectiveSetNoDsp();
+ virtual void emitDirectiveSetPop();
+ virtual void emitDirectiveSetPush();
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ virtual void emitDirectiveCpLoad(unsigned RegNo);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg);
@@ -72,8 +88,8 @@ public:
virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
virtual void emitMipsAbiFlags(){};
- void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; }
- bool getCanHaveModuleDir() { return canHaveModuleDirective; }
+ void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
// This method enables template classes to set internal abi flags
// structure values.
@@ -87,8 +103,21 @@ public:
protected:
MipsABIFlagsSection ABIFlagsSection;
+ bool GPRInfoSet;
+ unsigned GPRBitMask;
+ int GPROffset;
+
+ bool FPRInfoSet;
+ unsigned FPRBitMask;
+ int FPROffset;
+
+ bool FrameInfoSet;
+ int FrameOffset;
+ unsigned FrameReg;
+ unsigned ReturnReg;
+
private:
- bool canHaveModuleDirective;
+ bool ModuleDirectiveAllowed;
};
// This part is for ascii assembly output
@@ -106,6 +135,8 @@ public:
void emitDirectiveSetNoReorder() override;
void emitDirectiveSetMacro() override;
void emitDirectiveSetNoMacro() override;
+ void emitDirectiveSetMsa() override;
+ void emitDirectiveSetNoMsa() override;
void emitDirectiveSetAt() override;
void emitDirectiveSetNoAt() override;
void emitDirectiveEnd(StringRef Name) override;
@@ -121,13 +152,26 @@ public:
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
+ void emitDirectiveSetArch(StringRef Arch) override;
+ void emitDirectiveSetMips0() override;
+ void emitDirectiveSetMips1() override;
+ void emitDirectiveSetMips2() override;
+ void emitDirectiveSetMips3() override;
+ void emitDirectiveSetMips4() override;
+ void emitDirectiveSetMips5() override;
+ void emitDirectiveSetMips32() override;
void emitDirectiveSetMips32R2() override;
+ void emitDirectiveSetMips32R6() override;
void emitDirectiveSetMips64() override;
void emitDirectiveSetMips64R2() override;
+ void emitDirectiveSetMips64R6() override;
void emitDirectiveSetDsp() override;
+ void emitDirectiveSetNoDsp() override;
+ void emitDirectiveSetPop() override;
+ void emitDirectiveSetPush() override;
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
@@ -157,14 +201,8 @@ public:
void emitDirectiveSetMicroMips() override;
void emitDirectiveSetNoMicroMips() override;
void emitDirectiveSetMips16() override;
- void emitDirectiveSetNoMips16() override;
- void emitDirectiveSetReorder() override;
void emitDirectiveSetNoReorder() override;
- void emitDirectiveSetMacro() override;
- void emitDirectiveSetNoMacro() override;
- void emitDirectiveSetAt() override;
- void emitDirectiveSetNoAt() override;
void emitDirectiveEnd(StringRef Name) override;
void emitDirectiveEnt(const MCSymbol &Symbol) override;
@@ -178,13 +216,8 @@ public:
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
- void emitDirectiveSetMips32R2() override;
- void emitDirectiveSetMips64() override;
- void emitDirectiveSetMips64R2() override;
- void emitDirectiveSetDsp() override;
-
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
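
The rename from setCanHaveModuleDir/getCanHaveModuleDir to forbidModuleDirective/isModuleDirectiveAllowed makes the flag's one-way nature explicit: once anything incompatible with .module has been emitted, the directive stays forbidden for the rest of the stream. A minimal sketch of that latch (hypothetical standalone class, not the streamer interface):

    // One-way "module directive" latch implied by the renamed API above.
    class DirectiveGate {
      bool ModuleDirectiveAllowed = true; // fresh streams may emit .module

    public:
      void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
      bool isModuleDirectiveAllowed() const { return ModuleDirectiveAllowed; }

      void emitInstruction() {
        // Any real instruction permanently closes the .module window.
        forbidModuleDirective();
      }
    };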
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 4e35b1811295..3a4a19dc3991 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -9,26 +9,28 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXAllocaHoisting.cpp
+ NVPTXAsmPrinter.cpp
+ NVPTXAssignValidGlobalNames.cpp
NVPTXFavorNonGenericAddrSpaces.cpp
NVPTXFrameLowering.cpp
- NVPTXInstrInfo.cpp
+ NVPTXGenericToNVVM.cpp
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
+ NVPTXImageOptimizer.cpp
+ NVPTXInstrInfo.cpp
+ NVPTXLowerAggrCopies.cpp
+ NVPTXLowerStructArgs.cpp
+ NVPTXMCExpr.cpp
+ NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
+ NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp
- NVPTXLowerAggrCopies.cpp
- NVPTXutil.cpp
- NVPTXAllocaHoisting.cpp
- NVPTXAsmPrinter.cpp
+ NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
+ NVPTXutil.cpp
NVVMReflect.cpp
- NVPTXGenericToNVVM.cpp
- NVPTXAssignValidGlobalNames.cpp
- NVPTXPrologEpilogPass.cpp
- NVPTXMCExpr.cpp
- NVPTXReplaceImageHandles.cpp
- NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 1fb3c57390c2..04969642fd37 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_INST_PRINTER_H
-#define NVPTX_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 16ec19c25f16..a72ae2ef53a7 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXBASEINFO_H
-#define NVPTXBASEINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
namespace llvm {
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 366341afe1b8..11d737ec187f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -25,7 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile",
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
CommentString = "//";
- HasSetDirective = false;
-
HasSingleParameterDotFile = false;
InlineAsmStart = " inline asm";
@@ -52,5 +50,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
AscizDirective = " .b8";
// @TODO: Can we just disable this?
+ WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
}
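
The constructor change from 'const StringRef &TT' to 'StringRef TT' follows the usual LLVM guidance: a StringRef is just a pointer-length pair, so passing it by value is as cheap as the reference and drops an indirection. A sketch with a stand-in type (MiniStringRef is hypothetical, not llvm::StringRef):

    #include <cstddef>

    // A StringRef-like view is two words; copying it is trivial.
    struct MiniStringRef {
      const char *Data = nullptr;
      size_t Length = 0;
    };

    size_t byValue(MiniStringRef S) { return S.Length; }      // preferred
    size_t byRef(const MiniStringRef &S) { return S.Length; } // extra hop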
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 7d1633f60d2c..c3242866b177 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_MCASM_INFO_H
-#define NVPTX_MCASM_INFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
@@ -23,8 +23,8 @@ class StringRef;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(const StringRef &TT);
+ explicit NVPTXMCAsmInfo(StringRef TT);
};
} // namespace llvm
-#endif // NVPTX_MCASM_INFO_H
+#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index af95c76f92b2..98821d231378 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXMCTARGETDESC_H
-#define NVPTXMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index f9fb05922920..a2d670f8d39d 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_MANAGED_STRING_H
-#define LLVM_SUPPORT_MANAGED_STRING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
+#define LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
#include "llvm/ADT/SmallVector.h"
#include <string>
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index e74c808f8554..a3382eb00003 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_H
-#define LLVM_TARGET_NVPTX_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/StringMap.h"
@@ -59,8 +59,9 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
-FunctionPass *
-createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
+ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
@@ -69,6 +70,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
+FunctionPass *createNVPTXLowerStructArgsPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 5b610687e391..69fc86e75414 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_ALLOCA_HOISTING_H_
-#define NVPTX_ALLOCA_HOISTING_H_
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
@@ -47,4 +47,4 @@ extern FunctionPass *createAllocaHoisting();
} // end namespace llvm
-#endif // NVPTX_ALLOCA_HOISTING_H_
+#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 187b88c1d54a..beec9b22921d 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,8 +17,8 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
-#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
+#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
@@ -88,12 +88,9 @@ void VisitGlobalVariableForEmission(
return;
// Do we have a circular dependency?
- if (Visiting.count(GV))
+ if (!Visiting.insert(GV).second)
report_fatal_error("Circular dependency found in global variable set");
- // Start visiting this global
- Visiting.insert(GV);
-
// Make sure we visit all dependents first
DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
@@ -111,159 +108,6 @@ void VisitGlobalVariableForEmission(
}
}
-// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
-// cannot just link to the existing version.
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-using namespace nvptx;
-const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
- MCContext &Ctx = AP.OutContext;
-
- if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
-
- if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (!CE)
- llvm_unreachable("Unknown constant value to lower!");
-
- switch (CE->getOpcode()) {
- default:
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
- if (C != CE)
- return LowerConstant(C, AP);
-
- // Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::AddrSpaceCast: {
- // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
- // handled by the generic() logic in the MCExpr printer
- PointerType *DstTy = cast<PointerType>(CE->getType());
- PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
- if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
- return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::GetElementPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Generate a symbolic expression for the byte address
- APInt OffsetAI(TD.getPointerSizeInBits(), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
-
- const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (!OffsetAI)
- return Base;
-
- int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
- Ctx);
- }
-
- case Instruction::Trunc:
- // We emit the value and depend on the assembler to truncate the generated
- // expression properly. This is important for differences between
- // blockaddress labels. Since the two labels are in the same function, it
- // is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
- case Instruction::BitCast:
- return LowerConstant(CE->getOperand(0), AP);
-
- case Instruction::IntToPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false /*ZExt*/);
- return LowerConstant(Op, AP);
- }
-
- case Instruction::PtrToInt: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- const MCExpr *OpExpr = LowerConstant(Op, AP);
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot equal to the size of the pointer.
- if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
- return OpExpr;
-
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr =
- MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
- }
-
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
- const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
- switch (CE->getOpcode()) {
- default:
- llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add:
- return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub:
- return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul:
- return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv:
- return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem:
- return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl:
- return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And:
- return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or:
- return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
- case Instruction::Xor:
- return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
- }
- }
- }
-}
-
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
@@ -502,8 +346,8 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Type *Ty = F->getReturnType();
@@ -530,17 +374,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
} else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
<< " func_retval0";
- } else {
- if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- unsigned totalsz = TD->getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
- } else
- assert(false && "Unknown return type");
- }
+ } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+ unsigned totalsz = TD->getTypeAllocSize(Ty);
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
+ } else
+ llvm_unreachable("Unknown return type");
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
@@ -626,13 +468,14 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
if (TRI->isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(Twine("implicit-def: ") +
- TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(
+ Twine("implicit-def: ") +
+ TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
}
OutStreamer.AddBlankLine();
}
@@ -794,11 +637,6 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
return false;
}
- if (const MDNode *md = dyn_cast<MDNode>(U))
- if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
- return true;
-
for (const User *UU : U->users())
if (usedInOneFunc(UU, oneFunc) == false)
return false;
@@ -953,7 +791,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -1154,7 +992,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1288,7 +1126,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
// Special case: ABI requires that we use .u8 for predicates
if (ETy->isIntegerTy(1))
@@ -1457,7 +1295,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1470,7 +1308,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
@@ -1509,17 +1347,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (ATy)
return getOpenCLAlignment(TD, ATy->getElementType());
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (VTy) {
- Type *ETy = VTy->getElementType();
- unsigned int numE = VTy->getNumElements();
- unsigned int alignE = TD->getPrefTypeAlignment(ETy);
- if (numE == 3)
- return 4 * alignE;
- else
- return numE * alignE;
- }
-
const StructType *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
@@ -1578,9 +1405,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
@@ -1771,7 +1598,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// Map the global virtual register number to a register class specific
// virtual register number starting from 1 with that class.
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
//unsigned numRegClasses = TRI->getNumRegClasses();
// Emit the Fake Stack Object
@@ -1901,7 +1728,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
}
return;
} else {
- O << *LowerConstant(CPV, *this);
+ O << *lowerConstant(CPV);
return;
}
}
@@ -1911,7 +1738,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -2035,7 +1862,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
int Bytes;
// Old constants
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index a9f9bdd6d3d8..c11b5793b22a 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXASMPRINTER_H
-#define NVPTXASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
@@ -39,13 +39,6 @@
// A better approach is to clone the MCAsmStreamer to an MCPTXAsmStreamer
// (subclass of MCStreamer).
-// This is defined in AsmPrinter.cpp.
-// Used to process the constant expressions in initializers.
-namespace nvptx {
-const llvm::MCExpr *
-LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
-}
-
namespace llvm {
class LineReader {
@@ -86,13 +79,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
- unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
- SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<const Value *, 4> Symbols;
private:
+ const unsigned size; // size of the buffer in bytes
+ std::vector<unsigned char> buffer; // the buffer
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<const Value *, 4> Symbols;
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
@@ -100,14 +93,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : O(_O), AP(_AP) {
- buffer = new unsigned char[_size];
- size = _size;
+ : size(_size), buffer(_size), O(_O), AP(_AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
}
- ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
assert((curpos + Num) <= size);
assert((curpos + Bytes) <= size);
@@ -170,7 +160,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
O << *Name;
}
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
- O << *nvptx::LowerConstant(Cexpr, AP);
+ O << *AP.lowerConstant(Cexpr);
} else
llvm_unreachable("symbol type unknown");
nSym++;
@@ -179,9 +169,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
else
nextSymbolPos = symbolPosInBuffer[nSym];
} else if (nBytes == 4)
- O << *(unsigned int *)(buffer + pos);
+ O << *(unsigned int *)(&buffer[pos]);
else
- O << *(unsigned long long *)(buffer + pos);
+ O << *(unsigned long long *)(&buffer[pos]);
}
}
}
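The AggBuffer rewrite above trades a manually managed array (new[] in the
constructor, delete[] in a user-declared destructor) for a std::vector that
owns its storage. A minimal sketch of the same pattern, with illustrative
names:

#include <vector>

// Rule-of-zero sketch: the vector allocates in the constructor's init list
// and frees itself on destruction, so no explicit destructor is needed.
class Buffer {
  const unsigned size;              // byte count, fixed at construction
  std::vector<unsigned char> data;  // owns the storage

public:
  explicit Buffer(unsigned n) : size(n), data(n) {}
  unsigned char *at(unsigned pos) { return &data[pos]; } // like &buffer[pos]
  unsigned sizeInBytes() const { return size; }
};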
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 8b088412dbba..314df3828b88 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -48,20 +48,20 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
- BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
+ NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 56fb673de0eb..0846b78d58e5 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_FRAMELOWERING_H
-#define NVPTX_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index faa9fdb424b6..5f06d9a791a1 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/PassManager.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -54,8 +55,7 @@ private:
IRBuilder<> &Builder);
Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
IRBuilder<> &Builder);
- void remapNamedMDNode(Module *M, NamedMDNode *N);
- MDNode *remapMDNode(Module *M, MDNode *N);
+ void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N);
typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
@@ -125,12 +125,17 @@ bool GenericToNVVM::runOnModule(Module &M) {
ConstantToValueMap.clear();
}
+ // Copy GVMap over to a standard value map.
+ ValueToValueMapTy VM;
+ for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
+ VM[I->first] = I->second;
+
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
for (Module::named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; I++) {
- remapNamedMDNode(&M, I);
+ remapNamedMDNode(VM, I);
}
// Walk through the global variable initializers, and replace any use of
@@ -140,20 +145,23 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
- ++I;
+
+ // Remove GV from the map so that it can be RAUWed. Note that
+ // DenseMap::erase() won't invalidate any iterators but this one.
+ auto Next = std::next(I);
+ GVMap.erase(I);
+ I = Next;
+
Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
  // variable initializers, as other uses have already been removed
// while walking through the instructions in function definitions.
- for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
- UI != UE;)
- (UI++)->set(BitCastNewGV);
+ GV->replaceAllUsesWith(BitCastNewGV);
std::string Name = GV->getName();
- GV->removeDeadConstantUsers();
GV->eraseFromParent();
NewGV->setName(Name);
}
- GVMap.clear();
+ assert(GVMap.empty() && "Expected it to be empty by now");
return true;
}
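The erase-before-RAUW loop above leans on the container guarantee that
erasing an entry invalidates no iterator other than the erased one. A hedged
sketch of that idiom, assuming a DenseMap-like container and illustrative
names:

#include <iterator> // std::next

// Drain a DenseMap-style map while processing each entry. erase() is
// assumed to invalidate only the erased iterator, as with llvm::DenseMap.
template <typename Map, typename Fn>
void drainMap(Map &M, Fn Process) {
  for (auto I = M.begin(), E = M.end(); I != E;) {
    auto Key = I->first;
    auto Val = I->second;
    auto Next = std::next(I); // capture the successor first
    M.erase(I);               // safe: only I is invalidated
    I = Next;
    Process(Key, Val);        // the entry is already out of the map
  }
}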
@@ -359,7 +367,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
}
}
-void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) {
bool OperandChanged = false;
SmallVector<MDNode *, 16> NewOperands;
@@ -369,7 +377,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
// converted to another value.
for (unsigned i = 0; i < NumOperands; ++i) {
MDNode *Operand = N->getOperand(i);
- MDNode *NewOperand = remapMDNode(M, Operand);
+ MDNode *NewOperand = MapMetadata(Operand, VM);
OperandChanged |= Operand != NewOperand;
NewOperands.push_back(NewOperand);
}
@@ -387,47 +395,3 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
N->addOperand(*I);
}
}
-
-MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
-
- bool OperandChanged = false;
- SmallVector<Value *, 8> NewOperands;
- unsigned NumOperands = N->getNumOperands();
-
- // Check if any operand is or contains a global variable in GVMap, and thus
- // converted to another value.
- for (unsigned i = 0; i < NumOperands; ++i) {
- Value *Operand = N->getOperand(i);
- Value *NewOperand = Operand;
- if (Operand) {
- if (isa<GlobalVariable>(Operand)) {
- GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
- if (I != GVMap.end()) {
- NewOperand = I->second;
- if (++i < NumOperands) {
- NewOperands.push_back(NewOperand);
- // Address space of the global variable follows the global variable
- // in the global variable debug info (see createGlobalVariable in
- // lib/Analysis/DIBuilder.cpp).
- NewOperand =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- I->second->getType()->getAddressSpace());
- }
- }
- } else if (isa<MDNode>(Operand)) {
- NewOperand = remapMDNode(M, cast<MDNode>(Operand));
- }
- }
- OperandChanged |= Operand != NewOperand;
- NewOperands.push_back(NewOperand);
- }
-
- // If none of the operands has been modified, return N as it is.
- if (!OperandChanged) {
- return N;
- }
-
- // If any of the operands has been modified, create a new MDNode with the new
- // operands.
- return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
-}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 05205fba1aff..cd0422d78a8c 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
static cl::opt<int> UsePrecDivF32(
"nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
+ " IEEE Compliant F32 div.rnd if available."),
cl::init(2));
static cl::opt<bool>
@@ -5041,17 +5041,10 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
const Value *Src = nullptr;
- // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
- // the classof() for MemSDNode does not include MemIntrinsicSDNode
- // (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
if (spN == 0 && mN->getMemOperand()->getPseudoValue())
return true;
Src = mN->getMemOperand()->getValue();
- } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
- if (spN == 0 && mN->getMemOperand()->getPseudoValue())
- return true;
- Src = mN->getMemOperand()->getValue();
}
if (!Src)
return false;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index c62fc253c33d..69afcd7320d3 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
@@ -92,3 +95,5 @@ private:
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d76b20a29eb7..093ba1a2b824 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -106,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
- : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
+ : TargetLowering(TM), nvTM(&TM),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
@@ -203,8 +203,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -214,12 +215,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
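For one concrete value type, the loop above is equivalent to the explicit
calls below; the extra MVT argument in the updated setLoadExtAction API names
the in-memory type separately from the produced type. Illustrative expansion
for VT == MVT::i32:

setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote); // sext i1 -> i32
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i1, Promote); // zext i1 -> i32
setTruncStoreAction(MVT::i32, MVT::i1, Expand);              // i32 -> i1 store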
@@ -232,9 +232,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::ADDE, MVT::i64, Expand);
// Register custom handling for vector loads/stores
- for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT VT = (MVT::SimpleValueType) i;
+ for (MVT VT : MVT::vector_valuetypes()) {
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -905,16 +903,14 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ } else if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
- if((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
- } else {
- assert(false && "Unknown return type");
- }
+ llvm_unreachable("Unknown return type");
}
O << ") ";
}
@@ -1355,7 +1351,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
- if (retTy->isSingleValueType()) {
+ // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+ // these three types to match the logic in
+ // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+  // This also matches nvcc's behavior.
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
@@ -1451,8 +1452,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
+ assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
+ F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
bool needTruncate = sz < 8 ? true : false;
@@ -2028,7 +2029,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
+ const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -2142,7 +2143,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
MachinePointerInfo(srcValue), partVT, false,
- false, partAlign);
+ false, false, partAlign);
} else {
p = DAG.getLoad(partVT, dl, Root, srcAddr,
MachinePointerInfo(srcValue), false, false, false,
@@ -2163,7 +2164,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
- unsigned Ofst = 0;
EVT EltVT = ObjectVT.getVectorElementType();
// V1 load
@@ -2172,10 +2172,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// We only have one element, so just directly load it
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -2184,7 +2182,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
InVals.push_back(P);
- Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
++InsIdx;
} else if (NumElts == 2) {
// V2 load
@@ -2192,10 +2189,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -2213,7 +2208,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
InVals.push_back(Elt0);
InVals.push_back(Elt1);
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
InsIdx += 2;
} else {
// V4 loads
@@ -2231,6 +2225,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
VecSize = 2;
}
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
+ unsigned Ofst = 0;
for (unsigned i = 0; i < NumElts; i += VecSize) {
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
@@ -2275,6 +2270,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
MachinePointerInfo(srcValue), ObjectVT, false, false,
+ false,
TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
@@ -3269,16 +3265,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
@@ -3298,16 +3285,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
@@ -3866,8 +3844,8 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
}
else if (N0.getOpcode() == ISD::FMUL) {
if (VT == MVT::f32 || VT == MVT::f64) {
- NVPTXTargetLowering *TLI =
- (NVPTXTargetLowering *)&DAG.getTargetLoweringInfo();
+ const auto *TLI = static_cast<const NVPTXTargetLowering *>(
+ &DAG.getTargetLoweringInfo());
if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
return SDValue();
@@ -4053,13 +4031,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
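The switch from == to <= above widens the match: an operand extended from
fewer than OptSize bits still fits in OptSize bits, so the wide multiply can
still be demoted. A small self-contained illustration, assuming OptSize == 32:

#include <cassert>
#include <cstdint>

// An i8 sign-extended to i64 is also exactly representable in i32, so a
// 64-bit multiply of such operands can still use a 32x32 -> 64 mul.wide.
void demotableExample() {
  int8_t Small = -7;
  int64_t Wide = Small;                       // SIGN_EXTEND from 8 bits
  const unsigned OrigBits = 8, OptSize = 32;
  assert(OrigBits <= OptSize);                // the relaxed check above
  assert(Wide == static_cast<int32_t>(Wide)); // the value fits in 32 bits
}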
@@ -4514,3 +4492,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfRangesSection;
delete DwarfMacroInfoSection;
}
+
+const MCSection *
+NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ return getDataSection();
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index bef6ed9faad6..b3fea3f4a36a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXISELLOWERING_H
-#define NVPTXISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
#include "NVPTX.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -436,7 +436,7 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -495,7 +495,7 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- NVPTXTargetMachine *nvTM;
+ const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
@@ -505,9 +505,9 @@ public:
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
- virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
- return true;
- }
+ bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+
+ bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -538,4 +538,4 @@ private:
};
} // namespace llvm
-#endif // NVPTXISELLOWERING_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a98fb37f6e25..aa36b6be7250 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -16,11 +16,11 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b5b4fbed0799..740ca0328efe 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,11 +14,11 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 2ac29748676a..6de75364a823 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXINSTRUCTIONINFO_H
-#define NVPTXINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 9900b8c8433f..2c571c4878a8 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -296,7 +296,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// General Type Conversion
//-----------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Generate a cvt to the given type from all possible types.
// Each instance takes a CvtMode immediate that defines the conversion mode to
// use. It can be CvtNONE to omit a conversion mode.
@@ -2094,7 +2094,7 @@ multiclass LD<NVPTXRegClass regclass> {
"$fromWidth \t$dst, [$addr+$offset];"), []>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LD_i8 : LD<Int16Regs>;
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
@@ -2136,7 +2136,7 @@ multiclass ST<NVPTXRegClass regclass> {
" \t[$addr+$offset], $src;"), []>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm ST_i8 : ST<Int16Regs>;
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
@@ -2220,7 +2220,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
@@ -2303,7 +2303,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm STV_i8 : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 5ec1fc969687..8759406a6803 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_LOWER_AGGR_COPIES_H
-#define NVPTX_LOWER_AGGR_COPIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
new file mode 100644
index 000000000000..3149399afb30
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -0,0 +1,134 @@
+//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Copy struct args to local memory. This is needed for kernel functions only.
+// It prepares for handling cases like
+//
+// kernel void foo(struct A arg, ...)
+// {
+// struct A *p = &arg;
+// ...
+// ... = p->field1 ... (there is no generic address for .param)
+// p->field2 = ... (there is no write access to .param)
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+}
+
+class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+ void handleStructPtrArgs(Function &);
+ void handleParam(Argument *);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerStructArgs() : FunctionPass(ID) {}
+ const char *getPassName() const override {
+ return "Copy structure (byval *) arguments to stack";
+ }
+};
+
+char NVPTXLowerStructArgs::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
+ "Lower structure arguments (NVPTX)", false, false)
+
+void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+
+ assert(PType && "Expecting pointer type in handleParam");
+
+ Type *StructType = PType->getElementType();
+ AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+
+  // Set the alignment to the alignment of the byval parameter, because
+  // later loads and stores assume that alignment and we are about to
+  // replace all uses of the byval parameter with this alloca.
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+
+ Arg->replaceAllUsesWith(AllocA);
+
+ // Get the cvt.gen.to.param intrinsic
+ Type *CvtTypes[] = {
+ Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
+ Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC)};
+ Function *CvtFunc = Intrinsic::getDeclaration(
+ Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
+
+ Value *BitcastArgs[] = {
+ new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC),
+ Arg->getName(), FirstInst)};
+ CallInst *CallCVT =
+ CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
+
+ BitCastInst *BitCast = new BitCastInst(
+ CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
+ Arg->getName(), FirstInst);
+ LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
+ new StoreInst(LI, AllocA, FirstInst);
+}
+
+// =============================================================================
+// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
+// add the following instructions to the first basic block :
+//
+// %temp = alloca %struct.x, align 8
+// %tt1 = bitcast %struct.x* %d to i8*
+// %tt2 = call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param(i8* %tt1)
+// %tempd = bitcast i8 addrspace(101)* %tt2 to %struct.x addrspace(101)*
+// %tv = load %struct.x addrspace(101)* %tempd
+// store %struct.x %tv, %struct.x* %temp, align 8
+//
+// The above code allocates space on the stack and copies the incoming
+// struct from param space to local space.
+// All occurrences of %d are then replaced with %temp.
+// =============================================================================
+void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleParam(&Arg);
+ }
+ }
+}
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+
+ handleStructPtrArgs(F);
+ return true;
+}
+
+FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
+ return new NVPTXLowerStructArgs();
+}
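A hedged usage sketch for the new pass, assuming the legacy PassManager used
by this code base and that the factory is declared in NVPTX.h; the driver
function itself is illustrative:

#include "NVPTX.h"            // assumed home of the factory declaration
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"

// Illustrative driver: lower byval struct args in every kernel of M before
// instruction selection.
void lowerStructArgs(llvm::Module &M) {
  llvm::PassManager PM;       // legacy pass manager
  PM.add(llvm::createNVPTXLowerStructArgsPass());
  PM.run(M);
}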
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 554764930a9e..d39a394fe750 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -9,8 +9,8 @@
// Modeled after ARMMCExpr
-#ifndef NVPTXMCEXPR_H
-#define NVPTXMCEXPR_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
#include "llvm/ADT/APFloat.h"
#include "llvm/MC/MCExpr.h"
@@ -63,7 +63,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 67fb39050797..10f1135ad841 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -44,3 +47,5 @@ public:
}
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 348ab0c4bf14..a1e1b9e74480 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -48,8 +49,8 @@ char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
+ const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
@@ -108,8 +109,8 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
void
NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index a7594be121a0..d2e67331f788 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXREGISTERINFO_H
-#define NVPTXREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 20d4e272341e..b7f53c7929d1 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,11 +16,11 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
using namespace llvm;
@@ -33,9 +33,9 @@ private:
public:
NVPTXReplaceImageHandles();
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX Replace Image Handles";
}
private:
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index aa0436bf0da7..f1d3cb4da51b 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_NVPTXSECTION_H
-#define LLVM_NVPTXSECTION_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index d5cded218362..3d52532310ff 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -59,7 +59,8 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
SmVersion(20), DL(computeDataLayout(is64Bit)),
InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) {
+ TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
+ FrameLowering(*this) {
Triple T(TT);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 4c41e4e470dd..fb2d4047631a 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXSUBTARGET_H
-#define NVPTXSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
#include "NVPTX.h"
#include "NVPTXFrameLowering.h"
@@ -57,14 +57,20 @@ public:
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM, bool is64Bit);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const NVPTXRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const NVPTXTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
bool hasBrkPt() const { return SmVersion >= 11; }
bool hasAtomRedG32() const { return SmVersion >= 11; }
@@ -113,4 +119,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 069a1b9966f0..c7f95071b9fc 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXTargetObjectFile.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -50,6 +51,7 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -64,6 +66,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
@@ -72,10 +75,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
+NVPTXTargetMachine::~NVPTXTargetMachine() {}
+
void NVPTXTargetMachine32::anchor() {}
NVPTXTargetMachine32::NVPTXTargetMachine32(
@@ -104,8 +110,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
+ void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
@@ -119,6 +124,14 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
+void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // First add the target-independent BasicTTI pass, then our NVPTX pass.
+  // This lets the NVPTX pass delegate to the target-independent layer when
+  // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createNVPTXTargetTransformInfoPass(this));
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -169,10 +182,8 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() { return false; }
-bool NVPTXPassConfig::addPostRegAlloc() {
- addPass(createNVPTXPrologEpilogPass());
- return false;
+void NVPTXPassConfig::addPostRegAlloc() {
+ addPass(createNVPTXPrologEpilogPass(), false);
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a7a1c8f4e171..fa97ec8dfe2d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_TARGETMACHINE_H
-#define NVPTX_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
-#include "NVPTXSubtarget.h"
#include "ManagedStringPool.h"
+#include "NVPTXSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -25,6 +25,7 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -35,27 +36,9 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const NVPTXInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const NVPTXRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
-
- const NVPTXTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
+ ~NVPTXTargetMachine() override;
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+ const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
@@ -63,17 +46,17 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- // Emission of machine code through JITCodeEmitter is not supported.
- bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
- bool = true) override {
- return true;
- }
-
// Emission of machine code through MCJIT is not supported.
bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) override {
return true;
}
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ /// \brief Register NVPTX analysis passes with a pass manager.
+ void addAnalysisPasses(PassManagerBase &PM) override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index ba8086d78880..00ceca50a9f2 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
-#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
#include "NVPTXSection.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -98,6 +98,9 @@ public:
return DataSection;
}
+ const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
new file mode 100644
index 000000000000..b09d0d424f55
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -0,0 +1,115 @@
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// NVPTX target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTXtti"
+
+// Declare the pass initialization routine locally: target-specific passes
+// don't have a target-wide initialization entry point, so we rely on
+// initialization in the pass constructor.
+namespace llvm {
+void initializeNVPTXTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
+ const NVPTXTargetLowering *TLI;
+public:
+ NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ NVPTXTTI(const NVPTXTargetMachine *TM)
+ : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
+ initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void initializePass() override { pushTTIStack(this); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo *)this;
+ return this;
+ }
+
+ bool hasBranchDivergence() const override;
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
+ "NVPTX Target Transform Info", true, true, false)
+char NVPTXTTI::ID = 0;
+
+ImmutablePass *
+llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXTTI(TM);
+}
+
+bool NVPTXTTI::hasBranchDivergence() const { return true; }
+
+unsigned NVPTXTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ switch (ISD) {
+ default:
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::AND:
+ // The machine code (SASS) simulates an i64 with two i32. Therefore, we
+ // estimate that arithmetic operations on i64 are twice as expensive as
+ // those on types that can fit into one machine register.
+ if (LT.second.SimpleTy == MVT::i64)
+ return 2 * LT.first;
+ // Delegate other cases to the basic TTI.
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ }
+}
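A client-side sketch of how the doubled i64 estimate surfaces through the TTI
analysis group; the function and its callers are illustrative:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

// With the override above, C64 should come back as twice C32, because SASS
// emulates one i64 add with two i32 operations.
unsigned i64AddPenalty(const llvm::TargetTransformInfo &TTI,
                       llvm::LLVMContext &Ctx) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
  unsigned C32 = TTI.getArithmeticInstrCost(llvm::Instruction::Add, I32);
  unsigned C64 = TTI.getArithmeticInstrCost(llvm::Instruction::Add, I64);
  return C64 - C32; // expected to equal C32 on NVPTX
}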
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index a9fd190b7ff0..cf1feacba3f7 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -15,16 +15,16 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <string>
#include <vector>
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -52,7 +52,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
+ ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -75,7 +75,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *elem = NMD->getOperand(i);
- Value *entity = elem->getOperand(0);
+ GlobalValue *entity =
+ mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
// entity may be null due to DCE
if (!entity)
continue;
@@ -90,11 +91,11 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
return;
if ((*annotationCache).find(m) != (*annotationCache).end())
- (*annotationCache)[m][gv] = tmp;
+ (*annotationCache)[m][gv] = std::move(tmp);
else {
global_val_annot_t tmp1;
- tmp1[gv] = tmp;
- (*annotationCache)[m] = tmp1;
+ tmp1[gv] = std::move(tmp);
+ (*annotationCache)[m] = std::move(tmp1);
}
}
@@ -322,7 +323,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
if ((v >> 16) == index) {
align = v & 0xFFFF;
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 446bfa1e112c..7e2ce73daaa3 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXUTILITIES_H
-#define NVPTXUTILITIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
index 775df19be162..85aa34e9aea7 100644
--- a/lib/Target/NVPTX/NVPTXVector.td
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -661,7 +661,7 @@ class ShuffleAsmStr4<string type>
string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
}
-let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
(ins V4F32Regs:$src1, V4F32Regs:$src2,
i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
@@ -847,7 +847,7 @@ class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
!strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
[], sop>;
-let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
VecInstType=isVecOther.Value in {
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
index d1d117159486..1915dacf0f20 100644
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ b/lib/Target/NVPTX/NVPTXutil.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_UTIL_H
-#define LLVM_TARGET_NVPTX_UTIL_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index d7066d58709a..cd36e58b78d3 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -32,9 +32,7 @@
using namespace llvm;
-namespace {
-
-static unsigned RRegs[32] = {
+static const MCPhysReg RRegs[32] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
PPC::R8, PPC::R9, PPC::R10, PPC::R11,
@@ -44,7 +42,7 @@ static unsigned RRegs[32] = {
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
PPC::R28, PPC::R29, PPC::R30, PPC::R31
};
-static unsigned RRegsNoR0[32] = {
+static const MCPhysReg RRegsNoR0[32] = {
PPC::ZERO,
PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -55,7 +53,7 @@ static unsigned RRegsNoR0[32] = {
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
PPC::R28, PPC::R29, PPC::R30, PPC::R31
};
-static unsigned XRegs[32] = {
+static const MCPhysReg XRegs[32] = {
PPC::X0, PPC::X1, PPC::X2, PPC::X3,
PPC::X4, PPC::X5, PPC::X6, PPC::X7,
PPC::X8, PPC::X9, PPC::X10, PPC::X11,
@@ -65,7 +63,7 @@ static unsigned XRegs[32] = {
PPC::X24, PPC::X25, PPC::X26, PPC::X27,
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
-static unsigned XRegsNoX0[32] = {
+static const MCPhysReg XRegsNoX0[32] = {
PPC::ZERO8,
PPC::X1, PPC::X2, PPC::X3,
PPC::X4, PPC::X5, PPC::X6, PPC::X7,
@@ -76,7 +74,7 @@ static unsigned XRegsNoX0[32] = {
PPC::X24, PPC::X25, PPC::X26, PPC::X27,
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
-static unsigned FRegs[32] = {
+static const MCPhysReg FRegs[32] = {
PPC::F0, PPC::F1, PPC::F2, PPC::F3,
PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11,
@@ -86,7 +84,7 @@ static unsigned FRegs[32] = {
PPC::F24, PPC::F25, PPC::F26, PPC::F27,
PPC::F28, PPC::F29, PPC::F30, PPC::F31
};
-static unsigned VRegs[32] = {
+static const MCPhysReg VRegs[32] = {
PPC::V0, PPC::V1, PPC::V2, PPC::V3,
PPC::V4, PPC::V5, PPC::V6, PPC::V7,
PPC::V8, PPC::V9, PPC::V10, PPC::V11,
@@ -96,7 +94,7 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
-static unsigned VSRegs[64] = {
+static const MCPhysReg VSRegs[64] = {
PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
@@ -115,7 +113,7 @@ static unsigned VSRegs[64] = {
PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
};
-static unsigned VSFRegs[64] = {
+static const MCPhysReg VSFRegs[64] = {
PPC::F0, PPC::F1, PPC::F2, PPC::F3,
PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11,
@@ -134,7 +132,7 @@ static unsigned VSFRegs[64] = {
PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
-static unsigned CRBITRegs[32] = {
+static const MCPhysReg CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
@@ -144,7 +142,7 @@ static unsigned CRBITRegs[32] = {
PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
};
-static unsigned CRRegs[8] = {
+static const MCPhysReg CRRegs[8] = {
PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
};
@@ -210,20 +208,18 @@ EvaluateCRExpr(const MCExpr *E) {
llvm_unreachable("Invalid expression kind!");
}
+namespace {
+
struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
bool IsPPC64;
bool IsDarwin;
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
bool isDarwin() const { return IsDarwin; }
@@ -250,7 +246,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -266,9 +262,8 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
+ const MCInstrInfo &_MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -297,6 +292,7 @@ struct PPCOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Immediate,
+ ContextImmediate,
Expression,
TLSRegister
} Kind;
@@ -341,6 +337,7 @@ public:
Tok = o.Tok;
break;
case Immediate:
+ case ContextImmediate:
Imm = o.Imm;
break;
case Expression:
@@ -365,6 +362,16 @@ public:
assert(Kind == Immediate && "Invalid access!");
return Imm.Val;
}
+ int64_t getImmS16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ if (Kind == Immediate)
+ return Imm.Val;
+ return static_cast<int16_t>(Imm.Val);
+ }
+ int64_t getImmU16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ return Imm.Val;
+ }
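
A ContextImmediate arises when a target expression such as sym@l or sym@ha folds to a constant (see CreateContextImm further down). The S16 getter then reinterprets the folded 16-bit value as signed, which is the whole difference from the plain getter. A standalone sketch of that reinterpretation in plain C++ (foldLow16 is a stand-in for the VK_PPC_LO fold; not the LLVM API):

    #include <cassert>
    #include <cstdint>

    // Stand-in for the VK_PPC_LO fold performed by PPCMCExpr: keep the
    // low 16 bits of the folded constant.
    int64_t foldLow16(int64_t Value) { return Value & 0xffff; }

    int main() {
      // 0x12348000@l folds to 0x8000; as a *signed* 16-bit context
      // immediate it must read as -32768, which the int16_t cast gives.
      int64_t Folded = foldLow16(0x12348000);
      assert(Folded == 0x8000);
      assert(static_cast<int16_t>(Folded) == -32768);
      // The unsigned 16-bit getter returns the raw value unchanged.
      assert((Folded & 0xffff) == 0x8000);
      return 0;
    }
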
const MCExpr *getExpr() const {
assert(Kind == Expression && "Invalid access!");
@@ -409,22 +416,73 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
- bool isU16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isUInt<16>(getImm())); }
- bool isS16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<16>(getImm())); }
+ bool isU6ImmX2() const { return Kind == Immediate &&
+ isUInt<6>(getImm()) &&
+ (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return Kind == Immediate &&
+ isUInt<7>(getImm()) &&
+ (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return Kind == Immediate &&
+ isUInt<8>(getImm()) &&
+ (getImm() & 7) == 0; }
+ bool isU16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isUInt<16>(getImmU16Context());
+ default:
+ return false;
+ }
+ }
+ bool isS16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<16>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isS16ImmX4() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
- bool isS17Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<17>(getImm())); }
+ bool isS17Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<17>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isTLSReg() const { return Kind == TLSRegister; }
- bool isDirectBr() const { return Kind == Expression ||
- (Kind == Immediate && isInt<26>(getImm()) &&
- (getImm() & 3) == 0); }
+ bool isDirectBr() const {
+ if (Kind == Expression)
+ return true;
+ if (Kind != Immediate)
+ return false;
+ // Operand must be a multiple of 4 (word-aligned) and a signed 26-bit immediate.
+ if ((getImm() & 3) != 0)
+ return false;
+ if (isInt<26>(getImm()))
+ return true;
+ if (!IsPPC64) {
+ // In 32-bit mode, large 32-bit quantities wrap around.
+ if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm())))
+ return true;
+ }
+ return false;
+ }
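
The rewritten isDirectBr() accepts three shapes: any unresolved expression, a word-aligned immediate fitting the signed 26-bit branch range (a 24-bit field scaled by 4), and, in 32-bit mode only, a large unsigned value whose 32-bit truncation lands back in range, since 32-bit addresses wrap around. A standalone model of those checks (isIntN/isUIntN stand in for the llvm/Support/MathExtras.h helpers):

    #include <cassert>
    #include <cstdint>

    // Stand-ins for the llvm/Support/MathExtras.h helpers.
    template <unsigned N> bool isIntN(int64_t V) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    }
    template <unsigned N> bool isUIntN(uint64_t V) {
      return V < (UINT64_C(1) << N);
    }

    // Standalone model of the isDirectBr() immediate checks above.
    bool directBrOK(int64_t Imm, bool IsPPC64) {
      if ((Imm & 3) != 0)   // branch targets are word-aligned
        return false;
      if (isIntN<26>(Imm))  // 24-bit field scaled by 4
        return true;
      if (!IsPPC64)         // 32-bit mode: large values wrap around
        return isUIntN<32>(Imm) && isIntN<26>(static_cast<int32_t>(Imm));
      return false;
    }

    int main() {
      assert(directBrOK(0x1FFFFFC, true));    // last reachable forward target
      assert(!directBrOK(0x2000000, true));   // one instruction too far
      assert(!directBrOK(6, true));           // misaligned
      assert(directBrOK(0xFFFFFFF8u, false)); // wraps to -8 in 32-bit mode
      assert(!directBrOK(0xFFFFFFF8u, true)); // but not in 64-bit mode
      return 0;
    }
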
bool isCondBr() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
@@ -529,6 +587,36 @@ public:
Inst.addOperand(MCOperand::CreateExpr(getExpr()));
}
+ void addS16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
+ void addU16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
@@ -569,9 +657,9 @@ public:
// explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
- Op->Tok.Data = (const char *)(Op.get() + 1);
+ Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
+ std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
@@ -610,6 +698,16 @@ public:
}
static std::unique_ptr<PPCOperand>
+ CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(ContextImmediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -618,6 +716,12 @@ public:
if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
return CreateTLSReg(SRE, S, E, IsPPC64);
+ if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
+ int64_t Res;
+ if (TE->EvaluateAsConstant(Res))
+ return CreateContextImm(Res, S, E, IsPPC64);
+ }
+
return CreateExpr(Val, S, E, IsPPC64);
}
};
@@ -630,6 +734,7 @@ void PPCOperand::print(raw_ostream &OS) const {
OS << "'" << getToken() << "'";
break;
case Immediate:
+ case ContextImmediate:
OS << getImm();
break;
case Expression:
@@ -641,6 +746,29 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
+static void
+addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
+ if (Op.isImm()) {
+ Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+ return;
+ }
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
+ if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
+ Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+ return;
+ }
+ } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
+ const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
+ BinExpr->getLHS(), Ctx);
+ Inst.addOperand(MCOperand::CreateExpr(NE));
+ return;
+ }
+ }
+ Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+}
+
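
addNegOperand() is what lets the subi family below take symbolic operands, not just literals: a constant is negated outright, a unary minus is unwrapped, a subtraction has its operands swapped, and anything else is wrapped in a new unary minus. A toy model of those four cases over a hand-rolled expression type (a sketch only, not the MCExpr API):

    #include <cassert>
    #include <cstdint>
    #include <memory>
    #include <string>

    // Toy expression type standing in for MCExpr.
    struct Expr {
      enum { Const, Sym, Neg, Sub } Kind;
      int64_t Val;
      std::string Name;
      std::shared_ptr<Expr> L, R;
    };

    // Mirrors the four addNegOperand() cases above.
    std::shared_ptr<Expr> negate(const std::shared_ptr<Expr> &E) {
      if (E->Kind == Expr::Const)                    // -imm
        return std::make_shared<Expr>(Expr{Expr::Const, -E->Val});
      if (E->Kind == Expr::Neg)                      // -(-x) ==> x
        return E->L;
      if (E->Kind == Expr::Sub)                      // -(a-b) ==> b-a
        return std::make_shared<Expr>(Expr{Expr::Sub, 0, "", E->R, E->L});
      return std::make_shared<Expr>(Expr{Expr::Neg, 0, "", E});
    }

    int main() {
      // subi r3, r3, 4  ==>  addi r3, r3, -4
      assert(negate(std::make_shared<Expr>(Expr{Expr::Const, 4}))->Val == -4);
      // subi r3, r3, end-start  ==>  addi r3, r3, start-end
      auto End = std::make_shared<Expr>(Expr{Expr::Sym, 0, "end"});
      auto Start = std::make_shared<Expr>(Expr{Expr::Sym, 0, "start"});
      auto N = negate(std::make_shared<Expr>(Expr{Expr::Sub, 0, "", End, Start}));
      assert(N->Kind == Expr::Sub && N->L->Name == "start" && N->R->Name == "end");
      return 0;
    }
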
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
@@ -656,41 +784,37 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
case PPC::SUBI: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDI);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIS: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIS);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIC: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIC);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBICo: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDICo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
@@ -924,12 +1048,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- default: break;
case Match_Success:
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
@@ -939,10 +1062,10 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ return Error(IDLoc, "unrecognized instruction mnemonic");
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -998,6 +1121,7 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -1179,6 +1303,7 @@ ParseExpression(const MCExpr *&EVal) {
/// for this to be done at a higher level.
bool PPCAsmParser::
ParseDarwinExpression(const MCExpr *&EVal) {
+ MCAsmParser &Parser = getParser();
PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
switch (getLexer().getKind()) {
default:
@@ -1221,6 +1346,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
@@ -1429,6 +1555,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -1453,6 +1580,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveTC
/// ::= .tc [ symbol (, expression)* ]
bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Skip TC symbol, which is only used with XCOFF.
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
@@ -1473,6 +1601,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1507,6 +1636,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
/// ParseDarwinDirectiveMachine (Mach-o platforms)
/// ::= .machine cpu-identifier
bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1623,6 +1753,10 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
case MCK_1: ImmVal = 1; break;
case MCK_2: ImmVal = 2; break;
case MCK_3: ImmVal = 3; break;
+ case MCK_4: ImmVal = 4; break;
+ case MCK_5: ImmVal = 5; break;
+ case MCK_6: ImmVal = 6; break;
+ case MCK_7: ImmVal = 7; break;
default: return Match_InvalidOperand;
}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index ea4de63a2448..47a9474ae161 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
@@ -16,7 +15,6 @@ add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
- PPCCodeEmitter.cpp
PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
PPCInstrInfo.cpp
@@ -24,7 +22,6 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCFastISel.cpp
PPCFrameLowering.cpp
- PPCJITInfo.cpp
PPCMCInstLower.cpp
PPCMachineFunctionInfo.cpp
PPCRegisterInfo.cpp
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index b0978c227ae9..ea3e7eaf839d 100644
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCDisassembler
parent = PowerPC
-required_libraries = MC PowerPCInfo Support
+required_libraries = MCDisassembler PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a2305a9efc71..5251b60f3480 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,7 +12,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -28,13 +27,10 @@ public:
: MCDisassembler(STI, Ctx) {}
virtual ~PPCDisassembler() {}
- // Override MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -325,23 +321,19 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
#include "PPCGenDisassemblerTables.inc"
DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
// Get the four bytes of the instruction.
- uint8_t Bytes[4];
Size = 4;
- if (Region.readBytes(Address, Size, Bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
// The instruction is big-endian encoded.
- uint32_t Inst = (Bytes[0] << 24) |
- (Bytes[1] << 16) |
- (Bytes[2] << 8) |
- (Bytes[3] << 0);
+ uint32_t Inst =
+ (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
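
With MemoryObject gone, the bytes arrive as an ArrayRef and the 32-bit instruction word is still assembled by hand in big-endian order. A standalone check of that combine (the explicit casts sidestep shifting a promoted signed int into its sign bit when byte 0 has its top bit set):

    #include <cassert>
    #include <cstdint>

    // Same big-endian combine as getInstruction() above.
    uint32_t beWord(const uint8_t B[4]) {
      return (uint32_t(B[0]) << 24) | (uint32_t(B[1]) << 16) |
             (uint32_t(B[2]) << 8) | uint32_t(B[3]);
    }

    int main() {
      const uint8_t Nop[4] = {0x60, 0x00, 0x00, 0x00}; // ori 0,0,0 ("nop")
      assert(beWord(Nop) == 0x60000000);
      return 0;
    }
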
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 628819de407e..670c40a2a3bd 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -208,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 15 && "Invalid u4imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 211a62813e7a..b21aa22daa1c 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCINSTPRINTER_H
-#define PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
@@ -44,6 +44,7 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 9d173d64b944..fd5fa560912f 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = PowerPCCodeGen
parent = PowerPC
-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index c54d5e75bdfd..bea88a2ff2c6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -9,8 +9,8 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index ca813176bd5a..b817394e52c6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -24,11 +24,7 @@ namespace {
public:
PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
- virtual ~PPCELFObjectWriter();
protected:
- virtual unsigned getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -42,9 +38,6 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
/*HasRelocationAddend*/ true) {}
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
const MCFixup &Fixup) {
const MCExpr *Expr = Fixup.getValue();
@@ -73,10 +66,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
llvm_unreachable("unknown PPCMCExpr kind");
}
-unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const
-{
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
@@ -95,6 +87,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PLT:
Type = ELF::R_PPC_PLTREL24;
break;
+ case MCSymbolRefExpr::VK_PPC_LOCAL:
+ Type = ELF::R_PPC_LOCAL24PC;
+ break;
}
break;
case PPC::fixup_ppc_brcond14:
@@ -400,12 +395,6 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
return Type;
}
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return getRelocTypeInner(Target, Fixup, IsPCRel);
-}
-
bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
switch (Type) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 68de8c1f27a5..ae43e59d3cb1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PPC_PPCFIXUPKINDS_H
-#define LLVM_PPC_PPCFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b95a2ac13e04..2b4f2d81db85 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -42,9 +42,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
UseIntegratedAssembler = true;
}
-void PPCLinuxMCAsmInfo::anchor() { }
+void PPCELFMCAsmInfo::anchor() { }
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
+PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
DollarIsPC = true;
// Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
MinInstAlignment = 4;
// Exceptions handling
@@ -73,10 +72,8 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
- if (T.getOS() == llvm::Triple::FreeBSD ||
- (T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
- (T.getOS() == llvm::Triple::OpenBSD && !is64Bit))
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 754330b2c60f..86ad3859b72c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETASMINFO_H
-#define PPCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -21,15 +21,16 @@ namespace llvm {
class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- void anchor() override;
+ virtual void anchor();
+
public:
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
+ class PPCELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 435a93f78c1d..786b7fe35d66 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -66,6 +66,15 @@ public:
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -260,6 +269,54 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
}
+unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe8dis: the low 5 bits hold (imm / 8)
+ // as the displacement and the next 5 bits hold the register number.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe4dis: the low 5 bits hold (imm / 4)
+ // as the displacement and the next 5 bits hold the register number.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe2dis: the low 5 bits hold (imm / 2)
+ // as the displacement and the next 5 bits hold the register number.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
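
All three SPE encoders funnel the scaled displacement and the base register into one 10-bit field and then emit it bit-reversed: reverseBits() flips all 32 bits, and the shift by 22 keeps just the reversed field. A standalone model for the /8 variant (reverseBits32 stands in for llvm::reverseBits):

    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm::reverseBits<uint32_t>.
    uint32_t reverseBits32(uint32_t V) {
      uint32_t R = 0;
      for (int i = 0; i < 32; ++i)
        R = (R << 1) | ((V >> i) & 1);
      return R;
    }

    // Standalone model of getSPE8DisEncoding().
    uint32_t spe8Dis(uint32_t Imm, uint32_t Reg) {
      uint32_t Field = (Imm >> 3) | (Reg << 5); // disp/8 in bits 0-4, reg in 5-9
      return reverseBits32(Field) >> 22;        // emit the 10-bit field reversed
    }

    int main() {
      // Displacement 8, base register 3:
      // Field = 0b00011'00001; reversed 10-bit field = 0b10000'11000.
      assert(spe8Dis(8, 3) == 0x218);
      return 0;
    }
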
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 3ac0aca6b78c..7204befe15ee 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCFixupKinds.h"
#include "PPCMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -52,40 +53,56 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
}
bool
+PPCMCExpr::EvaluateAsConstant(int64_t &Res) const {
+ MCValue Value;
+
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr))
+ return false;
+
+ if (!Value.isAbsolute())
+ return false;
+
+ Res = EvaluateAsInt64(Value.getConstant());
+ return true;
+}
+
+int64_t
+PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
+ switch (Kind) {
+ case VK_PPC_LO:
+ return Value & 0xffff;
+ case VK_PPC_HI:
+ return (Value >> 16) & 0xffff;
+ case VK_PPC_HA:
+ return ((Value + 0x8000) >> 16) & 0xffff;
+ case VK_PPC_HIGHER:
+ return (Value >> 32) & 0xffff;
+ case VK_PPC_HIGHERA:
+ return ((Value + 0x8000) >> 32) & 0xffff;
+ case VK_PPC_HIGHEST:
+ return (Value >> 48) & 0xffff;
+ case VK_PPC_HIGHESTA:
+ return ((Value + 0x8000) >> 48) & 0xffff;
+ case VK_PPC_None:
+ break;
+ }
+ llvm_unreachable("Invalid kind!");
+}
+
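
Hoisting the folds into EvaluateAsInt64() also documents the arithmetic: the plain forms mask out a 16-bit slice, while the "a" forms add 0x8000 first so the high half compensates for the sign extension the low half will receive when used as a signed 16-bit immediate. A worked standalone check of the usual addis/lwz pairing, assuming the same masks:

    #include <cassert>
    #include <cstdint>

    // The same folds as PPCMCExpr::EvaluateAsInt64() above.
    int64_t lo(int64_t V) { return V & 0xffff; }
    int64_t hi(int64_t V) { return (V >> 16) & 0xffff; }
    int64_t ha(int64_t V) { return ((V + 0x8000) >> 16) & 0xffff; }

    int main() {
      int64_t Addr = 0x12348001;
      assert(hi(Addr) == 0x1234); // @h: the raw upper half
      assert(ha(Addr) == 0x1235); // @ha: bumped, because @l is negative here
      // addis rT, 0, Addr@ha ; lwz rD, Addr@l(rT)
      // addis shifts by 16; the displacement is sign-extended by the lwz.
      int64_t Reassembled = (ha(Addr) << 16) + int16_t(lo(Addr));
      assert(Reassembled == Addr); // the pair round-trips the full address
      return 0;
    }
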
+bool
PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup))
return false;
if (Value.isAbsolute()) {
- int64_t Result = Value.getConstant();
- switch (Kind) {
- default:
- llvm_unreachable("Invalid kind!");
- case VK_PPC_LO:
- Result = Result & 0xffff;
- break;
- case VK_PPC_HI:
- Result = (Result >> 16) & 0xffff;
- break;
- case VK_PPC_HA:
- Result = ((Result + 0x8000) >> 16) & 0xffff;
- break;
- case VK_PPC_HIGHER:
- Result = (Result >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHERA:
- Result = ((Result + 0x8000) >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHEST:
- Result = (Result >> 48) & 0xffff;
- break;
- case VK_PPC_HIGHESTA:
- Result = ((Result + 0x8000) >> 48) & 0xffff;
- break;
- }
+ int64_t Result = EvaluateAsInt64(Value.getConstant());
+ if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
+ (Result >= 0x8000))
+ return false;
Res = MCValue::get(Result);
} else {
if (!Layout)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index bca408507e72..f0a6bb9b5f4b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCEXPR_H
-#define PPCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
@@ -34,6 +34,8 @@ private:
const MCExpr *Expr;
bool IsDarwin;
+ int64_t EvaluateAsInt64(int64_t Value) const;
+
explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
bool _IsDarwin)
: Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
@@ -78,7 +80,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
@@ -87,6 +90,8 @@ public:
// There are no TLS PPCMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+ bool EvaluateAsConstant(int64_t &Res) const;
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 1bb5173e1937..f2da38961684 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -79,7 +79,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
- MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
+ MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
@@ -129,10 +129,10 @@ public:
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
- virtual void emitAbiVersion(int AbiVersion) override {
+ void emitAbiVersion(int AbiVersion) override {
OS << "\t.abiversion " << AbiVersion << '\n';
}
- virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
}
};
@@ -143,7 +143,7 @@ public:
MCELFStreamer &getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
- virtual void emitTCEntry(const MCSymbol &S) override {
+ void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitSymbolValue(&S, 8);
}
@@ -151,14 +151,14 @@ public:
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
- virtual void emitAbiVersion(int AbiVersion) override {
+ void emitAbiVersion(int AbiVersion) override {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned Flags = MCA.getELFHeaderEFlags();
Flags &= ~ELF::EF_PPC64_ABI;
Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
MCA.setELFHeaderEFlags(Flags);
}
- virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
@@ -213,10 +213,10 @@ public:
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
- virtual void emitAbiVersion(int AbiVersion) override {
+ void emitAbiVersion(int AbiVersion) override {
llvm_unreachable("Unknown pseudo-op: .abiversion");
}
- virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
@@ -225,19 +225,15 @@ public:
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
if (Triple(TT).isOSDarwin()) {
MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetMachOStreamer(*S);
return S;
}
- MCStreamer *S =
- createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetELFStreamer(*S);
return S;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 474395b93637..68f7f7aac82d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCTARGETDESC_H
-#define PPCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index cff27baeb5ee..f7259b9a098c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -41,7 +41,7 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -80,7 +80,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+/// Outline based on PPCELFObjectWriter::GetRelocType().
static unsigned getRelocType(const MCValue &Target,
const MCFixupKind FixupKind, // from
// Fixup.getKind()
@@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
Log2Size, IsPCRel, Value2);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
}
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
return true;
}
@@ -331,9 +331,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// See <reloc.h>.
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = RelocType;
+ const MCSymbolData *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
//
@@ -355,12 +355,11 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// Check whether we need an external or internal relocation.
if (Writer->doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ RelSymbol = SD;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
@@ -375,9 +374,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
- makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
- Type);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 10e328a8116e..6075631a541f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index c96674809b01..cf516f4e5ec9 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -13,7 +13,7 @@ TARGET = PPC
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
- PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenAsmWriter.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 38840e624a53..8fb33df3fe9a 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_H
-#define LLVM_TARGET_POWERPC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
+#define LLVM_LIB_TARGET_POWERPC_PPC_H
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -26,7 +26,6 @@ namespace llvm {
class PassRegistry;
class FunctionPass;
class ImmutablePass;
- class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
class MCInst;
@@ -41,8 +40,6 @@ namespace llvm {
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
- FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index a9842b287cbb..a7fd62c07303 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -56,6 +56,8 @@ def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
"Use condition-register bits individually">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
+def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
+ "Enable SPE instructions">;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -86,13 +88,27 @@ def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
+def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
+ "Enable the cmpb instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
+ "Has only the msync instruction instead of sync",
+ [FeatureBookE]>;
+def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true",
+ "Enable E500/E500mc instructions">;
+def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
+ "Enable PPC 4xx instructions">;
+def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
+ "Enable PPC 6xx instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
+ "Enable POWER8 vector instructions",
+ [FeatureVSX, FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -102,10 +118,18 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
-// CMPB p6, p6x, p7 cmpb
// DFP p6, p6x, p7 decimal floating-point instructions
// POPCNTB p5 through p7 popcntb and related instructions
-// VSX p7 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// ABI Selection //
+//===----------------------------------------------------------------------===//
+
+def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
+ "Use the ELFv1 ABI">;
+
+def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
+ "Use the ELFv2 ABI">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -178,10 +202,12 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -233,7 +259,7 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
@@ -241,7 +267,7 @@ def : ProcessorModel<"a2q", PPCA2Model,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
@@ -267,32 +293,32 @@ def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
- FeatureSTFIWX, FeatureLFIWAX,
+ FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec,
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
- [DirectivePwr8, FeatureAltivec,
+def : ProcessorModel<"pwr8", P8Model,
+ [DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureP8Vector,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8357665d30db..49fd2f904a24 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,9 +18,9 @@
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "PPCMachineFunctionInfo.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -69,10 +70,11 @@ namespace {
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget &Subtarget;
uint64_t TOCLabelID;
+ StackMaps SM;
public:
explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
- Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -90,6 +92,13 @@ namespace {
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
+
+ void EmitEndOfAsmFile(Module &M) override;
+
+ void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+ void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@@ -147,7 +156,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -275,6 +284,18 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
return false;
}
+ case 'U': // Print 'u' for update form.
+ case 'X': // Print 'x' for indexed form.
+ {
+ // FIXME: Currently, for PowerPC, memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
}
}
@@ -290,7 +311,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid name clash check if the name already exists.
@@ -304,6 +325,80 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
return TOCEntry;
}
+void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ SM.serializeToStackMapSection();
+}
+
+void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+ SM.recordStackMap(MI);
+ assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == PPC::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
+ for (unsigned i = 0; i < NumNOPBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+}
+
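
The shadow-trimming loop above counts real instructions already sitting inside the requested stackmap shadow and emits only the remaining padding, four bytes per PPC nop. A simplified standalone model of that accounting (it ignores the call/stackmap cutoffs that stop the scan early):

    #include <cassert>

    // Each instruction already following the STACKMAP (4 bytes on PPC)
    // trims one nop from the requested shadow.
    unsigned nopsNeeded(unsigned ShadowBytes, unsigned TrailingInsns) {
      assert(ShadowBytes % 4 == 0 && "shadow must be whole instructions");
      unsigned Covered = TrailingInsns * 4;
      return ShadowBytes > Covered ? (ShadowBytes - Covered) / 4 : 0;
    }

    int main() {
      assert(nopsNeeded(16, 0) == 4); // nothing follows: pad with four nops
      assert(nopsNeeded(16, 3) == 1); // three real insns leave one nop
      assert(nopsNeeded(8, 5) == 0);  // shadow already fully covered
      return 0;
    }
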
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ SM.recordPatchPoint(MI);
+ PatchPointOpers Opers(&MI);
+
+ int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ unsigned EncodedBytes = 0;
+ if (CallTarget) {
+ assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+ "High 16 bits of call target should be zero.");
+ unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ EncodedBytes = 6*4;
+ // Materialize the jump address:
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 32) & 0xFFFF));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(32).addImm(16));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 16) & 0xFFFF));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(CallTarget & 0xFFFF));
+
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ }
+
+ // Emit padding.
+ unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+ assert((NumBytes - EncodedBytes) % 4 == 0 &&
+ "Invalid number of NOP bytes requested!");
+ for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+}
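
The six-instruction call sequence builds a 48-bit target 16 bits at a time: LI8 loads the top chunk (and sign-extends it), RLDIC rotates it into bits 47:32 while the mask clears the sign-extension bits, and ORIS8/ORI8 OR in the middle and low chunks. A standalone model of that arithmetic, assuming the target fits in 48 bits as the assert above requires:

    #include <cassert>
    #include <cstdint>

    // Rotate left a doubleword, as rldic does (N must be 1..63 here).
    uint64_t rotl64(uint64_t V, unsigned N) { return (V << N) | (V >> (64 - N)); }

    // Model of the LI8 / RLDIC / ORIS8 / ORI8 sequence above.
    uint64_t materialize48(uint64_t Target) {
      uint64_t R = uint64_t(int64_t(int16_t(Target >> 32))); // li8 sign-extends
      R = rotl64(R, 32) & 0x0000FFFF00000000ULL;             // rldic rS,rS,32,16
      R |= ((Target >> 16) & 0xFFFF) << 16;                  // oris8: bits 31:16
      R |= Target & 0xFFFF;                                  // ori8:  bits 15:0
      return R;
    }

    int main() {
      assert(materialize48(0x0000123456789ABCULL) == 0x0000123456789ABCULL);
      // The rldic mask matters when the top chunk makes li8 sign-extend:
      assert(materialize48(0x0000FFFF00001234ULL) == 0x0000FFFF00001234ULL);
      return 0;
    }
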
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
@@ -311,12 +406,40 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+ const Module *M = MF->getFunction()->getParent();
+ PICLevel::Level PL = M->getPICLevel();
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(OutStreamer, SM, *MI);
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(OutStreamer, SM, *MI);
+
+ case PPC::MoveGOTtoLR: {
+ // Transform %LR = MoveGOTtoLR
+ // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
+ // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
+ // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
+ // blrl
+ // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
+ MCSymbol *GOTSymbol =
+ OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
+ MCSymbolRefExpr::VK_PPC_LOCAL,
+ OutContext),
+ MCConstantExpr::Create(4, OutContext),
+ OutContext);
+
+ // Emit the 'bl'.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ return;
+ }
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
@@ -335,10 +458,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
- case PPC::GetGBRO: {
+ case PPC::UpdateGBR: {
+ // Transform %Rd = UpdateGBR(%Rt, %Ri)
+ // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
+ // add %Rd, %Rt, %Ri
// Get the offset from the GOT Base Register to the GOT
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- MCSymbol *PICOffset = MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ MCSymbol *PICOffset =
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
TmpInst.setOpcode(PPC::LWZ);
const MCExpr *Exp =
MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
@@ -346,26 +473,26 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
MCSymbolRefExpr::VK_None,
OutContext);
- const MCOperand MO = TmpInst.getOperand(1);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp,
- PB,
- OutContext));
- TmpInst.addOperand(MO);
+ const MCOperand TR = TmpInst.getOperand(1);
+ const MCOperand PICR = TmpInst.getOperand(0);
+
+ // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
+ TmpInst.getOperand(1) =
+ MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ TmpInst.getOperand(0) = TR;
+ TmpInst.getOperand(2) = PICR;
EmitToStreamer(OutStreamer, TmpInst);
- return;
- }
- case PPC::UpdateGBR: {
- // Update the GOT Base Register to point to the GOT. It may be possible to
- // merge this with the PPC::GetGBRO, doing it all in one step.
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
TmpInst.setOpcode(PPC::ADD4);
- TmpInst.addOperand(TmpInst.getOperand(0));
+ TmpInst.getOperand(0) = PICR;
+ TmpInst.getOperand(1) = TR;
+ TmpInst.getOperand(2) = PICR;
EmitToStreamer(OutStreamer, TmpInst);
return;
}
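
The rewritten UpdateGBR lowering computes the GOT address in two steps: the lwz loads the link-time difference GOT - picbase stored at .L0$poff, and the add combines it with the runtime PIC base held in %Ri. A standalone model with illustrative addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Illustrative runtime addresses, not taken from a real binary.
      uint64_t PicBase = 0x10000100; // address of .L0$pb
      uint64_t Got     = 0x10020000; // address of the GOT
      uint32_t PoffWord = uint32_t(Got - PicBase); // stored at .L0$poff
      // lwz %Rt, .L0$poff - .L0$pb(%Ri)  -- loads PoffWord
      uint64_t Rt = PoffWord;
      // add %Rd, %Rt, %Ri                -- rebases it onto the PIC base
      uint64_t Rd = Rt + PicBase;
      assert(Rd == Got);
      return 0;
    }
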
case PPC::LWZtoc: {
- // Transform %X3 = LWZtoc <ga:@min1>, %X2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ // Transform %R3 = LWZtoc <ga:@min1>, %R2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LWZ, and the global address operand to be a
// reference to the GOT entry we will synthesize later.
@@ -384,16 +511,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isBlockAddress())
MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
-
- const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
- OutContext);
- const MCExpr *PB =
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
- OutContext);
- Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ if (PL == PICLevel::Small) {
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ } else {
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ OutContext);
+ Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ }
EmitToStreamer(OutStreamer, TmpInst);
return;
}
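A sketch of the two access patterns this case now distinguishes (symbols illustrative; r30 holds the GOT pointer or the .LTOC base as set up above):

    lwz 3, min1@got(30)      # PICLevel::Small: GOT slot managed by the linker
    lwz 3, .LC0-.LTOC(30)    # large model: .LC0 is the TOC entry synthesized
                             # later in .got2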
@@ -402,7 +536,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDtocBA:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
@@ -433,7 +567,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDIS8. If the global address is external, has
// common linkage, is a non-local function address, or is a jump table
@@ -479,7 +613,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::LDtocL: {
// Transform %Xd = LDtocL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD. If the global address is external, has
// common linkage, or is a jump table address, then reference the
@@ -521,7 +655,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItocL: {
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise reference the
@@ -572,7 +706,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD.
TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
@@ -796,7 +930,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
EmitToStreamer(OutStreamer, TmpInst);
}
@@ -812,16 +946,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
return AsmPrinter::EmitStartOfAsmFile(M);
- // FIXME: The use of .got2 assumes large GOT model (-fPIC), which is not
- // optimal for some cases. We should consider supporting small model (-fpic)
- // as well in the future.
- assert(TM.getCodeModel() != CodeModel::Small &&
- "Small code model PIC is currently unsupported.");
+ if (M.getPICLevel() == PICLevel::Small)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly()));
- MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".L.TOC."));
+ MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(CurrentPos);
@@ -840,7 +972,9 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
- if (!Subtarget.isPPC64() && TM.getRelocationModel() != Reloc::PIC_)
+ if (!Subtarget.isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
+ MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
if (!Subtarget.isPPC64()) {
@@ -852,7 +986,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
const MCExpr *OffsExpr =
MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
OutContext),
MCSymbolRefExpr::Create(PICBase, OutContext),
OutContext);
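The expression built here is then emitted as a 4-byte word at the .L0$poff label, directly in front of the function's entry label, roughly (a sketch, names following the scheme above):

    .L0$poff:
        .long .LTOC-.L0$pb
    foo:
        ...

The UpdateGBR lowering in PPCAsmPrinter::EmitInstruction reads this word back through the PIC base register to recover the address of .LTOC.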
@@ -898,7 +1032,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
@@ -1104,7 +1238,8 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
bool isDarwin = Subtarget.isDarwin();
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -1240,7 +1375,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ee906712ee02..940d55ac1f36 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
@@ -64,17 +65,42 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
+ auto GetAlignmentAdjustment =
+    [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+ unsigned Align = MBB.getAlignment();
+ if (!Align)
+ return 0;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+ if (Align <= ParentAlign)
+ return OffsetToAlignment(Offset, AlignAmt);
+
+    // The alignment of this MBB is larger than the function's alignment, so
+    // we can't tell how much padding (if any) will be needed to satisfy it.
+    // Conservatively assume the worst case.
+ return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+ };
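A worked example of the lambda's arithmetic; the helper below is a sketch that mirrors what llvm::OffsetToAlignment computes:

    // Padding needed to round Offset up to the next multiple of AlignAmt.
    static unsigned offsetToAlignment(unsigned Offset, unsigned AlignAmt) {
      return (AlignAmt - (Offset % AlignAmt)) % AlignAmt;
    }

For a block with Align = 4 (AlignAmt = 16) at Offset = 20 inside a 16-byte-aligned function, 12 bytes of nops are counted. If the function itself were only 8-byte aligned, the block's position modulo 16 is unknowable here, so the lambda budgets the worst case, 16 + 12 = 28 bytes.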
+
// Measure each MBB and compute a size for the entire function.
unsigned FuncSize = 0;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock *MBB = MFI;
+ // The end of the previous block may have extra nops if this block has an
+ // alignment requirement.
+ if (MBB->getNumber() > 0) {
+ unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
+ BlockSizes[MBB->getNumber()-1] += AlignExtra;
+ FuncSize += AlignExtra;
+ }
+
unsigned BlockSize = 0;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
MBBI != EE; ++MBBI)
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index ec1e34d91f93..167f3355eb27 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -194,6 +194,21 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
return false;
}
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const PPCTargetMachine *TM,
+ const llvm::Value *MemAddr) {
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV)
+ return false;
+ if (!GV->isThreadLocal())
+ return false;
+ if (!TM)
+ return true;
+ TLSModel::Model Model = TM->getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
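The connection to CTR loops, for context: under the general-dynamic (and local-dynamic) model a TLS address materializes as a real call, and CTR is volatile across calls, so any loop containing one cannot become a bdnz hardware loop. Illustrative ppc64 code for a general-dynamic access (not taken from this patch):

    addis 3, 2, x@got@tlsgd@ha     # x is a thread_local variable
    addi  3, 3, x@got@tlsgd@l
    bl    __tls_get_addr(x@tlsgd)  # a genuine call: clobbers LR and CTR
    nop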
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
@@ -214,7 +229,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
@@ -384,12 +399,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
+ for (Value *Operand : J->operands())
+ if (memAddrUsesCTR(TM, Operand))
+ return true;
}
return false;
diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h
new file mode 100644
index 000000000000..eb904a858592
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.h
@@ -0,0 +1,35 @@
+//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the PPC Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the "
+                   "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the PPC C calling convention on Release builds.
+ return false;
+}
+
+} // End llvm namespace
+
+#endif
+
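For orientation, a simplified sketch of how the CCCustom hook above is reached. This approximates the TableGen-generated convention function; names and exact control flow are assumptions, not the actual generated code:

    // Sketch of the generated RetCC_PPC64_AnyReg (assumed shape).
    static bool RetCC_PPC64_AnyReg_Sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                                          CCValAssign::LocInfo LocInfo,
                                          ISD::ArgFlagsTy ArgFlags,
                                          CCState &State) {
      if (CC_PPC_AnyReg_Error(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
        return false; // the custom routine claimed the value
      return true;    // unhandled: the delegating convention (RetCC_PPC)
                      // continues with its ordinary C-convention rules
    }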
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 222760a0cb91..56176f145332 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -14,16 +14,35 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
class CCIfNotSubtarget<string F, CCAction A>
- : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("!static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
//===----------------------------------------------------------------------===//
+// PPC64 AnyReg return-value convention. No explicit register is specified for
+// the return value. The register allocator is allowed and expected to choose
+// any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the PPC C calling convention.
+def RetCC_PPC64_AnyReg : CallingConv<[
+ CCCustom<"CC_PPC_AnyReg_Error">
+]>;
+
// Return-value convention for PowerPC
def RetCC_PPC : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
// On PPC64, integer return values are always promoted to i64
CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
@@ -45,6 +64,15 @@ def RetCC_PPC : CallingConv<[
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
+// No explicit register is specified for the AnyReg calling convention. The
+// register allocator may assign the arguments to any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the PPC C calling convention.
+def CC_PPC64_AnyReg : CallingConv<[
+ CCCustom<"CC_PPC_AnyReg_Error">
+]>;
// Note that we don't currently have calling conventions for 64-bit
// PowerPC, but handle all the complexities of the ABI in the lowering
@@ -55,6 +83,8 @@ def RetCC_PPC : CallingConv<[
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
+
CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
@@ -68,6 +98,8 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
@@ -197,3 +229,15 @@ def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
+def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
+ (sequence "X%u", 14, 31),
+ (sequence "F%u", 0, 31),
+ (sequence "CR%u", 0, 7))>;
+
+def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs,
+ (sequence "V%u", 0, 31))>;
+
+def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
+ (sequence "VSL%u", 0, 31),
+ (sequence "VSH%u", 0, 31))>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
deleted file mode 100644
index 08755238f925..000000000000
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
-// JIT-compile bitcode to native PowerPC.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-namespace {
- class PPCCodeEmitter : public MachineFunctionPass {
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- MachineModuleInfo *MMI;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
- /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
- /// its address in the function into this pointer.
- void *MovePCtoLROffset;
- public:
-
- PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
-
- MachineRelocation GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const;
-
- /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- const char *getPassName() const override {
- return "PowerPC Machine Code Emitter";
- }
-
- /// runOnMachineFunction - emits the given MachineFunction to memory
- ///
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- /// emitBasicBlock - emits the given MachineBasicBlock to memory
- ///
- void emitBasicBlock(MachineBasicBlock &MBB);
- };
-}
-
-char PPCCodeEmitter::ID = 0;
-
-/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
-/// to the specified MCE object.
-FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new PPCCodeEmitter(TM, JCE);
-}
-
-bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
- "JIT relocation model must be set to static or default!");
-
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
- do {
- MovePCtoLROffset = nullptr;
- MCE.startFunction(MF);
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- emitBasicBlock(*BB);
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
- MCE.StartMachineBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
- const MachineInstr &MI = *I;
- MCE.processDebugLoc(MI.getDebugLoc(), true);
- switch (MI.getOpcode()) {
- default:
- MCE.emitWordBE(getBinaryCodeForInstr(MI));
- break;
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break; // pseudo opcode, no side effects
- case PPC::MovePCtoLR:
- case PPC::MovePCtoLR8:
- assert(TM.getRelocationModel() == Reloc::PIC_);
- MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
- MCE.emitWordBE(0x48000005); // bl 1
- break;
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
- }
-}
-
-unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
- MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
- (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
-}
-
-MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const {
- // If in PIC mode, we need to encode the negated address of the
- // 'movepctolr' into the unrelocated field. After relocation, we'll have
- // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
- // field, we get &gv. This doesn't happen for branch relocations, which are
- // always implicitly pc relative.
- intptr_t Cst = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
- Cst = -(intptr_t)MovePCtoLROffset - 4;
- }
-
- if (MO.isGlobal())
- return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
- const_cast<GlobalValue *>(MO.getGlobal()),
- Cst, isa<Function>(MO.getGlobal()));
- if (MO.isSymbol())
- return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- RelocID, MO.getSymbolName(), Cst);
- if (MO.isCPI())
- return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-
- if (MO.isMBB())
- return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- RelocID, MO.getMBB());
-
- assert(MO.isJTI());
- return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-}
-
-unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- unsigned RelocID;
- switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
- default: llvm_unreachable("Unsupported target operand flags!");
- case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
- case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
- }
-
- MCE.addRelocation(GetRelocation(MO, RelocID));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memri, which has the low 16-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
-
- // Add a fixup for the displacement field.
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
- return RegBits;
-}
-
-unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memrix, which has the low 14-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
- return RegBits;
-}
-
-
-unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
-
- if (MO.isReg()) {
- // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
- // The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
- MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
- MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- }
-
- assert(MO.isImm() &&
- "Relocation required in an instruction that we cannot encode!");
- return MO.getImm();
-}
-
-#include "PPCGenCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 9c55a2900389..13bd0c7be2c0 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -15,6 +15,7 @@
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
@@ -39,7 +40,7 @@
//===----------------------------------------------------------------------===//
//
// TBD:
-// FastLowerArguments: Handle simple cases.
+// fastLowerArguments: Handle simple cases.
// PPCMaterializeGV: Handle TLS.
// SelectCall: Handle function pointers.
// SelectCall: Handle multi-register return values.
@@ -92,34 +93,35 @@ class PPCFastISel final : public FastISel {
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
- : FastISel(FuncInfo, LibInfo),
- TM(FuncInfo.MF->getTarget()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
- Context(&FuncInfo.Fn->getContext()) { }
+ : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
private:
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
- unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
- unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ bool fastLowerArguments() override;
+ unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
- unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+
// Instruction selection routines.
private:
bool SelectLoad(const Instruction *I);
@@ -131,7 +133,6 @@ class PPCFastISel final : public FastISel {
bool SelectIToFP(const Instruction *I, bool IsSigned);
bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
- bool SelectCall(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
@@ -140,6 +141,9 @@ class PPCFastISel final : public FastISel {
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool isVSFRCRegister(unsigned Register) const {
+ return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
+ }
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
@@ -172,9 +176,7 @@ class PPCFastISel final : public FastISel {
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg);
- void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes, bool IsVarArg);
+ bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
CCAssignFn *usePPC32CCs(unsigned Flag);
private:
@@ -483,6 +485,16 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// the indexed form. Also handle stack pointers with special needs.
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // If this is a potential VSX load with an offset of 0, a VSX indexed load can
+ // be used.
+ bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
+ if (IsVSFRC && (Opc == PPC::LFD) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ (Addr.Offset == 0)) {
+ UseOffset = false;
+ }
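The motivation, sketched: VSX offers no D-form (base + displacement) scalar floating-point load, only the X-form indexed one, so a displacement can be folded away only when it is zero. Illustrative instructions (operands not taken from this code):

    lfd   1, 8(3)      # FPR result: D-form load, displacement is fine
    lxsdx 34, 0, 3     # VSX result: X-form only; hence the offset-0 test

The store path below applies the same restriction (STFD becoming STXSDX).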
+
if (ResultReg == 0)
ResultReg = createResultReg(UseRC);
@@ -490,6 +502,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
+ // VSX only provides an indexed load.
+ if (IsVSFRC && Opc == PPC::LFD) return false;
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
@@ -502,6 +516,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// Base reg with offset in range.
} else if (UseOffset) {
+ // VSX only provides an indexed load.
+ if (IsVSFRC && Opc == PPC::LFD) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
@@ -525,7 +541,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
case PPC::LWA_32: Opc = PPC::LWAX_32; break;
case PPC::LD: Opc = PPC::LDX; break;
case PPC::LFS: Opc = PPC::LFSX; break;
- case PPC::LFD: Opc = PPC::LFDX; break;
+ case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Addr.Base.Reg).addReg(IndexReg);
@@ -560,7 +576,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -603,10 +619,22 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
unsigned IndexReg = 0;
PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+ // If this is a potential VSX store with an offset of 0, a VSX indexed store
+ // can be used.
+ bool IsVSFRC = isVSFRCRegister(SrcReg);
+ if (IsVSFRC && (Opc == PPC::STFD) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ (Addr.Offset == 0)) {
+ UseOffset = false;
+ }
+
// Note: If we still have a frame index here, we know the offset is
// in range, as otherwise PPCSimplifyAddress would have converted it
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
+ // VSX only provides an indexed store.
+ if (IsVSFRC && Opc == PPC::STFD) return false;
+
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
@@ -620,12 +648,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
.addMemOperand(MMO);
// Base reg with offset in range.
- } else if (UseOffset)
+ } else if (UseOffset) {
+ // VSX only provides an indexed store.
+ if (IsVSFRC && Opc == PPC::STFD) return false;
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
- else {
+ } else {
// Get the RR opcode corresponding to the RI one. FIXME: It would be
// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
// is hard to get at.
@@ -639,7 +670,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STW8: Opc = PPC::STWX8; break;
case PPC::STD: Opc = PPC::STDX; break;
case PPC::STFS: Opc = PPC::STFSX; break;
- case PPC::STFD: Opc = PPC::STFDX; break;
+ case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
@@ -707,7 +738,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -715,7 +746,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DbgLoc);
+ fastEmitBranch(Target, DbgLoc);
return true;
}
@@ -838,7 +869,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
return false;
// No code is generated for a FP extend.
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -860,7 +891,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
@@ -979,7 +1010,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
@@ -1080,7 +1111,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IntReg == 0)
return false;
- UpdateValueMap(I, IntReg);
+ updateValueMap(I, IntReg);
return true;
}
@@ -1169,7 +1200,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
ResultReg)
.addReg(SrcReg1)
.addImm(Imm);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -1185,7 +1216,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1200,7 +1231,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
@@ -1308,9 +1339,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
-void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes, bool IsVarArg) {
+bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+
// Issue CallSEQ_END.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameDestroyOpcode()))
@@ -1321,7 +1352,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
@@ -1366,39 +1397,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
}
assert(ResultReg && "ResultReg unset!");
- UsedRegs.push_back(SourcePhysReg);
- UpdateValueMap(I, ResultReg);
+ CLI.InRegs.push_back(SourcePhysReg);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
}
+
+ return true;
}
-// Attempt to fast-select a call instruction.
-bool PPCFastISel::SelectCall(const Instruction *I) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
+bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
- // Can't handle inline asm.
- if (isa<InlineAsm>(Callee))
+ if (!Callee && !SymName)
return false;
// Allow SelectionDAG isel to handle tail calls.
- if (CI->isTailCall())
+ if (IsTailCall)
return false;
- // Obtain calling convention.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
-
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- bool IsVarArg = FTy->isVarArg();
-
- // Not ready for varargs yet.
+ // Let SDISel handle vararg functions.
if (IsVarArg)
return false;
// Handle simple calls for now, with legal return types and
// those that can be extended.
- Type *RetTy = I->getType();
+ Type *RetTy = CLI.RetTy;
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
@@ -1411,7 +1438,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
@@ -1419,7 +1446,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
// Bail early if more than 8 arguments, as we only currently
// handle arguments passed in registers.
- unsigned NumArgs = CS.arg_size();
+ unsigned NumArgs = CLI.OutVals.size();
if (NumArgs > 8)
return false;
@@ -1434,28 +1461,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
ArgVTs.reserve(NumArgs);
ArgFlags.reserve(NumArgs);
- for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
- II != IE; ++II) {
- // FIXME: ARM does something for intrinsic calls here, check into that.
-
- unsigned AttrIdx = II - CS.arg_begin() + 1;
-
+ for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
// Only handle easy calls for now. It would be reasonably easy
// to handle <= 8-byte structures passed ByVal in registers, but we
// have to ensure they are right-justified in the register.
- if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
- CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
- CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
- CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+ ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
+ if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
return false;
- ISD::ArgFlagsTy Flags;
- if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
- Flags.setSExt();
- if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
- Flags.setZExt();
-
- Type *ArgTy = (*II)->getType();
+ Value *ArgValue = CLI.OutVals[i];
+ Type *ArgTy = ArgValue->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
@@ -1463,14 +1478,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
if (ArgVT.isVector())
return false;
- unsigned Arg = getRegForValue(*II);
+ unsigned Arg = getRegForValue(ArgValue);
if (Arg == 0)
return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
-
- Args.push_back(*II);
+ Args.push_back(ArgValue);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
@@ -1484,18 +1496,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
RegArgs, CC, NumBytes, IsVarArg))
return false;
+ MachineInstrBuilder MIB;
// FIXME: No handling for function pointers yet. This requires
// implementing the function descriptor (OPD) setup.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
- return false;
-
- // Build direct call with NOP for TOC restore.
- // FIXME: We can and should optimize away the NOP for local calls.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::BL8_NOP));
- // Add callee.
- MIB.addGlobalAddress(GV);
+ if (!GV) {
+      // Patchpoints are a special case; they always dispatch to a pointer value.
+ // However, we don't actually want to generate the indirect call sequence
+ // here (that will be generated, as necessary, during asm printing), and
+ // the call we generate here will be erased by FastISel::selectPatchpoint,
+ // so don't try very hard...
+ if (CLI.IsPatchPoint)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
+ else
+ return false;
+ } else {
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+ }
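For context on why a bare NOP is enough here, a sketch of the lifecycle: FastISel::selectPatchpoint folds the lowered operands into a PATCHPOINT pseudo and erases CLI.Call, and the real transfer is produced only at asm-printing time, along the lines of (registers illustrative):

    mtctr 12    # target address moved to CTR
    bctrl       # indirect call emitted for the patchpoint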
// Add implicit physical register uses to the call.
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
@@ -1509,14 +1531,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
- // Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
- finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
-
- // Set all unused physregs defs as dead.
- static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+ CLI.Call = MIB;
- return true;
+ // Finish off the call including any return values.
+ return finishCall(RetVT, CLI, NumBytes);
}
// Attempt to fast-select a return instruction.
@@ -1538,7 +1556,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
@@ -1627,7 +1645,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::BLR));
+ TII.get(PPC::BLR8));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
@@ -1731,7 +1749,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
SrcReg = ResultReg;
}
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -1770,13 +1788,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
-bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
@@ -1806,9 +1824,7 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
- if (dyn_cast<IntrinsicInst>(I))
- return false;
- return SelectCall(I);
+ return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
@@ -2066,7 +2082,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -2078,14 +2094,14 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
else if (isa<ConstantInt>(C))
- return PPCMaterializeInt(C, VT);
+ return PPCMaterializeInt(C, VT, VT != MVT::i1);
return 0;
}
// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -2185,7 +2201,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
-bool PPCFastISel::FastLowerArguments() {
+bool PPCFastISel::fastLowerArguments() {
// Defer to normal argument lowering for now. It's reasonably
// efficient. Consider doing something like ARM to handle the
// case where all args fit in registers, no varargs, no float
@@ -2195,7 +2211,7 @@ bool PPCFastISel::FastLowerArguments() {
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
-unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
@@ -2232,7 +2248,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
-unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
@@ -2245,27 +2261,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
Op0, Op0IsKill, Imm);
}
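The quirk being avoided, as an illustrative example: in D-form instructions an RA field of 0 denotes the literal value zero, not register r0, so handing r0 to the wrong operand silently changes the semantics:

    addi 3, 0, 5    # r3 = 0 + 5 (this is 'li 3, 5'), not r0 + 5
    lwz  4, 8(0)    # loads from absolute address 8, not from 8 + r0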
// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
-unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+ return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}
// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
-unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
@@ -2273,7 +2289,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
Op1, Op1IsKill);
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 2a02dfca6ae6..1b850478c88c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -254,7 +254,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -372,7 +372,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -450,6 +450,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
(MF.getTarget().Options.GuaranteedTailCallOpt &&
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -460,7 +461,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
@@ -498,9 +499,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -611,6 +612,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ int PBPOffset = 0;
+ if (FI->usesPICBase()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int PBPIndex = FI->getPICBasePointerSaveIndex();
+ assert(PBPIndex && "No PIC Base Pointer Save Slot!");
+ PBPOffset = FFI->getObjectOffset(PBPIndex);
+ }
+
// Get stack alignments.
unsigned MaxAlign = MFI->getMaxAlignment();
if (HasBP && MaxAlign > 1)
@@ -644,6 +653,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addImm(FPOffset)
.addReg(SPReg);
+ if (FI->usesPICBase())
+      // FIXME: On PPC32 SVR4, we must not spill before claiming the stack frame.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(PPC::R30)
+ .addImm(PBPOffset)
+ .addReg(SPReg);
+
if (HasBP)
// FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
BuildMI(MBB, MBBI, dl, StoreInst)
@@ -755,6 +771,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
+ if (FI->usesPICBase()) {
+    // Describe where the PIC base pointer (R30) was saved, at a fixed
+    // offset from CFA.
+ unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
if (HasBP) {
// Describe where BP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
@@ -839,14 +864,15 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::BLR8 ||
RetOpcode == PPC::TCRETURNri ||
RetOpcode == PPC::TCRETURNdi ||
RetOpcode == PPC::TCRETURNai ||
@@ -924,6 +950,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+ int PBPOffset = 0;
+ if (FI->usesPICBase()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int PBPIndex = FI->getPICBasePointerSaveIndex();
+ assert(PBPIndex && "No PIC Base Pointer Save Slot!");
+ PBPOffset = FFI->getObjectOffset(PBPIndex);
+ }
+
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
RetOpcode == PPC::TCRETURNdi ||
RetOpcode == PPC::TCRETURNai ||
@@ -1003,6 +1037,13 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addImm(FPOffset)
.addReg(SPReg);
+ if (FI->usesPICBase())
+      // FIXME: On PPC32 SVR4, we must not read below the stack pointer when
+      // reloading R30.
+ BuildMI(MBB, MBBI, dl, LoadInst)
+ .addReg(PPC::R30)
+ .addImm(PBPOffset)
+ .addReg(SPReg);
+
if (HasBP)
BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
.addImm(BPOffset)
@@ -1018,7 +1059,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -1084,7 +1126,7 @@ void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1119,6 +1161,14 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FI->setBasePointerSaveIndex(BPSI);
}
+ // Reserve stack space for the PIC Base register (R30).
+ // Only used in SVR4 32-bit.
+ if (FI->usesPICBase()) {
+    int PBPSI = MFI->CreateFixedObject(4, -8, true);
+ FI->setPICBasePointerSaveIndex(PBPSI);
+ }
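What the fixed object becomes at runtime, sketched for a PPC32 SVR4 PIC function (N is the final frame size; the instruction order reflects the FIXMEs flagged in emitPrologue above):

    stw  30, -8(1)   # prologue: spill the PIC base below the incoming SP
    stwu 1, -N(1)    # then claim the frame
    ...
    lwz  30, -8(1)   # epilogue: reload R30 relative to the restored SP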
+
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
@@ -1216,7 +1266,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -1250,8 +1300,17 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
+ if (PFI->usesPICBase()) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getPICBasePointerSaveIndex();
+ assert(FI && "No PIC Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1399,7 +1458,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -1461,7 +1520,7 @@ restoreCRs(bool isPPC64, bool is31,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
unsigned RestoreOp, MoveReg;
@@ -1494,7 +1553,7 @@ void PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
@@ -1544,7 +1603,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
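The rest of the churn in this file follows one mechanical pattern: queries that used to go through the TargetMachine now go through the MachineFunction's subtarget, part of the LLVM-wide move toward per-function subtargets. A sketch of the shape of the change (MF is the surrounding MachineFunction):

// Before: MF.getTarget().getRegisterInfo(), a single function-independent
// answer from the TargetMachine. After: the function's subtarget answers,
// which can in principle vary from function to function.
const PPCRegisterInfo *RegInfo =
    static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());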
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index c0c7d248f8d2..c4825882182d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_FRAMEINFO_H
-#define POWERPC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 23f76c16d138..4b502147ca63 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCHAZRECS_H
-#define PPCHAZRECS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
public:
PPCHazardRecognizer970(const ScheduleDAG &DAG);
- virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
- virtual void EmitInstruction(SUnit *SU) override;
- virtual void AdvanceCycle() override;
- virtual void Reset() override;
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void Reset() override;
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 57960598c83f..75ab3430b49d 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -41,6 +42,12 @@ using namespace llvm;
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
+cl::opt<bool> UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
+ cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden);
+cl::opt<bool> BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates",
+ cl::desc("stress rotate selection in aggressive ppc isel for "
+ "bit permutations"), cl::Hidden);
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -57,16 +64,16 @@ namespace {
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getTargetLowering();
+ PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
PPCSubTarget = TM.getSubtargetImpl();
SelectionDAGISel::runOnMachineFunction(MF);
@@ -76,6 +83,7 @@ namespace {
return true;
}
+ void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
/// getI32Imm - Return a target constant with the specified value, of type
@@ -111,11 +119,14 @@ namespace {
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
+ SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *Select(SDNode *N) override;
SDNode *SelectBitfieldInsert(SDNode *N);
+ SDNode *SelectBitPermutation(SDNode *N);
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
@@ -177,7 +188,7 @@ namespace {
std::vector<SDValue> &OutOps) override {
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
SDValue NewOp =
@@ -202,8 +213,12 @@ private:
SDNode *SelectSETCC(SDNode *N);
void PeepholePPC64();
+ void PeepholePPC64ZExt();
void PeepholeCROps();
+ SDValue combineToCMPB(SDNode *N);
+ void foldBoolExts(SDValue &Res, SDNode *&N);
+
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
};
@@ -243,7 +258,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -279,27 +294,34 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ const Module *M = MF->getFunction()->getParent();
DebugLoc dl;
if (PPCLowering->getPointerTy() == MVT::i32) {
- if (PPCSubTarget->isTargetELF())
+ if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- else
+ if (M->getPICLevel() == PICLevel::Small) {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ } else {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
+ } else {
GlobalBaseReg =
RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
- if (PPCSubTarget->isTargetELF()) {
- unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::GetGBRO), TempReg).addReg(GlobalBaseReg);
- BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg).addReg(TempReg);
- MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
}
} else {
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
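On 32-bit ELF, getGlobalBaseReg now keys off the module's PIC level instead of emitting one fixed sequence. The pseudo-instructions are expanded later (pseudo expansion and the asm printer); the sequences sketched below are an assumption about that eventual output, included only to make the two paths concrete:

// PICLevel::Small (-fpic): the GOT address itself can be captured via LR.
//   MoveGOTtoLR       ; call-like capture of the GOT address into LR
//   mflr r30          ; r30 = address of the GOT
//
// Otherwise (-fPIC): capture the PC, then apply the displacement from the
// capture point to the GOT.
//   MovePCtoLR        ; capture the next instruction address in LR
//   mflr r30          ; r30 = captured PC
//   UpdateGBR         ; r30 += (GOT - captured PC), via a scratch register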
@@ -366,6 +388,18 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
&& isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
+SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
+ SDLoc dl(SN);
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (SN->hasOneUse())
+ return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(Offset));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(Offset));
+}
+
bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
if (!Val)
return false;
@@ -510,6 +544,1401 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
return nullptr;
}
+// Predict the number of instructions that SelectInt64Direct (below) would
+// generate to materialize this immediate.
+static unsigned SelectInt64CountDirect(int64_t Imm) {
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ unsigned Result = 0;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ ++Result;
+ } else if (Lo) {
+ // Handle the Hi bits and Lo bits.
+ Result += 2;
+ } else {
+ // Just the Hi bits.
+ ++Result;
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32 bits were not zero.
+ if (Imm)
+ ++Result;
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF))
+ ++Result;
+ if ((Lo = Remainder & 0xFFFF))
+ ++Result;
+
+ return Result;
+}
+
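To make the counting concrete, here is a self-contained constexpr re-implementation with two spot checks. It is written for this note, not taken from the patch: materializeCount, ctz64, isInt32, and isInt16 are invented names, and Remainder is kept 64-bit for clarity (the patch's 32-bit Remainder behaves the same, since only its low 32 bits are ever read):

#include <cstdint>

constexpr unsigned ctz64(uint64_t V) {   // precondition: V != 0
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}
constexpr bool isInt32(int64_t X) { return X == (int64_t)(int32_t)X; }
constexpr bool isInt16(int64_t X) { return X == (int64_t)(int16_t)X; }

constexpr unsigned materializeCount(int64_t Imm) {
  uint64_t Remainder = 0;
  unsigned Shift = 0;
  if (!isInt32(Imm)) {
    Shift = ctz64((uint64_t)Imm);
    int64_t ImmSh = (int64_t)((uint64_t)Imm >> Shift);
    if (isInt32(ImmSh)) {
      Imm = ImmSh;
    } else {
      Remainder = (uint64_t)Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }
  unsigned Result = 0;
  unsigned Lo = Imm & 0xFFFF;
  if (isInt16(Imm)) ++Result;                 // li
  else if (Lo) Result += 2;                   // lis + ori
  else ++Result;                              // lis
  if (!Shift) return Result;
  if (Imm) ++Result;                          // rldicr (shift into place)
  if ((Remainder >> 16) & 0xFFFF) ++Result;   // oris
  if (Remainder & 0xFFFF) ++Result;           // ori
  return Result;
}

// lis 0x1234; ori 0x5678; rldicr << 32; oris 0x9ABC; ori 0xDEF0:
static_assert(materializeCount(0x123456789ABCDEF0LL) == 5, "five instructions");
static_assert(materializeCount(42) == 1, "a single li");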
+static uint64_t Rot64(uint64_t Imm, unsigned R) {
+ return (Imm << R) | (Imm >> (64 - R));
+}
+
+static unsigned SelectInt64Count(int64_t Imm) {
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return Count;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ Count = std::min(Count, RCount);
+
+ // See comments in SelectInt64 for an explanation of the logic below.
+ unsigned LS = findLastSet(RImm);
+ if (LS != r-1)
+ continue;
+
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
+
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ Count = std::min(Count, RCount);
+ }
+
+ return Count;
+}
+
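The rotation search pays off for constants that become simple only after rotation. An illustration written for this note (rotl64 mirrors Rot64 above; the instruction sequences named are those SelectInt64Direct would emit):

constexpr uint64_t rotl64(uint64_t Imm, unsigned R) {  // R in [1, 63]
  return (Imm << R) | (Imm >> (64 - R));
}
// Materialized directly, 0x8000000000000001 costs three instructions
// (lis, rldicr, ori); rotated left by one it is just 3, a single li, and
// adding the inverse rotation (a rotate by 63) gives a two-instruction
// sequence, so SelectInt64Count reports 2 instead of 3.
static_assert(rotl64(0x8000000000000001ULL, 1) == 0x3, "simple after rotation");
static_assert(rotl64(0x3ULL, 63) == 0x8000000000000001ULL, "inverse rotation");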
+// Select a 64-bit constant. For cost-modeling purposes,
+// SelectInt64CountDirect (above) needs to be kept in sync with this function.
+static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ auto getI32Imm = [CurDAG](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ };
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32 bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+}
+
+static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ unsigned RMin = 0;
+
+ int64_t MatImm;
+ unsigned MaskEnd;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImm;
+ MaskEnd = 63;
+ }
+
+ // If the immediate to generate has many trailing zeros, it might be
+ // worthwhile to generate a rotated value with too many leading ones
+ // (because that's free with li/lis's sign-extension semantics), and then
+ // mask them off after rotation.
+
+ unsigned LS = findLastSet(RImm);
+ // We're adding (63-LS) higher-order ones, and we expect to mask them off
+ // after performing the inverse rotation by (64-r). So we need that:
+ // 63-LS == 64-r => LS == r-1
+ if (LS != r-1)
+ continue;
+
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
+
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImmWithOnes;
+ MaskEnd = LS;
+ }
+ }
+
+ if (!RMin)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ auto getI32Imm = [CurDAG](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ };
+
+ SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
+ return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
+ getI32Imm(64 - RMin), getI32Imm(MaskEnd));
+}
+
+// Select a 64-bit constant.
+static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
+ SDLoc dl(N);
+
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return SelectInt64(CurDAG, dl, Imm);
+}
+
+namespace {
+class BitPermutationSelector {
+ struct ValueBit {
+ SDValue V;
+
+ // The bit number in the value, using a convention where bit 0 is the
+ // lowest-order bit.
+ unsigned Idx;
+
+ enum Kind {
+ ConstZero,
+ Variable
+ } K;
+
+ ValueBit(SDValue V, unsigned I, Kind K = Variable)
+ : V(V), Idx(I), K(K) {}
+ ValueBit(Kind K = Variable)
+ : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
+
+ bool isZero() const {
+ return K == ConstZero;
+ }
+
+ bool hasValue() const {
+ return K == Variable;
+ }
+
+ SDValue getValue() const {
+ assert(hasValue() && "Cannot get the value of a constant bit");
+ return V;
+ }
+
+ unsigned getValueBitIndex() const {
+ assert(hasValue() && "Cannot get the value bit index of a constant bit");
+ return Idx;
+ }
+ };
+
+ // A bit group has the same underlying value and the same rotate factor.
+ struct BitGroup {
+ SDValue V;
+ unsigned RLAmt;
+ unsigned StartIdx, EndIdx;
+
+ // This rotation amount assumes that the lower 32 bits of the quantity are
+ // replicated in the high 32 bits by the rotation operator (which is done
+ // by rlwinm and friends in 64-bit mode).
+ bool Repl32;
+ // Did converting to Repl32 == true change the rotation factor? If it did,
+ // it decreased it by 32.
+ bool Repl32CR;
+ // Was this group coalesced after setting Repl32 to true?
+ bool Repl32Coalesced;
+
+ BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
+ : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
+ Repl32Coalesced(false) {
+ DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
+ " [" << S << ", " << E << "]\n");
+ }
+ };
+
+ // Information on each (Value, RLAmt) pair (like the number of groups
+ // associated with each) used to choose the lowering method.
+ struct ValueRotInfo {
+ SDValue V;
+ unsigned RLAmt;
+ unsigned NumGroups;
+ unsigned FirstGroupStartIdx;
+ bool Repl32;
+
+ ValueRotInfo()
+ : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
+ Repl32(false) {}
+
+ // For sorting (in reverse order) by NumGroups, and then by
+ // FirstGroupStartIdx.
+ bool operator < (const ValueRotInfo &Other) const {
+ // We need to sort so that the non-Repl32 groups come first because, when
+ // we're doing masking, the Repl32 bit groups might be subsumed into the
+ // 64-bit masking operation.
+ if (Repl32 < Other.Repl32)
+ return true;
+ else if (Repl32 > Other.Repl32)
+ return false;
+ else if (NumGroups > Other.NumGroups)
+ return true;
+ else if (NumGroups < Other.NumGroups)
+ return false;
+ else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
+ return true;
+ return false;
+ }
+ };
+
+ // Return true if something interesting was deduced, return false if we're
+ // providing only a generic representation of V (or something else likewise
+ // uninteresting for instruction selection).
+ bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::ROTL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned RotAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
+
+ return true;
+ }
+ break;
+ case ISD::SHL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned ShiftAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
+ Bits[i] = LHSBits[i - ShiftAmt];
+
+ for (unsigned i = 0; i < ShiftAmt; ++i)
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+ return true;
+ }
+ break;
+ case ISD::SRL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned ShiftAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
+ Bits[i] = LHSBits[i + ShiftAmt];
+
+ for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+ return true;
+ }
+ break;
+ case ISD::AND:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ bool LHSInteresting = getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (((Mask >> i) & 1) == 1)
+ Bits[i] = LHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+ // Mark this as interesting only if the LHS was also interesting. This
+ // prevents the overall procedure from matching a single immediate 'and'
+ // (which is non-optimal because such an and might be folded with other
+ // things if we don't select it here).
+ return LHSInteresting;
+ }
+ break;
+ case ISD::OR: {
+ SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+ getValueBits(V.getOperand(1), RHSBits);
+
+ bool AllDisjoint = true;
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (LHSBits[i].isZero())
+ Bits[i] = RHSBits[i];
+ else if (RHSBits[i].isZero())
+ Bits[i] = LHSBits[i];
+ else {
+ AllDisjoint = false;
+ break;
+ }
+
+ if (!AllDisjoint)
+ break;
+
+ return true;
+ }
+ }
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ Bits[i] = ValueBit(V, i);
+
+ return false;
+ }
+
+ // For each value (except the constant ones), compute the left-rotate amount
+ // to get it from its original to final position.
+ void computeRotationAmounts() {
+ HasZeros = false;
+ RLAmt.resize(Bits.size());
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (Bits[i].hasValue()) {
+ unsigned VBI = Bits[i].getValueBitIndex();
+ if (i >= VBI)
+ RLAmt[i] = i - VBI;
+ else
+ RLAmt[i] = Bits.size() - (VBI - i);
+ } else if (Bits[i].isZero()) {
+ HasZeros = true;
+ RLAmt[i] = UINT32_MAX;
+ } else {
+ llvm_unreachable("Unknown value bit type");
+ }
+ }
+
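As a worked example of these rotation amounts, take the i32 byte swap, the common case called out in the selector's header comment below. The helper is distilled for this note (rlamt32 is not in the patch); it is just the two branches above specialized to a 32-bit value:

// Left-rotate amount that moves source bit VBI to result bit I.
constexpr unsigned rlamt32(unsigned I, unsigned VBI) {
  return I >= VBI ? I - VBI : 32 - (VBI - I);
}
// Byte swap: result bits 0..7 come from source bits 24..31, 8..15 from
// 16..23, 16..23 from 8..15, and 24..31 from 0..7.
static_assert(rlamt32(0, 24) == 8 && rlamt32(16, 8) == 8, "two RLAmt=8 groups");
static_assert(rlamt32(8, 16) == 24 && rlamt32(24, 0) == 24, "two RLAmt=24 groups");

That yields two (V, RLAmt) pairs with two bit groups each, so Select32 below emits one full-width RLWINM (rotate by 8) plus two RLWIMI inserts for the RLAmt=24 groups: the classic three-instruction byte swap.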
+ // Collect groups of consecutive bits with the same underlying value and
+ // rotation factor. If we're doing late masking, we ignore zeros, otherwise
+ // they break up groups.
+ void collectBitGroups(bool LateMask) {
+ BitGroups.clear();
+
+ unsigned LastRLAmt = RLAmt[0];
+ SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
+ unsigned LastGroupStartIdx = 0;
+ for (unsigned i = 1; i < Bits.size(); ++i) {
+ unsigned ThisRLAmt = RLAmt[i];
+ SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
+ if (LateMask && !ThisValue) {
+ ThisValue = LastValue;
+ ThisRLAmt = LastRLAmt;
+ // If we're doing late masking, then the first bit group always starts
+ // at zero (even if the first bits were zero).
+ if (BitGroups.empty())
+ LastGroupStartIdx = 0;
+ }
+
+ // If this bit has the same underlying value and the same rotate factor as
+ // the last one, then they're part of the same group.
+ if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
+ continue;
+
+ if (LastValue.getNode())
+ BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
+ i-1));
+ LastRLAmt = ThisRLAmt;
+ LastValue = ThisValue;
+ LastGroupStartIdx = i;
+ }
+ if (LastValue.getNode())
+ BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
+ Bits.size()-1));
+
+ if (BitGroups.empty())
+ return;
+
+ // We might be able to combine the first and last groups.
+ if (BitGroups.size() > 1) {
+ // If the first and last groups are the same, then remove the first group
+ // in favor of the last group, making the ending index of the last group
+ // equal to the ending index of the to-be-removed first group.
+ if (BitGroups[0].StartIdx == 0 &&
+ BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
+ BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
+ BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
+ DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+ BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
+ BitGroups.erase(BitGroups.begin());
+ }
+ }
+ }
+
+ // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
+ // associated with each. If there is a degeneracy, pick the one that occurs
+ // first (in the final value).
+ void collectValueRotInfo() {
+ ValueRots.clear();
+
+ for (auto &BG : BitGroups) {
+ unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
+ ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
+ VRI.V = BG.V;
+ VRI.RLAmt = BG.RLAmt;
+ VRI.Repl32 = BG.Repl32;
+ VRI.NumGroups += 1;
+ VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
+ }
+
+ // Now that we've collected the various ValueRotInfo instances, we need to
+ // sort them.
+ ValueRotsVec.clear();
+ for (auto &I : ValueRots) {
+ ValueRotsVec.push_back(I.second);
+ }
+ std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ }
+
+ // In 64-bit mode, rlwinm and friends have a rotation operator that
+ // replicates the low-order 32 bits into the high-order 32 bits. The mask
+ // indices of these instructions can only be in the lower 32 bits, so they
+ // can only represent some 64-bit bit groups. However, when they can be used,
+ // the 32-bit replication can be used to represent, as a single bit group,
+ // otherwise separate bit groups. Convert all eligible groups to this
+ // replicated-32-bit (Repl32) form, and coalesce adjacent groups where the
+ // conversion makes that possible.
+ void assignRepl32BitGroups() {
+ // If we have bits like this:
+ //
+ // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
+ // Groups: | RLAmt = 8 | RLAmt = 40 |
+ //
+ // But, making use of a 32-bit operation that replicates the low-order 32
+ // bits into the high-order 32 bits, this can be one bit group with a RLAmt
+ // of 8.
+
+ auto IsAllLow32 = [this](BitGroup &BG) {
+ if (BG.StartIdx <= BG.EndIdx) {
+ for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ } else {
+ for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ for (unsigned i = 0; i <= BG.EndIdx; ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ for (auto &BG : BitGroups) {
+ if (BG.StartIdx < 32 && BG.EndIdx < 32) {
+ if (IsAllLow32(BG)) {
+ if (BG.RLAmt >= 32) {
+ BG.RLAmt -= 32;
+ BG.Repl32CR = true;
+ }
+
+ BG.Repl32 = true;
+
+ DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
+ BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
+ " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
+ }
+ }
+ }
+
+ // Now walk through the bit groups, consolidating where possible.
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ // We might want to remove this bit group by merging it with the previous
+ // group (which might be the ending group).
+ auto IP = (I == BitGroups.begin()) ?
+ std::prev(BitGroups.end()) : std::prev(I);
+ if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
+ I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
+
+ DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
+ I->V.getNode() << " RLAmt = " << I->RLAmt <<
+ " [" << I->StartIdx << ", " << I->EndIdx <<
+ "] with group with range [" <<
+ IP->StartIdx << ", " << IP->EndIdx << "]\n");
+
+ IP->EndIdx = I->EndIdx;
+ IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ continue;
+ } else {
+ // There is a special case worth handling: If there is a single group
+ // covering the entire upper 32 bits, and it can be merged with both
+ // the next and previous groups (which might be the same group), then
+ // do so. If it is the same group (so there will be only one group in
+ // total), then we need to reverse the order of the range so that it
+ // covers the entire 64 bits.
+ if (I->StartIdx == 32 && I->EndIdx == 63) {
+ assert(std::next(I) == BitGroups.end() &&
+ "bit group ends at index 63 but there is another?");
+ auto IN = BitGroups.begin();
+
+ if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
+ (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
+ IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
+ IsAllLow32(*I)) {
+
+ DEBUG(dbgs() << "\tcombining bit group for " <<
+ I->V.getNode() << " RLAmt = " << I->RLAmt <<
+ " [" << I->StartIdx << ", " << I->EndIdx <<
+ "] with 32-bit replicated groups with ranges [" <<
+ IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
+ IN->StartIdx << ", " << IN->EndIdx << "]\n");
+
+ if (IP == IN) {
+ // There is only one other group; change it to cover the whole
+ // range (backward, so that it can still be Repl32 but cover the
+ // whole 64-bit range).
+ IP->StartIdx = 31;
+ IP->EndIdx = 30;
+ IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ } else {
+ // There are two separate groups, one before this group and one
+ // after us (at the beginning). We're going to remove this group,
+ // but also the group at the very beginning.
+ IP->EndIdx = IN->EndIdx;
+ IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ BitGroups.erase(BitGroups.begin());
+ }
+
+ // This must be the last group in the vector (and we might have
+ // just invalidated the iterator above), so break here.
+ break;
+ }
+ }
+ }
+
+ ++I;
+ }
+ }
+
+ SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ uint64_t getZerosMask() {
+ uint64_t Mask = 0;
+ for (unsigned i = 0; i < Bits.size(); ++i) {
+ if (Bits[i].hasValue())
+ continue;
+ Mask |= (UINT64_C(1) << i);
+ }
+
+ return ~Mask;
+ }
+
+ // Depending on the number of groups for a particular value, it might be
+ // better to rotate, mask explicitly (using andi/andis), and then or the
+ // result. Select this part of the result first.
+ void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
+ if (BPermRewriterNoMasking)
+ return;
+
+ for (ValueRotInfo &VRI : ValueRotsVec) {
+ unsigned Mask = 0;
+ for (unsigned i = 0; i < Bits.size(); ++i) {
+ if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
+ continue;
+ if (RLAmt[i] != VRI.RLAmt)
+ continue;
+ Mask |= (1u << i);
+ }
+
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask for value bit groups");
+ bool NeedsRotate = VRI.RLAmt != 0;
+
+ // We're trying to minimize the number of instructions. If we have one
+ // group, using one of andi/andis can break even. If we have three
+ // groups, we can use both andi and andis and break even (to use both
+ // andi and andis we also need to or the results together). We need four
+ // groups if we also need to rotate. To use andi/andis we need to do more
+ // than break even because rotate-and-mask instructions tend to be easier
+ // to schedule.
+
+ // FIXME: We've biased here against using andi/andis, which is right for
+ // POWER cores, but not optimal everywhere. For example, on the A2,
+ // andi/andis have single-cycle latency whereas the rotate-and-mask
+ // instructions take two cycles, and it would be better to bias toward
+ // andi/andis in break-even cases.
+
+ unsigned NumAndInsts = (unsigned) NeedsRotate +
+ (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
+ (unsigned) (bool) Res;
+
+ DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
+ " RL: " << VRI.RLAmt << ":" <<
+ "\n\t\t\tisel using masking: " << NumAndInsts <<
+ " using rotates: " << VRI.NumGroups << "\n");
+
+ if (NumAndInsts >= VRI.NumGroups)
+ continue;
+
+ DEBUG(dbgs() << "\t\t\t\tusing masking\n");
+
+ if (InstCnt) *InstCnt += NumAndInsts;
+
+ SDValue VRot;
+ if (VRI.RLAmt) {
+ SDValue Ops[] =
+ { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
+ VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+ Ops), 0);
+ } else {
+ VRot = VRI.V;
+ }
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
+ VRot, getI32Imm(ANDIMask)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
+ VRot, getI32Imm(ANDISMask)), 0);
+
+ SDValue TotalVal;
+ if (!ANDIVal)
+ TotalVal = ANDISVal;
+ else if (!ANDISVal)
+ TotalVal = ANDIVal;
+ else
+ TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ ANDIVal, ANDISVal), 0);
+
+ if (!Res)
+ Res = TotalVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ Res, TotalVal), 0);
+
+ // Now, remove all groups with this underlying value and rotation
+ // factor.
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ if (I->V == VRI.V && I->RLAmt == VRI.RLAmt)
+ I = BitGroups.erase(I);
+ else
+ ++I;
+ }
+ }
+ }
+
+ // Instruction selection for the 32-bit case.
+ SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
+ SDLoc dl(N);
+ SDValue Res;
+
+ if (InstCnt) *InstCnt = 0;
+
+ // Take care of cases that should use andi/andis first.
+ SelectAndParts32(dl, Res, InstCnt);
+
+ // If we've not yet selected a 'starting' instruction, and we have no zeros
+ // to fill in, select the (Value, RLAmt) with the highest priority (largest
+ // number of groups), and start with this rotated value.
+ if ((!HasZeros || LateMask) && !Res) {
+ ValueRotInfo &VRI = ValueRotsVec[0];
+ if (VRI.RLAmt) {
+ if (InstCnt) *InstCnt += 1;
+ SDValue Ops[] =
+ { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ } else {
+ Res = VRI.V;
+ }
+
+ // Now, remove all groups with this underlying value and rotation factor.
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ if (I->V == VRI.V && I->RLAmt == VRI.RLAmt)
+ I = BitGroups.erase(I);
+ else
+ ++I;
+ }
+ }
+
+ if (InstCnt) *InstCnt += BitGroups.size();
+
+ // Insert the other groups (one at a time).
+ for (auto &BG : BitGroups) {
+ if (!Res) {
+ SDValue Ops[] =
+ { BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
+ getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ } else {
+ SDValue Ops[] =
+ { Res, BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
+ getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
+ }
+ }
+
+ if (LateMask) {
+ unsigned Mask = (unsigned) getZerosMask();
+
+ unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in zeros mask?");
+
+ if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
+ Res, getI32Imm(ANDIMask)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
+ Res, getI32Imm(ANDISMask)), 0);
+
+ if (!ANDIVal)
+ Res = ANDISVal;
+ else if (!ANDISVal)
+ Res = ANDIVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ ANDIVal, ANDISVal), 0);
+ }
+
+ return Res.getNode();
+ }
+
+ unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
+ unsigned MaskStart, unsigned MaskEnd,
+ bool IsIns) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
+
+ if (Repl32)
+ return 1;
+
+ if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
+ InstMaskEnd == 63 - RLAmt)
+ return 1;
+
+ return 2;
+ }
+
+ // For 64-bit values, not all combinations of rotates and masks are
+ // available. Produce one if it is available.
+ SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32,
+ unsigned MaskStart, unsigned MaskEnd,
+ unsigned *InstCnt = nullptr) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
+
+ if (InstCnt) *InstCnt += 1;
+
+ if (Repl32) {
+ // This rotation amount assumes that the lower 32 bits of the quantity
+ // are replicated in the high 32 bits by the rotation operator (which is
+ // done by rlwinm and friends).
+ assert(InstMaskStart >= 32 && "Mask cannot start out of range");
+ assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
+ getI32Imm(InstMaskEnd - 32) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
+ Ops), 0);
+ }
+
+ if (InstMaskEnd == 63) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
+ }
+
+ if (InstMaskStart == 0) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt), getI32Imm(InstMaskEnd) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
+ }
+
+ if (InstMaskEnd == 63 - RLAmt) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
+ }
+
+ // We cannot do this with a single instruction, so we'll use two. The
+ // problem is that we're not free to choose both a rotation amount and mask
+ // start and end independently. We can choose an arbitrary mask start and
+ // end, but then the rotation amount is fixed. Rotation, however, can be
+ // inverted, and so by applying an "inverse" rotation first, we can get the
+ // desired result.
+ if (InstCnt) *InstCnt += 1;
+
+ // The rotation mask for the second instruction must be MaskStart.
+ unsigned RLAmt2 = MaskStart;
+ // The first instruction must rotate V so that the overall rotation amount
+ // is RLAmt.
+ unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
+ if (RLAmt1)
+ V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
+ return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
+ }
+
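A worked instance of the fallback, with numbers assumed purely for illustration: a rotate-left by 5 with mask bits [10, 20] matches no single rldicl/rldicr/rldic form, so the code above first emits a full rotate by RLAmt1 = (64 + 5 - 10) % 64 = 59 and then an RLDIC with RLAmt2 = MaskStart = 10 (for which InstMaskEnd == 63 - RLAmt holds by construction). The two rotations compose back to the requested amount; rotl64 is a checking helper written for this note:

constexpr uint64_t rotl64(uint64_t V, unsigned R) {  // R in [1, 63]
  return (V << R) | (V >> (64 - R));
}
static_assert(rotl64(rotl64(0x1234567890ABCDEFULL, 59), 10) ==
              rotl64(0x1234567890ABCDEFULL, 5), "59 + 10 == 5 (mod 64)");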
+ // For 64-bit values, not all combinations of rotates and masks are
+ // available. Produce a rotate-mask-and-insert if one is available.
+ SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt,
+ bool Repl32, unsigned MaskStart,
+ unsigned MaskEnd, unsigned *InstCnt = nullptr) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
+
+ if (InstCnt) *InstCnt += 1;
+
+ if (Repl32) {
+ // This rotation amount assumes that the lower 32 bits of the quantity
+ // are replicated in the high 32 bits by the rotation operator (which is
+ // done by rlwinm and friends).
+ assert(InstMaskStart >= 32 && "Mask cannot start out of range");
+ assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
+ SDValue Ops[] =
+ { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
+ getI32Imm(InstMaskEnd - 32) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
+ Ops), 0);
+ }
+
+ if (InstMaskEnd == 63 - RLAmt) {
+ SDValue Ops[] =
+ { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
+ }
+
+ // We cannot do this with a single instruction, so we'll use two. The
+ // problem is that we're not free to choose both a rotation amount and mask
+ // start and end independently. We can choose an arbitrary mask start and
+ // end, but then the rotation amount is fixed. Rotation, however, can be
+ // inverted, and so by applying an "inverse" rotation first, we can get the
+ // desired result.
+ if (InstCnt) *InstCnt += 1;
+
+ // The rotation mask for the second instruction must be MaskStart.
+ unsigned RLAmt2 = MaskStart;
+ // The first instruction must rotate V so that the overall rotation amount
+ // is RLAmt.
+ unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
+ if (RLAmt1)
+ V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
+ return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
+ }
+
+ void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
+ if (BPermRewriterNoMasking)
+ return;
+
+ // The idea here is the same as in the 32-bit version, but with additional
+ // complications from the fact that Repl32 might be true. Because we
+ // aggressively convert bit groups to Repl32 form (which, for small
+ // rotation factors, involves no other change), and then coalesce, it might
+ // be the case that a single 64-bit masking operation could handle both
+ // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
+ // form allowed coalescing, then we must use a 32-bit rotation in order to
+ // completely capture the new combined bit group.
+
+ for (ValueRotInfo &VRI : ValueRotsVec) {
+ uint64_t Mask = 0;
+
+ // We need to add to the mask all bits from the associated bit groups.
+ // If Repl32 is false, we need to add bits from bit groups that have
+ // Repl32 true, but are trivially convertible to Repl32 false. Such a
+ // group is trivially convertible if it overlaps only with the lower 32
+ // bits, and the group has not been coalesced.
+ auto MatchingBG = [VRI](BitGroup &BG) {
+ if (VRI.V != BG.V)
+ return false;
+
+ unsigned EffRLAmt = BG.RLAmt;
+ if (!VRI.Repl32 && BG.Repl32) {
+ if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
+ !BG.Repl32Coalesced) {
+ if (BG.Repl32CR)
+ EffRLAmt += 32;
+ } else {
+ return false;
+ }
+ } else if (VRI.Repl32 != BG.Repl32) {
+ return false;
+ }
+
+ if (VRI.RLAmt != EffRLAmt)
+ return false;
+
+ return true;
+ };
+
+ for (auto &BG : BitGroups) {
+ if (!MatchingBG(BG))
+ continue;
+
+ if (BG.StartIdx <= BG.EndIdx) {
+ for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
+ Mask |= (UINT64_C(1) << i);
+ } else {
+ for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
+ Mask |= (UINT64_C(1) << i);
+ for (unsigned i = 0; i <= BG.EndIdx; ++i)
+ Mask |= (UINT64_C(1) << i);
+ }
+ }
+
+ // We can use the 32-bit andi/andis technique if the mask does not
+ // require any higher-order bits. This can save an instruction compared
+ // to always using the general 64-bit technique.
+ bool Use32BitInsts = isUInt<32>(Mask);
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX),
+ ANDISMask = (Mask >> 16) & UINT16_MAX;
+
+ bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
+
+ unsigned NumAndInsts = (unsigned) NeedsRotate +
+ (unsigned) (bool) Res;
+ if (Use32BitInsts)
+ NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+ else
+ NumAndInsts += SelectInt64Count(Mask) + /* and */ 1;
+
+ unsigned NumRLInsts = 0;
+ bool FirstBG = true;
+ for (auto &BG : BitGroups) {
+ if (!MatchingBG(BG))
+ continue;
+ NumRLInsts +=
+ SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
+ !FirstBG);
+ FirstBG = false;
+ }
+
+ DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
+ " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
+ "\n\t\t\tisel using masking: " << NumAndInsts <<
+ " using rotates: " << NumRLInsts << "\n");
+
+ // When we'd use andi/andis, we bias toward using the rotates (andi only
+ // has a record form, and is cracked on POWER cores). However, when using
+ // general 64-bit constant formation, bias toward the constant form,
+ // because that exposes more opportunities for CSE.
+ if (NumAndInsts > NumRLInsts)
+ continue;
+ if (Use32BitInsts && NumAndInsts == NumRLInsts)
+ continue;
+
+ DEBUG(dbgs() << "\t\t\t\tusing masking\n");
+
+ if (InstCnt) *InstCnt += NumAndInsts;
+
+ SDValue VRot;
+ // We actually need to generate a rotation if we have a non-zero rotation
+ // factor or, in the Repl32 case, if we care about any of the
+ // higher-order replicated bits. In the latter case, we generate a mask
+ // backward so that it actually includes the entire 64 bits.
+ if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
+ VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
+ VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
+ else
+ VRot = VRI.V;
+
+ SDValue TotalVal;
+ if (Use32BitInsts) {
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask when using 32-bit ands for 64-bit value");
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ VRot, getI32Imm(ANDIMask)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
+ VRot, getI32Imm(ANDISMask)), 0);
+
+ if (!ANDIVal)
+ TotalVal = ANDISVal;
+ else if (!ANDISVal)
+ TotalVal = ANDIVal;
+ else
+ TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ ANDIVal, ANDISVal), 0);
+ } else {
+ TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
+ TotalVal =
+ SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ VRot, TotalVal), 0);
+ }
+
+ if (!Res)
+ Res = TotalVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ Res, TotalVal), 0);
+
+ // Now, remove all groups with this underlying value and rotation
+ // factor.
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ if (MatchingBG(*I))
+ I = BitGroups.erase(I);
+ else
+ ++I;
+ }
+ }
+ }
+
+ // Instruction selection for the 64-bit case.
+ SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
+ SDLoc dl(N);
+ SDValue Res;
+
+ if (InstCnt) *InstCnt = 0;
+
+ // Take care of cases that should use andi/andis first.
+ SelectAndParts64(dl, Res, InstCnt);
+
+ // If we've not yet selected a 'starting' instruction, and we have no zeros
+ // to fill in, select the (Value, RLAmt) with the highest priority (largest
+ // number of groups), and start with this rotated value.
+ if ((!HasZeros || LateMask) && !Res) {
+ // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
+ // groups will come first, and so the VRI representing the largest number
+ // of groups might not be first; it might instead be the first Repl32 entry.
+ unsigned MaxGroupsIdx = 0;
+ if (!ValueRotsVec[0].Repl32) {
+ for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
+ if (ValueRotsVec[i].Repl32) {
+ if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
+ MaxGroupsIdx = i;
+ break;
+ }
+ }
+
+ ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
+ bool NeedsRotate = false;
+ if (VRI.RLAmt) {
+ NeedsRotate = true;
+ } else if (VRI.Repl32) {
+ for (auto &BG : BitGroups) {
+ if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
+ BG.Repl32 != VRI.Repl32)
+ continue;
+
+ // We don't need a rotate if the bit group is confined to the lower
+ // 32 bits.
+ if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
+ continue;
+
+ NeedsRotate = true;
+ break;
+ }
+ }
+
+ if (NeedsRotate)
+ Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
+ VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
+ InstCnt);
+ else
+ Res = VRI.V;
+
+ // Now, remove all groups with this underlying value and rotation factor.
+ if (Res)
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ if (I->V == VRI.V && I->RLAmt == VRI.RLAmt && I->Repl32 == VRI.Repl32)
+ I = BitGroups.erase(I);
+ else
+ ++I;
+ }
+ }
+
+ // Because 64-bit rotates are more flexible than inserts, we might have a
+ // preference regarding which one we do first (to save one instruction).
+ if (!Res)
+ for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
+ if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
+ false) <
+ SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
+ true)) {
+ if (I != BitGroups.begin()) {
+ BitGroup BG = *I;
+ BitGroups.erase(I);
+ BitGroups.insert(BitGroups.begin(), BG);
+ }
+
+ break;
+ }
+ }
+
+ // Insert the other groups (one at a time).
+ for (auto &BG : BitGroups) {
+ if (!Res)
+ Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
+ BG.EndIdx, InstCnt);
+ else
+ Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
+ BG.StartIdx, BG.EndIdx, InstCnt);
+ }
+
+ if (LateMask) {
+ uint64_t Mask = getZerosMask();
+
+ // We can use the 32-bit andi/andis technique if the mask does not
+ // require any higher-order bits. This can save an instruction compared
+ // to always using the general 64-bit technique.
+ bool Use32BitInsts = isUInt<32>(Mask);
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX),
+ ANDISMask = (Mask >> 16) & UINT16_MAX;
+
+ if (Use32BitInsts) {
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask when using 32-bit ands for 64-bit value");
+
+ if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ Res, getI32Imm(ANDIMask)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
+ Res, getI32Imm(ANDISMask)), 0);
+
+ if (!ANDIVal)
+ Res = ANDISVal;
+ else if (!ANDISVal)
+ Res = ANDIVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ ANDIVal, ANDISVal), 0);
+ } else {
+ if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1;
+
+ SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
+ Res =
+ SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ Res, MaskVal), 0);
+ }
+ }
+
+ return Res.getNode();
+ }
+
+ SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
+ // Fill in BitGroups.
+ collectBitGroups(LateMask);
+ if (BitGroups.empty())
+ return nullptr;
+
+ // For 64-bit values, figure out when we can use 32-bit instructions.
+ if (Bits.size() == 64)
+ assignRepl32BitGroups();
+
+ // Fill in ValueRotsVec.
+ collectValueRotInfo();
+
+ if (Bits.size() == 32) {
+ return Select32(N, LateMask, InstCnt);
+ } else {
+ assert(Bits.size() == 64 && "Not 64 bits here?");
+ return Select64(N, LateMask, InstCnt);
+ }
+
+ return nullptr;
+ }
+
+ SmallVector<ValueBit, 64> Bits;
+
+ bool HasZeros;
+ SmallVector<unsigned, 64> RLAmt;
+
+ SmallVector<BitGroup, 16> BitGroups;
+
+ DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
+ SmallVector<ValueRotInfo, 16> ValueRotsVec;
+
+ SelectionDAG *CurDAG;
+
+public:
+ BitPermutationSelector(SelectionDAG *DAG)
+ : CurDAG(DAG) {}
+
+ // Here we try to match complex bit permutations into a set of
+ // rotate-and-shift/shift/and/or instructions, using a set of heuristics
+ // known to produce optimal code for common cases (like i32 byte swapping).
+ SDNode *Select(SDNode *N) {
+ Bits.resize(N->getValueType(0).getSizeInBits());
+ if (!getValueBits(SDValue(N, 0), Bits))
+ return nullptr;
+
+ DEBUG(dbgs() << "Considering bit-permutation-based instruction"
+ " selection for: ");
+ DEBUG(N->dump(CurDAG));
+
+ // Fill in RLAmt and set HasZeros.
+ computeRotationAmounts();
+
+ if (!HasZeros)
+ return Select(N, false);
+
+ // We currently have two techniques for handling results with zeros: early
+ // masking (the default) and late masking. Late masking is sometimes more
+ // efficient, but because the structure of the bit groups is different, it
+ // is hard to tell without generating both and comparing the results. With
+ // late masking, we ignore zeros in the resulting value when inserting each
+ // set of bit groups, and then mask in the zeros at the end. With early
+ // masking, we only insert the non-zero parts of the result at every step.
+
+ unsigned InstCnt, InstCntLateMask;
+ DEBUG(dbgs() << "\tEarly masking:\n");
+ SDNode *RN = Select(N, false, &InstCnt);
+ DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
+
+ DEBUG(dbgs() << "\tLate masking:\n");
+ SDNode *RNLM = Select(N, true, &InstCntLateMask);
+ DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
+ " instructions\n");
+
+ if (InstCnt <= InstCntLateMask) {
+ DEBUG(dbgs() << "\tUsing early-masking for isel\n");
+ return RN;
+ }
+
+ DEBUG(dbgs() << "\tUsing late-masking for isel\n");
+ return RNLM;
+ }
+};
+} // anonymous namespace
+
+SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) {
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return nullptr;
+
+ if (!UseBitPermRewriter)
+ return nullptr;
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ROTL:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::AND:
+ case ISD::OR: {
+ BitPermutationSelector BPS(CurDAG);
+ return BPS.Select(N);
+ }
+ }
+
+ return nullptr;
+}
+
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
@@ -918,81 +2347,23 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
+ // In case any misguided DAG-level optimizations form an ADD with a
+ // TargetConstant operand, crash here instead of miscompiling (by selecting
+ // an r+r add instead of some kind of r+i add).
+ if (N->getOpcode() == ISD::ADD &&
+ N->getOperand(1).getOpcode() == ISD::TargetConstant)
+ llvm_unreachable("Invalid ADD with TargetConstant operand");
+
+ // Try matching complex bit permutations before doing anything else.
+ if (SDNode *NN = SelectBitPermutation(N))
+ return NN;
+
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
- if (N->getValueType(0) == MVT::i64) {
- // Get 64 bit value.
- int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
- // Assume no remaining bits.
- unsigned Remainder = 0;
- // Assume no shift required.
- unsigned Shift = 0;
-
- // If it can't be represented as a 32 bit value.
- if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
- int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
- // If the shifted value fits 32 bits.
- if (isInt<32>(ImmSh)) {
- // Go with the shifted value.
- Imm = ImmSh;
- } else {
- // Still stuck with a 64 bit value.
- Remainder = Imm;
- Shift = 32;
- Imm >>= 32;
- }
- }
-
- // Intermediate operand.
- SDNode *Result;
-
- // Handle first 32 bits.
- unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
-
- // Simple value.
- if (isInt<16>(Imm)) {
- // Just the Lo bits.
- Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
- } else if (Lo) {
- // Handle the Hi bits.
- unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
- Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
- // And Lo bits.
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- } else {
- // Just the Hi bits.
- Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
- }
-
- // If no shift, we're done.
- if (!Shift) return Result;
-
- // Shift for next step if the upper 32-bits were not zero.
- if (Imm) {
- Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
- SDValue(Result, 0),
- getI32Imm(Shift),
- getI32Imm(63 - Shift));
- }
-
- // Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Hi));
- }
- if ((Lo = Remainder & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- }
-
- return Result;
- }
+ if (N->getValueType(0) == MVT::i64)
+ return SelectInt64(CurDAG, N);
break;
}
@@ -1005,16 +2376,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
- case ISD::FrameIndex: {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
- unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
- if (N->hasOneUse())
- return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
- getSmallIPtrImm(0));
- return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
- getSmallIPtrImm(0));
- }
+ case ISD::FrameIndex:
+ return getFrameIndex(N, N);
case PPCISD::MFOCRF: {
SDValue InFlag = N->getOperand(1);
@@ -1022,35 +2385,31 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0), InFlag);
}
- case ISD::SDIV: {
- // FIXME: since this depends on the setting of the carry flag from the srawi
- // we should really be making notes about that for the scheduler.
- // FIXME: It sure would be nice if we could cheaply recognize the
- // srl/add/sra pattern the dag combiner will generate for this as
- // sra/addze rather than having to handle sdiv ourselves. oh well.
- unsigned Imm;
- if (isInt32Immediate(N->getOperand(1), Imm)) {
- SDValue N0 = N->getOperand(0);
- if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
- SDNode *Op =
- CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
- N0, getI32Imm(Log2_32(Imm)));
- return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
- SDValue(Op, 0), SDValue(Op, 1));
- } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
- SDNode *Op =
- CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
- N0, getI32Imm(Log2_32(-Imm)));
- SDValue PT =
- SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
- SDValue(Op, 0), SDValue(Op, 1)),
- 0);
- return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
- }
- }
+ case PPCISD::READ_TIME_BASE: {
+ return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
+ MVT::Other, N->getOperand(0));
+ }
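+  // (An assumption, not shown in this patch: on 32-bit targets, where ReadTB
+  // returns the time base as two i32 halves, the pseudo presumably expands to
+  // the classic mftbu/mftb/mftbu retry loop so that a consistent 64-bit
+  // reading is obtained.)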
- // Other cases are autogenerated.
- break;
+ case PPCISD::SRA_ADDZE: {
+ SDValue N0 = N->getOperand(0);
+ SDValue ShiftAmt =
+ CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
+ getConstantIntValue(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::i64) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
+ N0, ShiftAmt);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else {
+ assert(N->getValueType(0) == MVT::i32 &&
+ "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, ShiftAmt);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ }
}
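+  // An illustrative note (a sketch, not from this patch): for i32 this emits
+  // the usual signed divide-by-power-of-two expansion, e.g. for x / 4:
+  //   srawi r4, r3, 2   (CA is set iff x is negative and a one bit was
+  //                      shifted out)
+  //   addze r4, r4      (add CA, rounding the quotient toward zero)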
case ISD::LOAD: {
@@ -1203,13 +2562,34 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
- case ISD::OR:
+ case ISD::OR: {
if (N->getValueType(0) == MVT::i32)
if (SDNode *I = SelectBitfieldInsert(N))
return I;
+ short Imm;
+ if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
+ isIntS16Immediate(N->getOperand(1), Imm)) {
+ APInt LHSKnownZero, LHSKnownOne;
+ CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
+
+ // If this is equivalent to an add, then we can fold it with the
+ // FrameIndex calculation.
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL)
+ return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+ }
+
// Other cases are autogenerated.
break;
+ }
+ case ISD::ADD: {
+ short Imm;
+ if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
+ isIntS16Immediate(N->getOperand(1), Imm))
+ return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+
+ break;
+ }
case ISD::SHL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
@@ -1325,7 +2705,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::f32)
SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
- SelectCCOp = PPC::SELECT_CC_F8;
+ if (PPCSubTarget->hasVSX())
+ SelectCCOp = PPC::SELECT_CC_VSFRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else if (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)
+ SelectCCOp = PPC::SELECT_CC_VSRC;
else
SelectCCOp = PPC::SELECT_CC_VRRC;
@@ -1355,6 +2741,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else
DM[i] = 1;
+ // For little endian, we must swap the input operands and adjust
+ // the mask elements (reverse and invert them).
+ if (PPCSubTarget->isLittleEndian()) {
+ std::swap(Op1, Op2);
+ unsigned tmp = DM[0];
+ DM[0] = 1 - DM[1];
+ DM[1] = 1 - tmp;
+ }
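+    // For example (a sketch, not from this patch): a big-endian selector of
+    // {0, 0} (take doubleword 0 of each input) is mapped by the code above to
+    // {1, 1} with the inputs swapped, which selects the same data on a
+    // little-endian target.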
+
SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
@@ -1439,13 +2834,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
+    assert((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
+           "Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
N->getOperand(1));
- }
- assert (PPCSubTarget->isPPC64() &&
- "Only supported for 64-bit ABI and 32-bit SVR4");
+ }
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
@@ -1566,6 +2961,324 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+// If the target supports the cmpb instruction, do the idiom recognition here.
+// We don't do this as a DAG combine because we don't want to do it as nodes
+// are being combined (we might miss part of the eventual idiom), and we don't
+// want to do it during instruction selection because we want to reuse the
+// masking-operation lowering logic already present in the instruction
+// selector.
+SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
+ SDLoc dl(N);
+
+ assert(N->getOpcode() == ISD::OR &&
+ "Only OR nodes are supported for CMPB");
+
+ SDValue Res;
+ if (!PPCSubTarget->hasCMPB())
+ return Res;
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return Res;
+
+ EVT VT = N->getValueType(0);
+
+ SDValue RHS, LHS;
+ bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ uint64_t Mask = 0, Alt = 0;
+
+ auto IsByteSelectCC = [this](SDValue O, unsigned &b,
+ uint64_t &Mask, uint64_t &Alt,
+ SDValue &LHS, SDValue &RHS) {
+ if (O.getOpcode() != ISD::SELECT_CC)
+ return false;
+ ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
+
+ if (!isa<ConstantSDNode>(O.getOperand(2)) ||
+ !isa<ConstantSDNode>(O.getOperand(3)))
+ return false;
+
+ uint64_t PM = O.getConstantOperandVal(2);
+ uint64_t PAlt = O.getConstantOperandVal(3);
+ for (b = 0; b < 8; ++b) {
+ uint64_t Mask = UINT64_C(0xFF) << (8*b);
+ if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
+ break;
+ }
+
+ if (b == 8)
+ return false;
+ Mask |= PM;
+ Alt |= PAlt;
+
+ if (!isa<ConstantSDNode>(O.getOperand(1)) ||
+ O.getConstantOperandVal(1) != 0) {
+ SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
+ Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
+ isa<ConstantSDNode>(Op0.getOperand(1))) {
+
+ unsigned Bits = Op0.getValueType().getSizeInBits();
+ if (b != Bits/8-1)
+ return false;
+ if (Op0.getConstantOperandVal(1) != Bits-8)
+ return false;
+
+ LHS = Op0.getOperand(0);
+ RHS = Op1.getOperand(0);
+ return true;
+ }
+
+ // When we have small integers (i16 to be specific), the form present
+ // post-legalization uses SETULT in the SELECT_CC for the
+ // higher-order byte, depending on the fact that the
+ // even-higher-order bytes are known to all be zero, for example:
+ // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
+ // (so when the second byte is the same, because all higher-order
+ // bits from bytes 3 and 4 are known to be zero, the result of the
+ // xor can be at most 255)
+ if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
+ isa<ConstantSDNode>(O.getOperand(1))) {
+
+ uint64_t ULim = O.getConstantOperandVal(1);
+ if (ULim != (UINT64_C(1) << b*8))
+ return false;
+
+ // Now we need to make sure that the upper bytes are known to be
+ // zero.
+ unsigned Bits = Op0.getValueType().getSizeInBits();
+ if (!CurDAG->MaskedValueIsZero(Op0,
+ APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
+ return false;
+
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (CC != ISD::SETEQ)
+ return false;
+
+ SDValue Op = O.getOperand(0);
+ if (Op.getOpcode() == ISD::AND) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+ if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
+ return false;
+
+ SDValue XOR = Op.getOperand(0);
+ if (XOR.getOpcode() == ISD::TRUNCATE)
+ XOR = XOR.getOperand(0);
+ if (XOR.getOpcode() != ISD::XOR)
+ return false;
+
+ LHS = XOR.getOperand(0);
+ RHS = XOR.getOperand(1);
+ return true;
+ } else if (Op.getOpcode() == ISD::SRL) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+ unsigned Bits = Op.getValueType().getSizeInBits();
+ if (b != Bits/8-1)
+ return false;
+ if (Op.getConstantOperandVal(1) != Bits-8)
+ return false;
+
+ SDValue XOR = Op.getOperand(0);
+ if (XOR.getOpcode() == ISD::TRUNCATE)
+ XOR = XOR.getOperand(0);
+ if (XOR.getOpcode() != ISD::XOR)
+ return false;
+
+ LHS = XOR.getOperand(0);
+ RHS = XOR.getOperand(1);
+ return true;
+ }
+
+ return false;
+ };
+
+ SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
+ while (!Queue.empty()) {
+ SDValue V = Queue.pop_back_val();
+
+ for (const SDValue &O : V.getNode()->ops()) {
+ unsigned b;
+ uint64_t M = 0, A = 0;
+ SDValue OLHS, ORHS;
+ if (O.getOpcode() == ISD::OR) {
+ Queue.push_back(O);
+ } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
+ if (!LHS) {
+ LHS = OLHS;
+ RHS = ORHS;
+ BytesFound[b] = true;
+ Mask |= M;
+ Alt |= A;
+ } else if ((LHS == ORHS && RHS == OLHS) ||
+ (RHS == ORHS && LHS == OLHS)) {
+ BytesFound[b] = true;
+ Mask |= M;
+ Alt |= A;
+ } else {
+ return Res;
+ }
+ } else {
+ return Res;
+ }
+ }
+ }
+
+ unsigned LastB = 0, BCnt = 0;
+ for (unsigned i = 0; i < 8; ++i)
+    if (BytesFound[i]) {
+ ++BCnt;
+ LastB = i;
+ }
+
+ if (!LastB || BCnt < 2)
+ return Res;
+
+  // Because we'll be zero-extending the output anyway if we don't have a
+  // specific value for each input byte (via the Mask), we can 'anyext' the
+  // inputs.
+ if (LHS.getValueType() != VT) {
+ LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
+ RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
+ }
+
+ Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
+
+ bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
+ if (NonTrivialMask && !Alt) {
+ // Res = Mask & CMPB
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT));
+ } else if (Alt) {
+ // Res = (CMPB & Mask) | (~CMPB & Alt)
+ // Which, as suggested here:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // can be written as:
+ // Res = Alt ^ ((Alt ^ Mask) & CMPB)
+ // useful because the (Alt ^ Mask) can be pre-computed.
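+  // For instance (a sketch): with Mask = 0x00FF and Alt = 0xFF00 this
+  // computes 0xFF00 ^ (0xFFFF & CMPB), i.e. the CMPB byte where Mask is
+  // set and the complemented CMPB byte where Alt is set.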
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+ CurDAG->getConstant(Mask ^ Alt, VT));
+ Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT));
+ }
+
+ return Res;
+}
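+// An illustrative sketch (not from this patch): the DAG idiom recognized
+// above typically originates from bytewise-equality code such as:
+//
+//   uint32_t bytemask_eq(uint32_t a, uint32_t b) {
+//     uint32_t r = 0;
+//     for (int i = 0; i < 4; ++i)
+//       if (((a >> (8 * i)) & 0xFF) == ((b >> (8 * i)) & 0xFF))
+//         r |= 0xFFu << (8 * i);
+//     return r;
+//   }
+//
+// which cmpb computes in one instruction: each result byte is 0xFF where the
+// corresponding bytes of the inputs are equal, and 0x00 otherwise.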
+
+// When CR bit registers are enabled, an extension of an i1 variable to an i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0, a 1, or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (as a 0 or a 1 can), then we should fold those
+// operations in with the select.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+ if (!PPCSubTarget->useCRBits())
+ return;
+
+ if (N->getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOpcode() != ISD::ANY_EXTEND)
+ return;
+
+ if (N->getOperand(0).getValueType() != MVT::i1)
+ return;
+
+ if (!N->hasOneUse())
+ return;
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N->getOperand(0);
+ SDValue ConstTrue =
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+ do {
+ SDNode *User = *N->use_begin();
+ if (User->getNumOperands() != 2)
+ break;
+
+ auto TryFold = [this, N, User](SDValue Val) {
+ SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+ SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+ SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+ User->getValueType(0),
+ O0.getNode(), O1.getNode());
+ };
+
+ SDValue TrueRes = TryFold(ConstTrue);
+ if (!TrueRes)
+ break;
+ SDValue FalseRes = TryFold(ConstFalse);
+ if (!FalseRes)
+ break;
+
+ // For us to materialize these using one instruction, we must be able to
+ // represent them as signed 16-bit integers.
+ uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+ False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+ if (!isInt<16>(True) || !isInt<16>(False))
+ break;
+
+ // We can replace User with a new SELECT node, and try again to see if we
+ // can fold the select with its user.
+ Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+ N = User;
+ ConstTrue = TrueRes;
+ ConstFalse = FalseRes;
+ } while (N->hasOneUse());
+}
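+// For example (a sketch, not from this patch): with CR bits enabled,
+//   (xor (zext i1 %c to i32), 1)
+// folds here into a single select, since both constant foldings fit in a
+// signed 16-bit immediate (TryFold(1) = 1 ^ 1 = 0 and TryFold(0) = 0 ^ 1 = 1):
+//   (select %c, (i32 0), (i32 1))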
+
+void PPCDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ if (N->use_empty())
+ continue;
+
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::OR:
+ Res = combineToCMPB(N);
+ break;
+ }
+
+ if (!Res)
+ foldBoolExts(Res, N);
+
+ if (Res) {
+ DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(Res.getNode()->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ MadeChange = true;
+ }
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
+}
+
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
@@ -1576,6 +3289,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
PeepholePPC64();
PeepholeCROps();
+ PeepholePPC64ZExt();
}
// Check if all users of this node will become isel where the second operand
@@ -1690,7 +3404,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_VRRC: {
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC: {
SDValue Op = MachineNode->getOperand(0);
if (Op.isMachineOpcode()) {
if (Op.getMachineOpcode() == PPC::CRSET)
@@ -1996,6 +3712,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC:
if (Op1Set)
ResNode = MachineNode->getOperand(1).getNode();
else if (Op1Unset)
@@ -2045,6 +3763,315 @@ void PPCDAGToDAGISel::PeepholeCROps() {
} while (IsModified);
}
+// Gather the set of 32-bit operations that are known to have their
+// higher-order 32 bits zero, where ToPromote contains all such operations.
+static bool PeepholePPC64ZExtGather(SDValue Op32,
+ SmallPtrSetImpl<SDNode *> &ToPromote) {
+ if (!Op32.isMachineOpcode())
+ return false;
+
+  // First, check for the "frontier" instructions (those that will clear the
+  // higher-order 32 bits).
+
+ // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
+ // around. If it does not, then these instructions will clear the
+ // higher-order bits.
+ if ((Op32.getMachineOpcode() == PPC::RLWINM ||
+ Op32.getMachineOpcode() == PPC::RLWNM) &&
+ Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // SLW and SRW always clear the higher-order bits.
+ if (Op32.getMachineOpcode() == PPC::SLW ||
+ Op32.getMachineOpcode() == PPC::SRW) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // For LI and LIS, we need the immediate to be positive (so that it is not
+ // sign extended).
+ if (Op32.getMachineOpcode() == PPC::LI ||
+ Op32.getMachineOpcode() == PPC::LIS) {
+ if (!isUInt<15>(Op32.getConstantOperandVal(0)))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // LHBRX and LWBRX always clear the higher-order bits.
+ if (Op32.getMachineOpcode() == PPC::LHBRX ||
+ Op32.getMachineOpcode() == PPC::LWBRX) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+  // CNTLZW always produces a value in [0,32] across the full 64-bit register,
+  // and so its result is already zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // Next, check for those instructions we can look through.
+
+ // Assuming the mask does not wrap around, then the higher-order bits are
+ // taken directly from the first operand.
+ if (Op32.getMachineOpcode() == PPC::RLWIMI &&
+ Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // For OR, the higher-order bits are zero if that is true for both operands.
+ // For SELECT_I4, the same is true (but the relevant operand numbers are
+ // shifted by 1).
+ if (Op32.getMachineOpcode() == PPC::OR ||
+ Op32.getMachineOpcode() == PPC::SELECT_I4) {
+ unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
+ return false;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // For ORI and ORIS, we need the higher-order bits of the first operand to be
+ // zero, and also for the constant to be positive (so that it is not sign
+ // extended).
+ if (Op32.getMachineOpcode() == PPC::ORI ||
+ Op32.getMachineOpcode() == PPC::ORIS) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
+ return false;
+ if (!isUInt<15>(Op32.getConstantOperandVal(1)))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // The higher-order bits of AND are zero if that is true for at least one of
+ // the operands.
+ if (Op32.getMachineOpcode() == PPC::AND) {
+ SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
+ bool Op0OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
+ bool Op1OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
+ if (!Op0OK && !Op1OK)
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+
+ if (Op0OK)
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+
+ if (Op1OK)
+ ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
+
+ return true;
+ }
+
+ // For ANDI and ANDIS, the higher-order bits are zero if either that is true
+ // of the first operand, or if the second operand is positive (so that it is
+ // not sign extended).
+ if (Op32.getMachineOpcode() == PPC::ANDIo ||
+ Op32.getMachineOpcode() == PPC::ANDISo) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ bool Op0OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
+ bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
+ if (!Op0OK && !Op1OK)
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+
+ if (Op0OK)
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+
+ return true;
+ }
+
+ return false;
+}
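+// For instance (a sketch, not from this patch), given
+//   t1 = SRW x, y    (a frontier instruction: always clears the high 32 bits)
+//   t2 = ORI t1, 7   (look-through: high bits zero and 7 satisfies isUInt<15>)
+// the gather returns true with ToPromote = {t2, t1}, so both can later be
+// rewritten as SRW8/ORI8.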
+
+void PPCDAGToDAGISel::PeepholePPC64ZExt() {
+ if (!PPCSubTarget->isPPC64())
+ return;
+
+ // When we zero-extend from i32 to i64, we use a pattern like this:
+ // def : Pat<(i64 (zext i32:$in)),
+ // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
+ // 0, 32)>;
+ // There are several 32-bit shift/rotate instructions, however, that will
+ // clear the higher-order bits of their output, rendering the RLDICL
+ // unnecessary. When that happens, we remove it here, and redefine the
+ // relevant 32-bit operation to be a 64-bit operation.
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ if (N->getMachineOpcode() != PPC::RLDICL)
+ continue;
+
+ if (N->getConstantOperandVal(1) != 0 ||
+ N->getConstantOperandVal(2) != 32)
+ continue;
+
+ SDValue ISR = N->getOperand(0);
+ if (!ISR.isMachineOpcode() ||
+ ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
+ continue;
+
+ if (!ISR.hasOneUse())
+ continue;
+
+ if (ISR.getConstantOperandVal(2) != PPC::sub_32)
+ continue;
+
+ SDValue IDef = ISR.getOperand(0);
+ if (!IDef.isMachineOpcode() ||
+ IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
+ continue;
+
+ // We now know that we're looking at a canonical i32 -> i64 zext. See if we
+ // can get rid of it.
+
+ SDValue Op32 = ISR->getOperand(1);
+ if (!Op32.isMachineOpcode())
+ continue;
+
+    // There are some 32-bit instructions that always clear the high-order 32
+    // bits; there are also some instructions (like AND) that we can look
+    // through.
+ SmallPtrSet<SDNode *, 16> ToPromote;
+ if (!PeepholePPC64ZExtGather(Op32, ToPromote))
+ continue;
+
+ // If the ToPromote set contains nodes that have uses outside of the set
+ // (except for the original INSERT_SUBREG), then abort the transformation.
+ bool OutsideUse = false;
+ for (SDNode *PN : ToPromote) {
+ for (SDNode *UN : PN->uses()) {
+ if (!ToPromote.count(UN) && UN != ISR.getNode()) {
+ OutsideUse = true;
+ break;
+ }
+ }
+
+ if (OutsideUse)
+ break;
+ }
+ if (OutsideUse)
+ continue;
+
+ MadeChange = true;
+
+    // We now know that this zero extension can be removed by promoting the
+    // nodes in ToPromote to 64-bit operations, where for operations in the
+    // frontier of the set, we need to insert INSERT_SUBREGs for their
+    // operands.
+ for (SDNode *PN : ToPromote) {
+ unsigned NewOpcode;
+ switch (PN->getMachineOpcode()) {
+ default:
+ llvm_unreachable("Don't know the 64-bit variant of this instruction");
+ case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
+ case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
+ case PPC::SLW: NewOpcode = PPC::SLW8; break;
+ case PPC::SRW: NewOpcode = PPC::SRW8; break;
+ case PPC::LI: NewOpcode = PPC::LI8; break;
+ case PPC::LIS: NewOpcode = PPC::LIS8; break;
+ case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
+ case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
+ case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
+ case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
+ case PPC::OR: NewOpcode = PPC::OR8; break;
+ case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
+ case PPC::ORI: NewOpcode = PPC::ORI8; break;
+ case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
+ case PPC::AND: NewOpcode = PPC::AND8; break;
+ case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
+ case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
+ }
+
+ // Note: During the replacement process, the nodes will be in an
+ // inconsistent state (some instructions will have operands with values
+ // of the wrong type). Once done, however, everything should be right
+ // again.
+
+ SmallVector<SDValue, 4> Ops;
+ for (const SDValue &V : PN->ops()) {
+ if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
+ !isa<ConstantSDNode>(V)) {
+ SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
+ SDNode *ReplOp =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
+ ISR.getNode()->getVTList(), ReplOpOps);
+ Ops.push_back(SDValue(ReplOp, 0));
+ } else {
+ Ops.push_back(V);
+ }
+ }
+
+ // Because all to-be-promoted nodes only have users that are other
+ // promoted nodes (or the original INSERT_SUBREG), we can safely replace
+ // the i32 result value type with i64.
+
+ SmallVector<EVT, 2> NewVTs;
+ SDVTList VTs = PN->getVTList();
+ for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
+ if (VTs.VTs[i] == MVT::i32)
+ NewVTs.push_back(MVT::i64);
+ else
+ NewVTs.push_back(VTs.VTs[i]);
+
+ DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
+ DEBUG(PN->dump(CurDAG));
+
+ CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
+
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(PN->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+ }
+
+ // Now we replace the original zero extend and its associated INSERT_SUBREG
+ // with the value feeding the INSERT_SUBREG (which has now been promoted to
+ // return an i64).
+
+ DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(Op32.getNode()->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(N, Op32.getNode());
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
+}
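+// The net effect (an illustrative sketch, not from this patch): a canonical
+// zero extension such as
+//   t1 = SLW a, b
+//   t2 = INSERT_SUBREG (IMPLICIT_DEF), t1, sub_32
+//   t3 = RLDICL t2, 0, 32
+// is rewritten so that t1 becomes an SLW8 (with any i32 operands wrapped in
+// fresh INSERT_SUBREGs) and all uses of t3 are replaced by the promoted t1;
+// the dead RLDICL and INSERT_SUBREG are then cleaned up by RemoveDeadNodes().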
+
void PPCDAGToDAGISel::PeepholePPC64() {
// These optimizations are currently supported only for 64-bit SVR4.
if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 0d37f50cd17b..5c52bb19b0d1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13,6 +13,7 @@
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -39,6 +41,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+// FIXME: Remove this once soft-float is supported.
+static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
+  cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
+
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -51,20 +57,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- // If it isn't a Mach-O file then it's going to be a linux ELF
- // object file.
- if (TT.isOSDarwin())
- return new TargetLoweringObjectFileMachO();
-
- return new PPC64LinuxTargetObjectFile();
-}
-
-PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
+ : TargetLowering(TM),
Subtarget(*TM.getSubtargetImpl()) {
- setPow2DivIsCheap();
-
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
@@ -80,8 +75,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ }
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -120,12 +117,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (ANDIGlueBug)
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
@@ -400,10 +396,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
+ for (MVT VT : MVT::vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
@@ -470,14 +463,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
- setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -604,15 +595,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
- if (Subtarget.has64BitSupport()) {
+ if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- }
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
+
+ if (!isPPC64) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
// Altivec instructions set fields to all zeros or all ones.
@@ -637,6 +628,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget.hasFPCVT())
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
@@ -644,6 +637,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -684,10 +679,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget.isJITCodeModel())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- setSupportJumpTables(false);
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ setPrefFunctionAlignment(4);
+ setPrefLoopAlignment(4);
+ break;
+ }
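+  // (Note: these alignment preferences are log2 values, so 4 requests
+  // 16-byte alignment for both functions and loops on the listed cores.)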
setInsertFencesForAtomic(true);
@@ -698,8 +706,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
computeRegisterProperties();
- // The Freescale cores does better with aggressive inlining of memcpy and
- // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+ // The Freescale cores do better with aggressive inlining of memcpy and
+ // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
@@ -708,8 +716,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
-
- setPrefFunctionAlignment(4);
}
}
@@ -761,14 +767,20 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
+ case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
+ case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
+ case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
@@ -785,7 +797,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
+ case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
@@ -793,6 +807,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
+ case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -827,6 +843,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
+bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
@@ -858,20 +879,21 @@ static bool isConstantOrUndef(int Op, int Val) {
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else if (ShuffleKind == 2) {
- if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (!IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2))
return false;
} else if (ShuffleKind == 1) {
- unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
+ unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
@@ -888,22 +910,23 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else if (ShuffleKind == 2) {
- if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
return false;
} else if (ShuffleKind == 1) {
- unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2;
+ unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
@@ -942,7 +965,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
@@ -967,7 +990,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
@@ -1011,7 +1034,8 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
+ isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
@@ -1084,7 +1108,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1682,6 +1706,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = Subtarget.isPPC64();
+ const Module *M = DAG.getMachineFunction().getFunction()->getParent();
+ PICLevel::Level picLevel = M->getPICLevel();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1721,7 +1747,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
} else {
- GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
GOTPtr, TGA);
@@ -1738,7 +1767,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
} else {
- GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
GOTPtr, TGA);
@@ -1873,7 +1905,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// gpr_index
SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
VAListPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = GprIndex.getValue(1);
if (VT == MVT::i64) {
@@ -1896,7 +1928,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
FprPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
@@ -2310,7 +2342,8 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const TargetMachine &Target,
unsigned NumBytes) {
- unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign =
+ Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
@@ -2387,8 +2420,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
@@ -2462,7 +2495,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2495,7 +2528,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
- const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ if (DisablePPCFloatInVariadic)
+ NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
NumGPArgRegs));
@@ -2504,7 +2539,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
- NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+ NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
@@ -2546,7 +2581,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2695,7 +2730,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI;
if (HasParameterArea ||
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
- FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
else
FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3061,7 +3096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3539,9 +3574,24 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
InFlag = Chain.getValue(1);
}
+// Is this global address that of a function that can be called by name (as
+// opposed to something that must hold a descriptor for an indirect call)?
+static bool isFunctionGlobalAddress(SDValue Callee) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
+ Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+
+ return G->getGlobal()->getType()->getElementType()->isFunctionTy();
+ }
+
+ return false;
+}
+
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &Subtarget) {
@@ -3564,34 +3614,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
needIndirectCall = false;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
- // Use indirect calls for ALL functions calls in JIT mode, since the
- // far-call stubs may be outside relocation limits for a BL instruction.
- if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
- unsigned OpFlags = 0;
- if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (Subtarget.getTargetTriple().isMacOSX() &&
- Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
- (G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) ||
- (Subtarget.isTargetELF() && !isPPC64 &&
- !G->getGlobal()->hasLocalLinkage() &&
- DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = PPCII::MO_PLT_OR_STUB;
- }
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(),
- 0, OpFlags);
- needIndirectCall = false;
+ if (isFunctionGlobalAddress(Callee)) {
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ // A call to a TLS address is actually an indirect call to a
+ // thread-specific pointer.
+ unsigned OpFlags = 0;
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(), 0, OpFlags);
+ needIndirectCall = false;
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
@@ -3601,7 +3648,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
(Subtarget.getTargetTriple().isMacOSX() &&
Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
(Subtarget.isTargetELF() && !isPPC64 &&
- DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3613,6 +3660,16 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
needIndirectCall = false;
}
+ if (IsPatchPoint) {
+ // We'll form an invalid direct call when lowering a patchpoint; the full
+ // sequence for an indirect call is complicated, and many of the
+ // instructions introduced might have side effects (and, thus, can't be
+ // removed later). The call itself will be removed as soon as the
+ // argument/return lowering is complete, so the fact that it has the wrong
+ // kind of operands should not really matter.
+ needIndirectCall = false;
+ }
+
if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
@@ -3738,7 +3795,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
RegsToPass[i].second.getValueType()));
// Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (Callee.getNode() && isELFv2ABI)
+ if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
return CallOpc;
@@ -3761,8 +3818,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
@@ -3801,7 +3858,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
- bool isTailCall, bool isVarArg,
+ bool isTailCall, bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
@@ -3815,8 +3872,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
- isTailCall, RegsToPass, Ops, NodeTys,
- Subtarget);
+ isTailCall, IsPatchPoint, RegsToPass, Ops,
+ NodeTys, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -3830,7 +3887,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3859,8 +3917,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
- bool needsTOCRestore = false;
- if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
+ !IsPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3871,7 +3929,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
- needsTOCRestore = true;
+ CallOpc = PPCISD::BCTRL_LOAD_TOC;
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+
+ // The address needs to go after the chain input but before the flag (or
+ // any other variadic arguments).
+ Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
@@ -3885,17 +3953,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
- if (needsTOCRestore) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
- Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
- InFlag = Chain.getValue(1);
- }
-
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag, dl);
@@ -3919,6 +3976,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ bool IsPatchPoint = CLI.IsPatchPoint;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
@@ -3931,23 +3989,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isSVR4ABI()) {
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -3978,8 +4036,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
@@ -4020,7 +4078,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -4157,7 +4215,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -4185,7 +4243,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4357,7 +4415,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
@@ -4621,9 +4679,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
- if (!isTailCall &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (!isTailCall && !IsPatchPoint &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
@@ -4635,7 +4693,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
- if (isELFv2ABI)
+ if (isELFv2ABI && !IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
@@ -4652,7 +4710,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -4660,7 +4718,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4827,7 +4885,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -5026,8 +5084,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
if (!isTailCall &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG))
RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
PPC::R12), Callee));
@@ -5045,7 +5103,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
Ins, InVals);
}
@@ -5056,8 +5114,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_PPC);
}
@@ -5069,8 +5126,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
SDValue Flag;
@@ -5160,7 +5217,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -5378,9 +5435,9 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-// FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- SDLoc dl) const {
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
@@ -5429,15 +5486,95 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- MPI = MachinePointerInfo();
+ MPI = MPI.getWithOffset(4);
}
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
- false, false, false, 0);
+ RLI.Chain = Chain;
+ RLI.Ptr = FIPtr;
+ RLI.MPI = MPI;
+}
+
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const {
+ ReuseLoadInfo RLI;
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+
+ return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+}
+
+// We're trying to insert a regular store, S, and then a load, L. If the
+// incoming value, O, is a load, we might just be able to have our load use the
+// address used by O. However, we don't know if anything else will store to
+// that address before we can load from it. To prevent this situation, we need
+// to insert our load, L, into the chain as a peer of O. To do this, we give L
+// the same chain operand as O, we create a token factor from the chain results
+// of O and L, and we replace all uses of O's chain result with that token
+// factor (see spliceIntoChain below for this last part).
+bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
+ ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ ISD::LoadExtType ET) const {
+ SDLoc dl(Op);
+ if (ET == ISD::NON_EXTLOAD &&
+ (Op.getOpcode() == ISD::FP_TO_UINT ||
+ Op.getOpcode() == ISD::FP_TO_SINT) &&
+ isOperationLegalOrCustom(Op.getOpcode(),
+ Op.getOperand(0).getValueType())) {
+
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+ return true;
+ }
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
+ if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
+ LD->isNonTemporal())
+ return false;
+ if (LD->getMemoryVT() != MemVT)
+ return false;
+
+ RLI.Ptr = LD->getBasePtr();
+ if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
+ assert(LD->getAddressingMode() == ISD::PRE_INC &&
+ "Non-pre-inc AM on PPC?");
+ RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
+ LD->getOffset());
+ }
+
+ RLI.Chain = LD->getChain();
+ RLI.MPI = LD->getPointerInfo();
+ RLI.IsInvariant = LD->isInvariant();
+ RLI.Alignment = LD->getAlignment();
+ RLI.AAInfo = LD->getAAInfo();
+ RLI.Ranges = LD->getRanges();
+
+ RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
+ return true;
+}
+
+// Given the head of the old chain, ResChain, insert a token factor containing
+// it and NewResChain, and make users of ResChain now be users of that token
+// factor.
+void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
+ SDValue NewResChain,
+ SelectionDAG &DAG) const {
+ if (!ResChain)
+ return;
+
+ SDLoc dl(NewResChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ NewResChain, DAG.getUNDEF(MVT::Other));
+ assert(TF.getNode() != NewResChain.getNode() &&
+ "A new TF really is required here");
+
+ DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
+ DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
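
To make the chain splicing above concrete, here is a sketch of the DAG shape before and after (node names are illustrative only, not from the patch):

   Before (O is the original load; users follow O's chain result):

     Entry --> O --> users of O's chain

   After creating L with O's chain operand and calling spliceIntoChain:

     Entry --> O ----+
           \         +--> TokenFactor --> former users of O's chain
            +--> L --+
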
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
@@ -5509,7 +5646,70 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+ ReuseLoadInfo RLI;
+ SDValue Bits;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
+ Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasLFIWAX() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasFPCVT() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (((Subtarget.hasLFIWAX() &&
+ SINT.getOpcode() == ISD::SIGN_EXTEND) ||
+ (Subtarget.hasFPCVT() &&
+ SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
+ SINT.getOperand(0).getValueType() == MVT::i32) {
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.Alignment = 4;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ } else
+ Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
@@ -5530,23 +5730,36 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Ld;
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ ReuseLoadInfo RLI;
+ bool ReusingLoad;
+ if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
+ DAG))) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.Alignment = 4;
+ }
- assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
- "Expected an i32 store");
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad, 4, 4);
- SDValue Ops[] = { Store, FIdx };
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
+ if (ReusingLoad)
+ spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
} else {
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
@@ -6459,7 +6672,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
- SDLoc(Op));
+ SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -6494,6 +6707,15 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::READCYCLECOUNTER: {
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
+
+ Results.push_back(RTB);
+ Results.push_back(RTB.getValue(1));
+ Results.push_back(RTB.getValue(2));
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN: {
if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
Intrinsic::ppc_is_decremented_ctr_nonzero)
@@ -6558,11 +6780,44 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
// Other Lowering Code
//===----------------------------------------------------------------------===//
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *Func = Intrinsic::getDeclaration(M, Id);
+ return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence is taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (Ord == SequentiallyConsistent)
+ return callIntrinsic(Builder, Intrinsic::ppc_sync);
+ else if (isAtLeastRelease(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ else
+ return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (IsLoad && isAtLeastAcquire(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ // FIXME: this is too conservative; a dependent branch + isync is enough.
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ else
+ return nullptr;
+}
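
Put together, the two hooks yield the following fence placement for loads and stores (a summary of the code above; it matches the cited C++11-to-PPC mapping):

   load  seq_cst:  sync; ld; lwsync
   store seq_cst:  sync; st
   load  acquire:  ld; lwsync
   store release:  lwsync; st
   monotonic/unordered: no fences
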
+
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -6585,9 +6840,8 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister(
- is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
- (const TargetRegisterClass *) &PPC::GPRCRegClass);
+ RegInfo.createVirtualRegister(is64bit ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass);
// thisMBB:
// ...
@@ -6624,7 +6878,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
bool is8bit, // operation
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -6652,9 +6907,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC =
- is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
- (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
@@ -6752,7 +7006,8 @@ llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6851,7 +7106,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
- static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6900,7 +7155,8 @@ MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7004,6 +7260,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT)
+ return emitPatchPoint(MI, BB);
+
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
@@ -7012,7 +7272,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
@@ -7035,7 +7296,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
@@ -7044,11 +7306,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -7079,7 +7345,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
} else {
@@ -7104,6 +7372,51 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
TII->get(PPC::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ } else if (MI->getOpcode() == PPC::ReadTB) {
+ // To read the 64-bit time-base register on a 32-bit target, we read the
+ // two halves. Should the counter have wrapped while it was being read, we
+ // need to try again.
+ // ...
+ // readLoop:
+ // mfspr Rx,TBU # load from TBU
+ // mfspr Ry,TB # load from TB
+ // mfspr Rz,TBU # load from TBU
+ // cmpw crX,Rx,Rz # check if 'old'='new'
+ // bne readLoop # branch if they're not equal
+ // ...
+
+ MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, readMBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(readMBB);
+ BB = readMBB;
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned LoReg = MI->getOperand(0).getReg();
+ unsigned HiReg = MI->getOperand(1).getReg();
+
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
+
+ unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
+ .addReg(HiReg).addReg(ReadAgainReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+
+ BB->addSuccessor(readMBB);
+ BB->addSuccessor(sinkMBB);
}
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
@@ -7262,9 +7575,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC =
- is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
- (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
@@ -7440,151 +7752,76 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPOne =
- DAG.getConstantFP(1.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPOne, FPOne, FPOne, FPOne);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
-
- return Est;
+ ++RefinementSteps;
+ UseOneConstNR = true;
+ return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
-
return SDValue();
}
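
As a worked check of the step counts: each Newton iteration roughly doubles the number of correct bits, and the significands are 24 bits (f32) and 53 bits (f64) counting the implicit bit, so:

   2^-14 estimate (hasRecipPrec): 14 -> 28                  (1 step,  f32)
                                  14 -> 28 -> 56            (2 steps, f64)
   2^-5 estimate:                 5 -> 10 -> 20 -> 40       (3 steps, f32)
                                  5 -> 10 -> 20 -> 40 -> 80 (4 steps, f64)
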
-SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal sqrt, we need to find the zero of the function:
- // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
- // =>
- // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
- // As a result, we precompute A/2 prior to the iteration loop.
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPThreeHalves =
- DAG.getConstantFP(1.5, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPThreeHalves, FPThreeHalves,
- FPThreeHalves, FPThreeHalves);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
- // this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
+ ++RefinementSteps;
+ return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ }
+ return SDValue();
+}
- Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
+bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ // Note: This functionality is used only when unsafe-fp-math is enabled. On
+ // cores with reciprocal estimates (which are used for division when
+ // unsafe-fp-math is enabled), this combine is redundant with the default
+ // combiner logic once the division -> reciprocal/multiply transformation
+ // has taken place. As a result, it matters more for older cores than for
+ // newer ones.
- return Est;
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are two or more FDIVs (for embedded cores with only
+ // one FP pipeline) or three or more FDIVs (for generic OOO cores).
+ switch (Subtarget.getDarwinDirective()) {
+ default:
+ return NumUsers > 2;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return NumUsers > 1;
}
-
- return SDValue();
}
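
A source-level illustration of what this hook enables (hypothetical function, compiled with unsafe-fp-math):

   double sum_of_quotients(double a, double b, double c, double d) {
     // Three FDIV users of d, so NumUsers > 2 holds even for the generic
     // out-of-order threshold; the combiner may rewrite this as
     //   double r = 1.0 / d;  return a*r + b*r + c*r;
     return a / d + b / d + c / d;
   }
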
-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
- EVT VT = LS->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LS->getBasePtr();
SDValue BaseLoc = Base->getBasePtr();
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7615,11 +7852,77 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return false;
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+ EVT VT = LS->getMemoryVT();
+ SDValue Loc = LS->getBasePtr();
+ return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+ }
+
+ return false;
+}
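
For reference, the operand layouts assumed by the intrinsic cases above (operand 0 is the chain, operand 1 the target-constant intrinsic ID):

   INTRINSIC_W_CHAIN load:  (chain, id, ptr)       -> base address is getOperand(2)
   INTRINSIC_VOID store:    (chain, id, val, ptr)  -> base address is getOperand(3)
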
+
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
-// token factors and other loads (but nothing else). As a result, a true
-// results indicates that it is safe to create a new consecutive load adjacent
-// to the load provided.
+// token factors and other loads (but nothing else). As a result, a true result
+// indicates that it is safe to create a new consecutive load adjacent to the
+// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
SDValue Chain = LD->getChain();
EVT VT = LD->getMemoryVT();
@@ -7633,10 +7936,10 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
// nodes just above the top-level loads and token factors.
while (!Queue.empty()) {
SDNode *ChainNext = Queue.pop_back_val();
- if (!Visited.insert(ChainNext))
+ if (!Visited.insert(ChainNext).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
@@ -7664,17 +7967,17 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
- if (!Visited.insert(LoadRoot))
+ if (!Visited.insert(LoadRoot).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
for (SDNode::use_iterator UI = LoadRoot->use_begin(),
UE = LoadRoot->use_end(); UI != UE; ++UI)
- if (((isa<LoadSDNode>(*UI) &&
- cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ if (((isa<MemSDNode>(*UI) &&
+ cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
Queue.push_back(*UI);
}
@@ -7794,7 +8097,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -8008,7 +8311,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -8037,6 +8340,10 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
}
}
+ // The operands of a select that must be truncated when the select is
+ // promoted because the operand is actually part of the to-be-promoted set.
+ DenseMap<SDNode *, EVT> SelectTruncOp[2];
+
// Make sure that this is a self-contained cluster of operations (which
// is not quite the same thing as saying that everything has only one
// use).
@@ -8051,18 +8358,19 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (User != N && !Visited.count(User))
return SDValue();
- // Make sure that we're not going to promote the non-output-value
- // operand(s) or SELECT or SELECT_CC.
- // FIXME: Although we could sometimes handle this, and it does occur in
- // practice that one of the condition inputs to the select is also one of
- // the outputs, we currently can't deal with this.
+ // If we're going to promote the non-output-value operand(s) or SELECT or
+ // SELECT_CC, record them for truncation.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == Inputs[i])
- return SDValue();
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
} else if (User->getOpcode() == ISD::SELECT_CC) {
- if (User->getOperand(0) == Inputs[i] ||
- User->getOperand(1) == Inputs[i])
- return SDValue();
+ if (User->getOperand(0) == Inputs[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ if (User->getOperand(1) == Inputs[i])
+ SelectTruncOp[1].insert(std::make_pair(User,
+ User->getOperand(1).getValueType()));
}
}
}
@@ -8075,18 +8383,19 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (User != N && !Visited.count(User))
return SDValue();
- // Make sure that we're not going to promote the non-output-value
- // operand(s) or SELECT or SELECT_CC.
- // FIXME: Although we could sometimes handle this, and it does occur in
- // practice that one of the condition inputs to the select is also one of
- // the outputs, we currently can't deal with this.
+ // If we're going to promote the non-output-value operand(s) or SELECT or
+ // SELECT_CC, record them for truncation.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == PromOps[i])
- return SDValue();
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
} else if (User->getOpcode() == ISD::SELECT_CC) {
- if (User->getOperand(0) == PromOps[i] ||
- User->getOperand(1) == PromOps[i])
- return SDValue();
+ if (User->getOperand(0) == PromOps[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ if (User->getOperand(1) == PromOps[i])
+ SelectTruncOp[1].insert(std::make_pair(User,
+ User->getOperand(1).getValueType()));
}
}
}
@@ -8167,6 +8476,19 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
continue;
}
+ // For SELECT and SELECT_CC nodes, we do a similar check for any
+ // to-be-promoted comparison inputs.
+ if (PromOp.getOpcode() == ISD::SELECT ||
+ PromOp.getOpcode() == ISD::SELECT_CC) {
+ if ((SelectTruncOp[0].count(PromOp.getNode()) &&
+ PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
+ (SelectTruncOp[1].count(PromOp.getNode()) &&
+ PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+ }
+
SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
PromOp.getNode()->op_end());
@@ -8185,6 +8507,18 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
}
+ // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
+ // truncate them again to the original value type.
+ if (PromOp.getOpcode() == ISD::SELECT ||
+ PromOp.getOpcode() == ISD::SELECT_CC) {
+ auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
+ if (SI0 != SelectTruncOp[0].end())
+ Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
+ auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
+ if (SI1 != SelectTruncOp[1].end())
+ Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
+ }
+
DAG.ReplaceAllUsesOfValueWith(PromOp,
DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
}
@@ -8211,6 +8545,174 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
N->getOperand(0), ShiftCst), ShiftCst);
}
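
The situation the SelectTruncOp bookkeeping above handles can be sketched as follows (illustrative, not from the patch): an i1 value in the to-be-promoted cluster also feeds a select condition, e.g. select(c, c, false). After the cluster is promoted to i32/i64, operand 0 of the select must remain i1, so SelectTruncOp[0] records the select node together with that operand's original type, and a TRUNCATE back to that type is inserted when the node is rebuilt.
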
+SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::UINT_TO_FP) &&
+ "Need an int -> FP conversion node here");
+
+ if (!Subtarget.has64BitSupport())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Op(N, 0);
+
+ // Don't handle ppc_fp128 here or i1 conversions.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return SDValue();
+
+ // For i32 intermediate values, unfortunately, the conversion functions
+ // leave the upper 32 bits of the value undefined. Within the set of
+ // scalar instructions, we have no method for zero- or sign-extending the
+ // value. Thus, we cannot handle i32 intermediate values here.
+ if (Op.getOperand(0).getValueType() == MVT::i32)
+ return SDValue();
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
+ // If we're converting from a float to an int and back to a float again,
+ // then we don't need the store/load pair at all.
+ if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
+ Subtarget.hasFPCVT()) ||
+ (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
+ SDValue Src = Op.getOperand(0).getOperand(0);
+ if (Src.getValueType() == MVT::f32) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ DCI.AddToWorklist(Src.getNode());
+ }
+
+ unsigned FCTOp =
+ Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ;
+
+ SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(FP.getNode());
+ }
+
+ return FP;
+ }
+
+ return SDValue();
+}
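
A hypothetical source pattern that exercises the new combine (the unsigned variant additionally requires FPCVT):

   double round_toward_zero(double x) {
     // FP_TO_SINT feeding SINT_TO_FP: combineFPToIntToFP emits
     // fctidz + fcfid directly, with no store/load pair through the stack.
     return (double)(long long)x;
   }
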
+
+// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
+// builtins) into loads with swaps.
+SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX load");
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ MMO = LD->getMemOperand();
+ // If the MMO suggests this isn't a load of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem, it will be a bug.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ Base = Intrin->getBasePtr();
+ MMO = Intrin->getMemOperand();
+ break;
+ }
+ }
+
+ MVT VecTy = N->getValueType(0).getSimpleVT();
+ SDValue LoadOps[] = { Chain, Base };
+ SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
+ DAG.getVTList(VecTy, MVT::Other),
+ LoadOps, VecTy, MMO);
+ DCI.AddToWorklist(Load.getNode());
+ Chain = Load.getValue(1);
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Load);
+ DCI.AddToWorklist(Swap.getNode());
+ return Swap;
+}
+
+// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
+// builtins) into stores with swaps.
+SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ unsigned SrcOpnd;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX store");
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ Chain = ST->getChain();
+ Base = ST->getBasePtr();
+ MMO = ST->getMemOperand();
+ SrcOpnd = 1;
+ // If the MMO suggests this isn't a store of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem, it will be a bug.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ // Intrin->getBasePtr() oddly does not get what we want.
+ Base = Intrin->getOperand(3);
+ MMO = Intrin->getMemOperand();
+ SrcOpnd = 2;
+ break;
+ }
+ }
+
+ SDValue Src = N->getOperand(SrcOpnd);
+ MVT VecTy = Src.getValueType().getSimpleVT();
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Src);
+ DCI.AddToWorklist(Swap.getNode());
+ Chain = Swap.getValue(1);
+ SDValue StoreOps[] = { Chain, Swap, Base };
+ SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
+ DAG.getVTList(MVT::Other),
+ StoreOps, VecTy, MMO);
+ DCI.AddToWorklist(Store.getNode());
+ return Store;
+}
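
The reason for the swap: lxvd2x and stxvd2x transfer the two doublewords in big-endian element order regardless of target endianness, so on little-endian targets an xxswapd is needed to put the elements where LE code expects them. A sketch of the resulting sequences (illustrative registers):

   # v2f64 load on LE:           # v2f64 store on LE:
   lxvd2x  vs0, 0, r3            xxswapd vs0, vs0
   xxswapd vs0, vs0              stxvd2x vs0, 0, r3
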
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -8245,124 +8747,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC:
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
- case ISD::FDIV: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV);
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV,
- N->getOperand(1).getOperand(1));
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- }
-
- SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
-
- }
- break;
- case ISD::FSQRT: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
- // reciprocal sqrt.
- SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
-
- EVT VT = RV.getValueType();
-
- SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
- Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
- }
-
- SDValue ZeroCmp =
- DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- DCI.AddToWorklist(ZeroCmp.getNode());
- DCI.AddToWorklist(RV.getNode());
-
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
- ZeroCmp, Zero, RV);
- return RV;
- }
- }
-
- }
- break;
case ISD::SINT_TO_FP:
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
- if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
- // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
- // We allow the src/dst to be either f32/f64, but the intermediate
- // type must be i64.
- if (N->getOperand(0).getValueType() == MVT::i64 &&
- N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
- SDValue Val = N->getOperand(0).getOperand(0);
- if (Val.getValueType() == MVT::f32) {
- Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- }
-
- Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- if (N->getValueType(0) == MVT::f32) {
- Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
- DAG.getIntPtrConstant(0));
- DCI.AddToWorklist(Val.getNode());
- }
- return Val;
- } else if (N->getOperand(0).getValueType() == MVT::i32) {
- // If the intermediate type is i32, we can avoid the load/store here
- // too.
- }
- }
- }
- break;
- case ISD::STORE:
+ case ISD::UINT_TO_FP:
+ return combineFPToIntToFP(N, DCI);
+ case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
!cast<StoreSDNode>(N)->isTruncatingStore() &&
@@ -8413,14 +8801,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
+
+ // For little endian, VSX stores require generating xxswapd/lxvd2x.
+ EVT VT = N->getOperand(1).getValueType();
+ if (VT.isSimple()) {
+ MVT StoreVT = VT.getSimpleVT();
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
+ StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
+ return expandVSXStoreForLE(N, DCI);
+ }
break;
+ }
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT VT = LD->getValueType(0);
+
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (VT.isSimple()) {
+ MVT LoadVT = VT.getSimpleVT();
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
+ LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
+ return expandVSXLoadForLE(N, DCI);
+ }
+
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ // P8 and later hardware should just use LOAD.
+ !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
@@ -8458,17 +8871,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Intrinsic::ppc_altivec_lvsl);
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
- // Refine the alignment of the original load (a "new" load created here
- // which was identical to the first except for the alignment would be
- // merged with the existing node regardless).
+ // Create the new MMO for the new base load. It is like the original MMO,
+ // but represents an area in memory almost twice the vector size centered
+ // on the original address. If the address is unaligned, we might start
+ // reading up to (sizeof(vector)-1) bytes below the address of the
+ // original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(LD->getPointerInfo(),
- LD->getMemOperand()->getFlags(),
- LD->getMemoryVT().getStoreSize(),
- ABIAlignment);
- LD->refineAlignment(MMO);
- SDValue BaseLoad = SDValue(LD, 0);
+ MachineMemOperand *BaseMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -LD->getMemoryVT().getStoreSize()+1,
+ 2*LD->getMemoryVT().getStoreSize()-1);
+
+ // Create the new base load.
+ SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
+ getPointerTy());
+ SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
+ SDValue BaseLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ BaseLoadOps, MVT::v4i32, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
@@ -8490,21 +8911,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ MachineMemOperand *ExtraMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
- DAG.getLoad(VT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncOffset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), ABIAlignment);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ ExtraLoadOps, MVT::v4i32, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
- if (BaseLoad.getValueType() != MVT::v4i32)
- BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
-
- if (ExtraLoad.getValueType() != MVT::v4i32)
- ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
-
// Because vperm has a big-endian bias, we must reverse the order
// of the input vectors and complement the permute control vector
// when generating little endian code. We have already handled the
@@ -8521,36 +8939,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
- // Now we need to be really careful about how we update the users of the
- // original load. We cannot just call DCI.CombineTo (or
- // DAG.ReplaceAllUsesWith for that matter), because the load still has
- // uses created here (the permutation for example) that need to stay.
- SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- while (UI != UE) {
- SDUse &Use = UI.getUse();
- SDNode *User = *UI;
- // Note: BaseLoad is checked here because it might not be N, but a
- // bitcast of N.
- if (User == Perm.getNode() || User == BaseLoad.getNode() ||
- User == TF.getNode() || Use.getResNo() > 1) {
- ++UI;
- continue;
- }
-
- SDValue To = Use.getResNo() ? TF : Perm;
- ++UI;
-
- SmallVector<SDValue, 8> Ops;
- for (const SDUse &O : User->ops()) {
- if (O == Use)
- Ops.push_back(To);
- else
- Ops.push_back(O);
- }
-
- DAG.UpdateNodeOperands(User, Ops);
- }
-
+ // The output of the permutation is our loaded result; the TokenFactor is
+ // our new chain.
+ DCI.CombineTo(N, Perm, TF);
return SDValue(N, 0);
}
}
@@ -8585,6 +8976,34 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ return expandVSXLoadForLE(N, DCI);
+ }
+ }
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
+ TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ return expandVSXStoreForLE(N, DCI);
+ }
+ }
+ break;
+ }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -8795,6 +9214,38 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+SDValue
+PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ // fold (sdiv X, pow2)
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::i64 && !Subtarget.isPPC64())
+ return SDValue();
+ if ((VT != MVT::i32 && VT != MVT::i64) ||
+ !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+
+ bool IsNegPow2 = (-Divisor).isPowerOf2();
+ unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
+ SDValue ShiftAmt = DAG.getConstant(Lg2, VT);
+
+ SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
+ if (Created)
+ Created->push_back(Op.getNode());
+
+ if (IsNegPow2) {
+ Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
+ if (Created)
+ Created->push_back(Op.getNode());
+ }
+
+ return Op;
+}
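
PPCISD::SRA_ADDZE denotes the classic srawi/addze idiom for signed division by a power of two: srawi sets CA when a negative dividend had one-bits shifted out, and addze adds that carry back, converting the floor semantics of the shift into truncation toward zero. For example (illustrative registers):

   # x / 4, signed 32-bit:
   srawi r4, r3, 2   # -5 >> 2 = -2, CA = 1
   addze r4, r4      # -2 + 1 = -1, matching C's -5 / 4
   # For a divisor of -4, the code above additionally negates the result.
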
+
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -8836,6 +9287,40 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
+unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8: {
+ if (!ML)
+ break;
+
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
+ getInstrInfo());
+
+ // For small loops (between 5 and 8 instructions), align to a 32-byte
+ // boundary so that the entire loop fits in one instruction-cache line.
+ uint64_t LoopSize = 0;
+ for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
+ for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+ LoopSize += TII->GetInstSizeInBytes(J);
+
+ if (LoopSize > 16 && LoopSize <= 32)
+ return 5;
+
+ break;
+ }
+ }
+
+ return TargetLowering::getPrefLoopAlignment(ML);
+}
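Since PPC instructions are a fixed 4 bytes, the test LoopSize > 16 && LoopSize <= 32 selects exactly the 5-to-8-instruction loops the comment describes, and the returned 5 is a log2 value: 2^5 = 32-byte alignment. A 6-instruction body, for example, is 24 bytes, so once aligned it sits entirely within one 32-byte block.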
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
@@ -8968,7 +9453,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
@@ -9192,6 +9678,92 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
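The offset/size values model the fact that lvx-style accesses ignore the low four bits of the address. For a 16-byte memVT, Info.offset = -16 + 1 = -15 and Info.size = 2*16 - 1 = 31, so the modeled range [ptr-15, ptr+15] covers whichever 16-byte-aligned block is actually touched, wherever ptr points within it.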
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -9243,6 +9815,31 @@ bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return NumBits1 == 64 && NumBits2 == 32;
}
+bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ // Generally speaking, zexts are not free, but they are free when they can be
+ // folded with other operations.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
+ EVT MemVT = LD->getMemoryVT();
+ if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
+ (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
+ (LD->getExtensionType() == ISD::NON_EXTLOAD ||
+ LD->getExtensionType() == ISD::ZEXTLOAD))
+ return true;
+ }
+
+ // FIXME: Add other cases...
+ // - 32-bit shifts with a zext to i64
+ // - zext after ctlz, bswap, etc.
+ // - zext after and by a constant mask
+
+ return TargetLowering::isZExtFree(Val, VT2);
+}
+
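A minimal illustration of the load case (hypothetical IR, in the typed-pointer syntax of this era): the PPC narrow loads zero the high-order bits by definition, so the zext folds away entirely.

  %b = load i8* %p          ; selects lbz, which already zeroes the upper bits
  %w = zext i8 %b to i32    ; free: no additional instruction is needed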
+bool PPCTargetLowering::isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return true;
+}
+
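This is free because the PPC FPRs hold single-precision values in double-precision format, so an f32-to-f64 extension is only a register-class change; the existing (f64 (fextend f32:$src)) pattern in PPCInstrInfo.td selects it to a bare COPY_TO_REGCLASS.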
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
@@ -9251,9 +9848,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
-bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -9268,7 +9866,8 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
- if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+ VT != MVT::v4f32 && VT != MVT::v4i32)
return false;
} else {
return false;
@@ -9301,6 +9900,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+const MCPhysReg *
+PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
+ // LR is a callee-save register, but we must treat it as clobbered by any call
+ // site. Hence we include LR in the scratch registers, which are in turn added
+ // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
+ // to CTR, which is used by any indirect call.
+ static const MCPhysReg ScratchRegs[] = {
+ PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
+ };
+
+ return ScratchRegs;
+}
+
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7179adc9704f..1b585d8e5090 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#include "PPC.h"
#include "PPCInstrInfo.h"
@@ -61,6 +61,9 @@ namespace llvm {
///
VPERM,
+ /// The CMPB instruction (takes two operands of i32 or i64).
+ CMPB,
+
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a
@@ -94,6 +97,12 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
+    /// The combination of sra[wd]i and addze used to implement signed
+    /// integer division by a power of 2. The first operand is the dividend,
+    /// and the second is the constant shift amount (the log2 of the
+    /// divisor).
+ SRA_ADDZE,
+
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
@@ -111,6 +120,10 @@ namespace llvm {
/// BCTRL instruction.
BCTRL,
+ /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+ /// instruction and the TOC reload required on SVR4 PPC64.
+ BCTRL_LOAD_TOC,
+
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -125,6 +138,10 @@ namespace llvm {
/// implement truncation of i32 or i64 to i1.
ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
+ // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+ // target (returns (Lo, Hi)). It takes a chain operand.
+ READ_TIME_BASE,
+
// EH_SJLJ_SETJMP - SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
@@ -250,6 +267,13 @@ namespace llvm {
/// operand identifies the operating system entry point.
SC,
+ /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+ /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
+ /// or stxvd2x instruction. The chain is necessary because the
+ /// sequence replaces a load and needs to provide the same number
+ /// of outputs.
+ XXSWAPD,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -289,7 +313,17 @@ namespace llvm {
/// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
/// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
/// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- ADDI_TOC_L
+ ADDI_TOC_L,
+
+    /// VSRC, CHAIN = LXVD2X CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to an lxvd2x instruction that will be followed by
+ /// an xxswapd.
+ LXVD2X,
+
+ /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to an stxvd2x instruction that will be preceded by
+ /// an xxswapd.
+ STXVD2X
};
}
@@ -345,7 +379,7 @@ namespace llvm {
const PPCSubtarget &Subtarget;
public:
- explicit PPCTargetLowering(PPCTargetMachine &TM);
+ explicit PPCTargetLowering(const PPCTargetMachine &TM);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
@@ -353,9 +387,22 @@ namespace llvm {
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ bool isCheapToSpeculateCttz() const override {
+ return true;
+ }
+
+ bool isCheapToSpeculateCtlz() const override {
+ return true;
+ }
+
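Both hooks are safe because ctlz maps directly to the cheap, branch-free cntlzw/cntlzd, and cttz can be expanded branch-free as well, e.g. via the well-known identity cttz(x) = popcount((x & -x) - 1), which holds for every x including x = 0 (where it correctly yields the bit width).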
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+    /// Return true if the target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -394,8 +441,14 @@ namespace llvm {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
+ SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const override;
+
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
@@ -404,6 +457,13 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
@@ -466,6 +526,10 @@ namespace llvm {
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
+
+ bool isFPExtFree(EVT VT) const override;
+
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -473,6 +537,10 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const override;
+
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -491,9 +559,10 @@ namespace llvm {
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- bool allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -501,6 +570,8 @@ namespace llvm {
/// expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+
// Should we expand the build vector with shuffles?
bool
shouldExpandBuildVectorWithShuffles(EVT VT,
@@ -526,6 +597,29 @@ namespace llvm {
}
private:
+
+ struct ReuseLoadInfo {
+ SDValue Ptr;
+ SDValue Chain;
+ SDValue ResChain;
+ MachinePointerInfo MPI;
+ bool IsInvariant;
+ unsigned Alignment;
+ AAMDNodes AAInfo;
+ const MDNode *Ranges;
+
+ ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
+ };
+
+ bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
+ void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
+ SelectionDAG &DAG) const;
+
+ void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG, SDLoc dl) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -589,7 +683,7 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
- bool isVarArg,
+ bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
@@ -654,7 +748,7 @@ namespace llvm {
SDValue
LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
- bool isVarArg, bool isTailCall,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -663,7 +757,7 @@ namespace llvm {
SDValue
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
- bool isVarArg, bool isTailCall,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -671,7 +765,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool isTailCall,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -683,8 +777,14 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
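The refinement loop itself lives in the target-independent DAG combiner; these hooks only supply the frsqrte/fres-based initial estimates and the iteration count. For the reciprocal square root, each Newton-Raphson step improves an estimate x of 1/sqrt(d) as x' = x * (1.5 - 0.5 * d * x * x), roughly doubling the number of correct bits per step.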
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index b85d9c0ab62c..5c6675dd36b3 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -81,6 +81,9 @@ def HI48_64 : SDNodeXForm<imm, [{
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR8, RM] in
+ def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
+ [(retflag)]>, Requires<[In64BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
[]>,
@@ -167,6 +170,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
}
}
}
+
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
+ def BCTRL8_LDinto_toc :
+ XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
+ (ins memrix:$src),
+ "bctrl\n\tld 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc ixaddr:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
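As an illustration (operand details are ABI-dependent; under ELFv1 the caller's TOC pointer is saved at offset 40 of the stack frame), the sequence this definition matches is an indirect call fused with the TOC restore:

  mtctr 12          ; callee address in CTR
  bctrl             ; indirect call; the callee may run with a different TOC
  ld 2, 40(1)       ; restore our TOC pointer from its linkage-area save slot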
} // Interpretation64Bit
// FIXME: Duplicating this for the asm parser should be unnecessary, but the
@@ -282,7 +296,7 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -299,7 +313,7 @@ def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
let Defs = [CTR8] in
@@ -366,7 +380,7 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
let Interpretation64Bit = 1 in {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
@@ -517,7 +531,7 @@ defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
}
} // Interpretation64Bit
-let isCompare = 1, neverHasSideEffects = 1 in {
+let isCompare = 1, hasSideEffects = 0 in {
def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
"cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
@@ -529,7 +543,7 @@ let isCompare = 1, neverHasSideEffects = 1 in {
IIC_IntCompare>, isPPC64;
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
"sld", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
@@ -540,13 +554,21 @@ defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
"srad", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
-let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS),
+ "cntlzw", "$rA, $rS", IIC_IntGeneral, []>;
+
defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
"extsb", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
"extsh", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+
+defm SLW8 : XForm_6r<31, 24, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral, []>;
+defm SRW8 : XForm_6r<31, 536, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral, []>;
} // Interpretation64Bit
// For fast-isel:
@@ -575,6 +597,11 @@ def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+let isCodeGenOnly = 1, isCommutable = 1 in
+def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>;
+
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32 bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
@@ -600,14 +627,12 @@ def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
[(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
}
-let neverHasSideEffects = 1 in {
-let isCommutable = 1 in {
+let hasSideEffects = 0 in {
defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
(ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
-}
// Rotate instructions.
defm RLDCL : MDSForm_1r<30, 8,
@@ -645,7 +670,11 @@ defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
"rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>;
-let isCommutable = 1 in {
+defm RLWNM8 : MForm_2r<23, (outs g8rc:$rA),
+ (ins g8rc:$rS, g8rc:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+ []>;
+
// RLWIMI can be commuted if the rotate amount is zero.
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
@@ -653,7 +682,6 @@ defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
IIC_IntRotate, []>, PPC970_DGroup_Cracked,
RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
-}
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
@@ -661,7 +689,7 @@ def ISEL8 : AForm_4<31, 15,
"isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
[]>;
} // Interpretation64Bit
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
} // End FXU Operations.
@@ -702,7 +730,7 @@ def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
} // end fast-isel isCodeGenOnly
// Update forms.
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memri:$addr),
@@ -750,7 +778,7 @@ def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
// Update forms.
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
@@ -821,7 +849,14 @@ def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
+def LHBRX8 : XForm_1<31, 790, (outs g8rc:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", IIC_LdStLoad, []>;
+def LWBRX8 : XForm_1<31, 534, (outs g8rc:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", IIC_LdStLoad, []>;
+}
+
+let mayLoad = 1, hasSideEffects = 0 in {
def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", IIC_LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
@@ -1006,7 +1041,7 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
//
-let PPC970_Unit = 3, neverHasSideEffects = 1,
+let PPC970_Unit = 3, hasSideEffects = 0,
Uses = [RM] in { // FPU Operations.
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", IIC_FPGeneral,
@@ -1132,3 +1167,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)),
def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
+// 64-bits atomic loads and stores
+def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+
+def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
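A sketch of what these select (monotonic shown; stronger orderings additionally get the barriers from the emitLeadingFence/emitTrailingFence hooks declared earlier in this patch):

  %v = load atomic i64* %p monotonic, align 8       ; selects a plain ld
  store atomic i64 %v, i64* %q monotonic, align 8   ; selects a plain std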
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index b271b5d5aa21..4ef08eb1130d 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -275,48 +275,64 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
-let isCodeGenOnly = 1 in {
-def DSS : DSS_Form<822, (outs),
- (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSSALL : DSS_Form<822, (outs),
- (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DST : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTT : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTST : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTSTT : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
+def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
+ Deprecated<DeprecatedDST> {
+ let A = 0;
+ let B = 0;
+}
-def DST64 : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+def DSSALL : DSS_Form<1, 822, (outs), (ins),
+ "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>,
+ Deprecated<DeprecatedDST> {
+ let STRM = 0;
+ let A = 0;
+ let B = 0;
+}
+
+def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTT64 : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTST64 : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTSTT64 : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
+
+let isCodeGenOnly = 1 in {
+  // The same instructions as above, defined to formally take 64-bit registers.
+ def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
@@ -764,30 +780,6 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
-// DS* intrinsics
-def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
-def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
-
-// * 32-bit
-def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
- (DST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, $rA, $rB)>;
-
-// * 64-bit
-def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
-
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index b424d1101416..cf71b1c59869 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRBUILDER_H
-#define POWERPC_INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 1e4396cd1017..186d5dce5705 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -380,6 +380,17 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
let Inst{31} = RC;
}
+class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> {
+ let RST = 0;
+}
+
+class XForm_attn<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Inst{21-30} = xo;
+}
+
// This is the same as XForm_base_r3xo, but the first two operands are swapped
// when code is emitted.
class XForm_base_r3xo_swapped
@@ -417,6 +428,22 @@ class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let B = 0;
}
+class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<1> WS;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{20} = WS;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -457,6 +484,52 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<4> CT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6} = 0;
+ let Inst{7-10} = CT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<4> SR;
+
+ let Inst{6-10} = RS;
+ let Inst{12-15} = SR;
+ let Inst{21-30} = xo;
+}
+
+class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> MO;
+
+ let Inst{6-10} = MO;
+ let Inst{21-30} = xo;
+}
+
+class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<5> RB;
+
+ let Inst{6-10} = RS;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+}
+
class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -764,10 +837,9 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
// DSS_Form - Form X instruction, used for altivec dss* instructions.
-class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
- bits<1> T;
bits<2> STRM;
bits<5> A;
bits<5> B;
@@ -873,6 +945,45 @@ class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
+ bits<6> opcode2, bits<2> xo2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo1;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = RST;
+ let Inst{43-47} = DS_RA{18-14}; // Register #
+ let Inst{48-61} = DS_RA{13-0}; // Displacement.
+ let Inst{62-63} = xo2;
+}
+
+class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
+ bits<5> bo, bits<5> bi, bit lk,
+ bits<6> opcode2, bits<2> xo2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XLForm_2_and_DSForm_1<opcode1, xo1, lk, opcode2, xo2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
// 1.7.8 XFX-Form
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9bac91d7d412..2492229dc319 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -75,7 +76,7 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II =
- &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
@@ -230,10 +231,12 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
- MI->getOpcode() != PPC::RLWIMIo &&
- MI->getOpcode() != PPC::RLWIMI8 &&
- MI->getOpcode() != PPC::RLWIMI8o)
+ MI->getOpcode() != PPC::RLWIMIo)
return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
+ // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
+ // changing the relative order of the mask operands might change what happens
+  // to the high bits of the mask (and, thus, the result).
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
@@ -331,6 +334,11 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode));
}
+/// getNoopForMachoTarget - Return the noop instruction to use.
+void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(PPC::NOP);
+}
+
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -1106,7 +1114,7 @@ bool PPCInstrInfo::PredicateInstruction(
MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
unsigned OpC = MI->getOpcode();
- if (OpC == PPC::BLR) {
+ if (OpC == PPC::BLR || OpC == PPC::BLR8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
@@ -1270,6 +1278,7 @@ bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
return false;
case PPC::B:
case PPC::BLR:
+ case PPC::BLR8:
case PPC::BCTR:
case PPC::BCTR8:
case PPC::BCTRL:
@@ -1588,6 +1597,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ } else if (Opcode == TargetOpcode::STACKMAP) {
+ return MI->getOperand(1).getImm();
+ } else if (Opcode == TargetOpcode::PATCHPOINT) {
+ PatchPointOpers Opers(MI);
+ return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
} else {
const MCInstrDesc &Desc = get(Opcode);
return Desc.getSize();
@@ -1617,6 +1631,7 @@ protected:
bool Changed = false;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
MachineInstr *MI = I;
@@ -1682,16 +1697,26 @@ protected:
// In theory, there could be other uses of the addend copy before this
// fma. We could deal with this, but that would require additional
// logic below and I suspect it will not occur in any relevant
- // situations.
- bool OtherUsers = false;
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ bool OtherUsers = false, KillsAddendSrc = false;
for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
- J != JE; --J)
+ J != JE; --J) {
if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
OtherUsers = true;
break;
}
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
- if (OtherUsers)
+ if (OtherUsers || KillsAddendSrc)
continue;
// Find one of the product operands that is killed by this instruction.
@@ -1712,10 +1737,11 @@ protected:
if (!KilledProdOp)
continue;
- // In order to replace the addend here with the source of the copy,
- // it must still be live here.
- if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
- continue;
+ // For virtual registers, verify that the addend source register
+ // is live here (as should have been assured above).
+ assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+ LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+ "Addend source register is not live!");
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
@@ -1737,6 +1763,12 @@ protected:
unsigned OldFMAReg = MI->getOperand(0).getReg();
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
@@ -1827,7 +1859,7 @@ public:
LIS = &getAnalysis<LiveIntervals>();
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -1980,7 +2012,7 @@ public:
// If we don't have VSX on the subtarget, don't do anything.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2057,7 +2089,7 @@ public:
// If we don't have VSX don't bother doing anything here.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2113,7 +2145,8 @@ protected:
I = ReturnMBB.SkipPHIsAndLabels(I);
// The block must be essentially empty except for the blr.
- if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
+ if (I == ReturnMBB.end() ||
+ (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
I != ReturnMBB.getLastNonDebugInstr())
return Changed;
@@ -2126,7 +2159,7 @@ protected:
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -2214,7 +2247,7 @@ protected:
public:
bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 83f14c6cf214..4add6f9781e8 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRUCTIONINFO_H
-#define POWERPC_INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#include "PPC.h"
#include "PPCRegisterInfo.h"
@@ -106,6 +106,15 @@ public:
UseNode, UseIdx);
}
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const override {
+ // Machine LICM should hoist all instructions in low-register-pressure
+ // situations; none are sufficiently free to justify leaving in a loop
+ // body.
+ return false;
+ }
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const override;
@@ -228,6 +237,8 @@ public:
/// instruction may be. This returns the maximum number of bytes.
///
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 197e751464ad..d597cdcbd644 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -118,6 +118,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
+
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
// amounts. These nodes are generated by the multi-precision shift code.
def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
@@ -153,6 +155,10 @@ def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
+ SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -424,6 +430,15 @@ def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+ let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+ let PrintMethod = "printU4ImmOperand";
+ let ParserMatchClass = PPCU4ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -453,7 +468,7 @@ def u6imm : Operand<i32> {
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
@@ -463,7 +478,7 @@ def s16imm : Operand<i32> {
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addU16ImmOperands";
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
@@ -473,7 +488,7 @@ def u16imm : Operand<i32> {
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s17imm : Operand<i32> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -549,7 +564,7 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
def PPCDispRIOperand : AsmOperandClass {
let Name = "DispRI"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
@@ -561,6 +576,27 @@ def PPCDispRIXOperand : AsmOperandClass {
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
}
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE2Operand;
+}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
@@ -578,6 +614,21 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let EncoderMethod = "getMemRIXEncoding";
let DecoderMethod = "decodeMemRIXOperands";
}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE8DisEncoding";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE4DisEncoding";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE2DisEncoding";
+}
// A single-register address. This is used with the SjLj
// pseudo-instructions.
@@ -624,6 +675,12 @@ def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
+def IsE500 : Predicate<"PPCSubTarget->isE500()">;
+def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -959,7 +1016,7 @@ def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
- [(retflag)]>;
+ [(retflag)]>, Requires<[In32BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
[]>;
@@ -980,6 +1037,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let Defs = [LR] in
def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
+let Defs = [LR] in
+ def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
@@ -1258,8 +1318,15 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
- (DCBT xoaddr:$dst)>;
+ (DCBT xoaddr:$dst)>; // data prefetch for loads
+def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+ (DCBTST xoaddr:$dst)>; // data prefetch for stores
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+ (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
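For reference, the intrinsic operands line up with these patterns as follows (llvm.prefetch takes address, rw, locality, cache-type; locality is the ignored imm):

  call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)  ; read, data  -> dcbt
  call void @llvm.prefetch(i8* %p, i32 1, i32 3, i32 1)  ; write, data -> dcbtst
  call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 0)  ; read, inst  -> icbt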
// Atomic operations
let usesCustomInserter = 1 in {
@@ -1393,7 +1460,7 @@ def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
@@ -1643,17 +1710,19 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+ "sync $L", IIC_LdStSync, []>;
let isCodeGenOnly = 1 in {
def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
- "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+ "msync", IIC_LdStSync, []> {
let L = 0;
}
}
-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -1734,7 +1803,7 @@ def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
"ori 2, 2, 0", IIC_IntSimple, []>;
}
-let isCompare = 1, neverHasSideEffects = 1 in {
+let isCompare = 1, hasSideEffects = 0 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IIC_IntCompare>;
def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
@@ -1742,7 +1811,7 @@ let isCompare = 1, neverHasSideEffects = 1 in {
}
}
-let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
let isCommutable = 1 in {
defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
"nand", "$rA, $rS, $rB", IIC_IntSimple,
@@ -1785,7 +1854,7 @@ defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
}
let PPC970_Unit = 1 in { // FXU Operations.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
"srawi", "$rA, $rS, $SH", IIC_IntShift,
[(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
@@ -1798,8 +1867,13 @@ defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
"extsh", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+
+let isCommutable = 1 in
+def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>;
}
-let isCompare = 1, neverHasSideEffects = 1 in {
+let isCompare = 1, hasSideEffects = 0 in {
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
"cmpw $crD, $rA, $rB", IIC_IntCompare>;
def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
@@ -1809,7 +1883,7 @@ let isCompare = 1, neverHasSideEffects = 1 in {
let PPC970_Unit = 3 in { // FPU Operations.
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
-let isCompare = 1, neverHasSideEffects = 1 in {
+let isCompare = 1, hasSideEffects = 0 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
@@ -1818,7 +1892,7 @@ let isCompare = 1, neverHasSideEffects = 1 in {
}
let Uses = [RM] in {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
@@ -1839,7 +1913,7 @@ let Uses = [RM] in {
[(set f32:$frD, (frnd f32:$frB))]>;
}
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
@@ -1876,13 +1950,13 @@ let Uses = [RM] in {
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
"fmr", "$frD, $frB", IIC_FPGeneral,
[]>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
-let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
+let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
"fabs", "$frD, $frB", IIC_FPGeneral,
@@ -1931,11 +2005,20 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
// XL-Form instructions. condition register logical ops.
//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
"mcrf $BF, $BFA", IIC_BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// FIXME: According to the ISA (section 2.5.1 of version 2.06), the
+// condition-register logical instructions have preferred forms. Specifically,
+// it is preferred that the bit specified by the BT field be in the same
+// condition register as the bit specified by the BB field. We might want to account
+// for this via hinting the register allocator and anti-dep breakers, or we
+// could constrain the register class to force this constraint and then loosen
+// it during register allocation via convertToThreeAddress or some similar
+// mechanism.
+
let isCommutable = 1 in {
def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
@@ -2009,6 +2092,12 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
"mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>;
+// A pseudo-instruction used to implement the read of the 64-bit cycle counter
+// on a 32-bit target.
+let hasSideEffects = 1, usesCustomInserter = 1 in
+def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+ "#ReadTB", []>;
+
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
"mfctr $rT", IIC_SprMFSPR>,
@@ -2070,7 +2159,7 @@ let mayLoad = 1 in
def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -2087,7 +2176,7 @@ def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
// Pseudo instruction to perform FADD in round-to-zero mode.
let usesCustomInserter = 1, Uses = [RM] in {
@@ -2116,7 +2205,7 @@ let Uses = [RM] in {
}
-let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
let isCommutable = 1 in
defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
@@ -2187,7 +2276,7 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
//
-let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
+let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations.
let Uses = [RM] in {
let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
@@ -2283,7 +2372,7 @@ let Uses = [RM] in {
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
@@ -2319,7 +2408,7 @@ defm RLWNM : MForm_2r<23, (outs gprc:$rA),
"rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
[]>;
}
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
//===----------------------------------------------------------------------===//
// PowerPC Instruction Patterns
@@ -2442,15 +2531,13 @@ def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp)
tglobaltlsaddr:$disp))]>;
// Support for Position-independent code
-def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
- "#LWZtoc",
- [(set i32:$rD,
- (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
-def GetGBRO: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#GetGBRO", []>;
-// Update the Global(GOT) Base Register with the above offset.
-def UpdateGBR: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
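In scalar terms, the merged UpdateGBR pseudo computes: the word immediately preceding the function label holds (GOT - label), and adding the label's runtime address to it yields the GOT base. A hedged illustration of that arithmetic (the helper name and the fixed -4 placement are expository assumptions, not the emitted sequence):

  #include <cstdint>

  // Illustrative only: fetch the offset word stored just before labelAddr
  // and relocate it into an absolute GOT base address.
  inline uintptr_t computeGOTBase(uintptr_t labelAddr) {
    int32_t offset = *reinterpret_cast<const int32_t *>(labelAddr - 4);
    return labelAddr + offset;  // labelAddr + (GOT - labelAddr) == GOT
  }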
// Standard shifts. These are represented separately from the real shifts above
@@ -2487,8 +2574,15 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated to work with both the 64-bit and 32-bit
+// versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
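In the two seq_cst rules above, the immediate 7 is LLVM's integer encoding of the SequentiallyConsistent ordering at the time of this patch, matched at both i32 and i64 so 32- and 64-bit targets hit the same rule before falling through to the generic (imm) pattern. At the source level the split looks like this minimal sketch (names are illustrative):

  #include <atomic>

  std::atomic<int> flag{0};

  void fences() {
    // Release (and acquire/acq_rel) fences only need the lightweight
    // barrier, sync 1 (lwsync)...
    std::atomic_thread_fence(std::memory_order_release);
    flag.store(1, std::memory_order_relaxed);
    // ...while a seq_cst fence requires the heavyweight sync 0 (hwsync).
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }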
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2507,6 +2601,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
(FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
@@ -3023,6 +3118,16 @@ def : Pat<(i1 (not (trunc i64:$in))),
// PowerPC Instructions used for assembler/disassembler only
//
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def LSWI : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B),
+ "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B),
+ "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
"isync", IIC_SprISYNC, []>;
@@ -3035,9 +3140,47 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", IIC_LdStLoad, []>;
+def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
+ "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR),
+ "mtsr $SR, $RS", IIC_SprMTSR>;
+
+def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR),
+ "mfsr $RS, $SR", IIC_SprMFSR>;
+
+def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
+ "mtsrin $RS, $RB", IIC_SprMTSR>;
+
+def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
+ "mfsrin $RS, $RB", IIC_SprMFSR>;
+
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsr $RS, $L", IIC_SprMTMSR>;
+def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
+ "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> {
+ let L = 0;
+}
+
+def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
+ Requires<[IsBookE]> {
+ bits<1> E;
+
+ let Inst{16} = E;
+ let Inst{21-30} = 163;
+}
+
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
+ "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
+ "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", IIC_SprMFMSR, []>;
@@ -3055,15 +3198,86 @@ def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+def TLBIA : XForm_0<31, 370, (outs), (ins),
+ "tlbia", IIC_SprTLBIA, []>;
+
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
"tlbsync", IIC_SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
"tlbiel $RB", IIC_SprTLBIEL, []>;
+def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
+ "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
+ "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
"tlbie $RB,$RS", IIC_SprTLBIE, []>;
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
+ "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
+ "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
+ "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
+ "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>;
+def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
+ (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>, isDOT;
+
+def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
+
+def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>,
+ Requires<[IsBookE]>;
+def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>,
+ Requires<[IsBookE]>;
+
+def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
+ Requires<[IsE500]>;
+def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
+ Requires<[IsE500]>;
+
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
+ "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+
+def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>;
+
+def LBZCIX : XForm_base_r3xo<31, 853, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lbzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LHZCIX : XForm_base_r3xo<31, 821, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lhzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LWZCIX : XForm_base_r3xo<31, 789, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lwzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LDCIX : XForm_base_r3xo<31, 885, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "ldcix $RST, $A, $B", IIC_LdStLoad, []>;
+
+def STBCIX : XForm_base_r3xo<31, 981, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stbcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STHCIX : XForm_base_r3xo<31, 949, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "sthcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STWCIX : XForm_base_r3xo<31, 917, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stwcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STDCIX : XForm_base_r3xo<31, 1013, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stdcix $RST, $A, $B", IIC_LdStLoad, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -3086,15 +3300,17 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
-def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
def : InstAlias<"waitimpl", (WAIT 2)>;
+def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3103,9 +3319,57 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
+
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
+
+def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
+def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+
def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
@@ -3116,6 +3380,60 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+foreach BATR = 0-3 in {
+ def : InstAlias<"mtdbatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtdbatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>,
+ Requires<[IsPPC6xx]>;
+}
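The nested !add above is just 2*BATR + base: the upper and lower halves of each BAT pair sit in adjacent SPRs, and consecutive pairs are two SPRs apart. A standalone compile-time restatement of the numbering (function names are illustrative):

  // IBAT0U..IBAT3U = 528,530,532,534   IBAT0L..IBAT3L = 529,531,533,535
  // DBAT0U..DBAT3U = 536,538,540,542   DBAT0L..DBAT3L = 537,539,541,543
  constexpr unsigned ibatU(unsigned n) { return 2 * n + 528; }
  constexpr unsigned ibatL(unsigned n) { return 2 * n + 529; }
  constexpr unsigned dbatU(unsigned n) { return 2 * n + 536; }
  constexpr unsigned dbatL(unsigned n) { return 2 * n + 537; }
  static_assert(dbatU(3) == 542 && ibatL(0) == 529, "BAT SPR numbering");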
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
+
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+
+def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
+
def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
@@ -3135,25 +3453,25 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
-def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
-
-def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
-def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
-
-def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
@@ -3172,6 +3490,15 @@ def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -3420,3 +3747,18 @@ defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;
+// Atomic loads
+def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
+def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
+def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
+def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
+def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
+def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
+
+// Atomic stores
+def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
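These patterns lean on aligned, naturally sized PowerPC loads and stores already being single-copy atomic, so monotonic (relaxed) atomic operations can select the ordinary zero-extending loads and plain stores; any ordering the source requires is matched separately by the atomic_fence patterns earlier in the file. A minimal sketch of the source shapes that reach these patterns (names are illustrative):

  #include <atomic>

  int loadRelaxed(const std::atomic<int> &c) {
    return c.load(std::memory_order_relaxed);  // selects lwz / lwzx
  }
  void storeRelaxed(std::atomic<int> &c, int v) {
    c.store(v, std::memory_order_relaxed);     // selects stw / stwx
  }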
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
new file mode 100644
index 000000000000..cc3a4d20a9b2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -0,0 +1,447 @@
+//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Signal Processing Engine extension to
+// the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> {
+ let RB = 0;
+}
+
+class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> crD;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-8} = crD;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<21> D;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{20} = D{0};
+ let Inst{19} = D{1};
+ let Inst{18} = D{2};
+ let Inst{17} = D{3};
+ let Inst{16} = D{4};
+ let Inst{15} = D{5};
+ let Inst{14} = D{6};
+ let Inst{13} = D{7};
+ let Inst{12} = D{8};
+ let Inst{11} = D{9};
+ let Inst{11-20} = D{0-9};
+ let Inst{21-31} = xo;
+}
+
+let Predicates = [HasSPE], isAsmParserOnly = 1 in {
+
+def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldd $RT, $dst", IIC_VecFP>;
+def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldw $RT, $dst", IIC_VecFP>;
+def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldh $RT, $dst", IIC_VecFP>;
+def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhesplat $RT, $dst", IIC_VecFP>;
+def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhousplat $RT, $dst", IIC_VecFP>;
+def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhossplat $RT, $dst", IIC_VecFP>;
+def EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhe $RT, $dst", IIC_VecFP>;
+def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhou $RT, $dst", IIC_VecFP>;
+def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhos $RT, $dst", IIC_VecFP>;
+def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwwsplat $RT, $dst", IIC_VecFP>;
+def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhsplat $RT, $dst", IIC_VecFP>;
+
+def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdd $RT, $dst", IIC_VecFP>;
+def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdh $RT, $dst", IIC_VecFP>;
+def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdw $RT, $dst", IIC_VecFP>;
+def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwhe $RT, $dst", IIC_VecFP>;
+def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwho $RT, $dst", IIC_VecFP>;
+def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwe $RT, $dst", IIC_VecFP>;
+def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwo $RT, $dst", IIC_VecFP>;
+
+def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA),
+ "evmra $RT, $RA", IIC_VecFP> {
+ let RB = 0;
+}
+
+def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "brinc $RT, $RA, $RB", IIC_VecFP>;
+def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA),
+ "evabs $RT, $RA", IIC_VecFP>;
+
+def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evaddiw $RT, $RB, $RA", IIC_VecFP>;
+def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddsmiaaw $RT, $RA", IIC_VecFP>;
+def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddssiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddusiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUMIAAW : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddumiaaw $RT, $RA", IIC_VecFP>;
+def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evaddw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evand $RT, $RA, $RB", IIC_VecFP>;
+def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evandc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpeq $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgtu $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmplts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpltu $crD, $RA, $RB", IIC_VecFP>;
+
+def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlsw $RT, $RA", IIC_VecFP>;
+def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlzw $RT, $RA", IIC_VecFP>;
+
+def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivws $RT, $RA, $RB", IIC_VecFP>;
+def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "eveqv $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsb $RT, $RA", IIC_VecFP>;
+def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsh $RT, $RA", IIC_VecFP>;
+
+def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlddx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldwx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDHX : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldhx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhoux $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhosx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehi $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehilo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelohi $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumian $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAA : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousianw $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMI : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFA : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumian $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnand $RT, $RA, $RB", IIC_VecFP>;
+
+def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA),
+ "evneg $RT, $RA", IIC_VecFP>;
+
+def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnor $RT, $RA, $RB", IIC_VecFP>;
+def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evor $RT, $RA, $RB", IIC_VecFP>;
+def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evorc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evrlwi $RT, $RA, $RB", IIC_VecFP>;
+def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evrlw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA),
+ "evrndw $RT, $RA", IIC_VecFP>;
+
+def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evslwi $RT, $RA, $RB", IIC_VecFP>;
+def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evslw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplatfi $RT, $RA", IIC_VecFP>;
+def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplati $RT, $RA", IIC_VecFP>;
+
+def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwis $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwiu $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrws $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstddx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdhx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdwx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhox $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwox $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfssiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfsmiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfumiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfusiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsubfw $RT, $RA, $RB", IIC_VecFP>;
+def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB),
+ "evsubifw $RT, $RA, $RB", IIC_VecFP>;
+def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evxor $RT, $RA, $RB", IIC_VecFP>;
+
+} // HasSPE
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 49bcc4876d33..b21b251443eb 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -25,6 +25,23 @@ def vsfrc : RegisterOperand<VSFRC> {
let ParserMatchClass = PPCRegVSFRCAsmOperand;
}
+// Little-endian-specific nodes.
+def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
+ SDTCisSameAs<0, 1>
+]>;
+
+def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+
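lxvd2x and stxvd2x always transfer the two doublewords in big-endian element order, so on a little-endian subtarget the expected lowering is lxvd2x followed by xxswapd for a load, and xxswapd followed by stxvd2x for a store; PPCxxswapd models that fix-up in the DAG. A sketch of source that produces the swapped sequences on LE (GNU vector extension; names are illustrative):

  typedef double v2f64 __attribute__((vector_size(16)));

  v2f64 loadVec(const v2f64 *p) { return *p; }   // LE: lxvd2x; xxswapd
  void storeVec(v2f64 *p, v2f64 v) { *p = v; }   // LE: xxswapd; stxvd2x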
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -40,30 +57,34 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
}
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, canFoldAsLoad = 1 in {
- def LXSDX : XForm_1<31, 588,
+ def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
- def LXVD2X : XForm_1<31, 844,
+ def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
"lxvd2x $XT, $src", IIC_LdStLFD,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
+ [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
- def LXVDSX : XForm_1<31, 332,
+ def LXVDSX : XX1Form<31, 332,
(outs vsrc:$XT), (ins memrr:$src),
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
- def LXVW4X : XForm_1<31, 780,
+ def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
- "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
}
// Store indexed instructions
@@ -76,11 +97,12 @@ let Uses = [RM] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
- "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
}
// Add/Mul Instructions
@@ -641,24 +663,36 @@ let Uses = [RM] in {
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
def XSMINDP : XX3Form<60, 168,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
def XVMAXDP : XX3Form<60, 224,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
def XVMINDP : XX3Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
def XVMAXSP : XX3Form<60, 192,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
def XVMINSP : XX3Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
} // Uses = [RM]
@@ -714,7 +748,32 @@ let Uses = [RM] in {
def XXSPLTW : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
-} // neverHasSideEffects
+} // hasSideEffects
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+
+ def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+} // usesCustomInserter
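usesCustomInserter hands these pseudos to EmitInstrWithCustomInserter after selection, where the usual expansion is a diamond: a conditional branch on the CR bit over one copy, a fall-through copy, and a PHI at the join block. The control-flow shape as a scalar sketch (this is the shape only, not the actual PPCISelLowering code):

  // The three blocks of the expansion map onto this if/else:
  double selectShape(bool cond, double t, double f) {
    double result;
    if (cond)        // conditional branch on the CR bit
      result = t;    // true-edge copy
    else
      result = f;    // false-edge ("copy0") block
    return result;   // join block; result becomes a PHI
  }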
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -734,6 +793,8 @@ def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+
+let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -741,6 +802,18 @@ def : Pat<(f64 (vector_extract v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
def : Pat<(f64 (vector_extract v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+}
+
+let Predicates = [IsLittleEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+}
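The little-endian patterns mirror the big-endian ones because element numbering is reversed between memory order and the register's doubleword layout: on LE, extracting element 1 is the plain sub_64 subregister copy, while element 0 needs the XXPERMDI swap first. For example (GNU vector extension; names are illustrative):

  typedef double v2f64 __attribute__((vector_size(16)));

  double elem0(v2f64 v) { return v[0]; }  // LE: XXPERMDI swap, then extract
  double elem1(v2f64 v) { return v[1]; }  // LE: direct subregister copy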
// Additional fnmsub patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -811,6 +884,57 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
(XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+// Loads.
+def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
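With both operands i1, each integer condition collapses to a single CR logical op once the compares are read as unsigned one-bit comparisons: lhs < rhs holds only for lhs=0, rhs=1, i.e. rhs & ~lhs, which is exactly CRANDC $rhs, $lhs, and the rest of the table follows the same way. Restated as one-bit boolean functions (standalone sketch):

  bool setlt(bool a, bool b) { return !a &&  b; }  // CRANDC b, a
  bool setle(bool a, bool b) { return !a ||  b; }  // CRORC  b, a
  bool seteq(bool a, bool b) { return  a ==  b; }  // CREQV  a, b
  bool setge(bool a, bool b) { return  a || !b; }  // CRORC  a, b
  bool setgt(bool a, bool b) { return  a && !b; }  // CRANDC a, b
  bool setne(bool a, bool b) { return  a !=  b; }  // CRXOR  a, b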
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
} // AddedComplexity
} // HasVSX
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
deleted file mode 100644
index e5f113a0c030..000000000000
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the 32-bit PowerPC target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCJITInfo.h"
-#include "PPCRelocations.h"
-#include "PPCSubtarget.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
- : Subtarget(STI), is64Bit(STI.isPPC64()) {
- useGOT = 0;
-}
-
-#define BUILD_ADDIS(RD,RS,IMM16) \
- ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_ORI(RD,RS,UIMM16) \
- ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_ORIS(RD,RS,UIMM16) \
- ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_RLDICR(RD,RS,SH,ME) \
- ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
- (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
-#define BUILD_MTSPR(RS,SPR) \
- ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
-#define BUILD_BCCTRx(BO,BI,LINK) \
- ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
-#define BUILD_B(TARGET, LINK) \
- ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
-
-// Pseudo-ops
-#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
-#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
-#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
-#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
-
-static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
- intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
- unsigned *AtI = (unsigned*)(intptr_t)At;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- AtI[0] = BUILD_B(Offset, isCall); // b/bl target
- } else if (!is64Bit) {
- AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_MTCTR(12); // mtctr r12
- AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
- } else {
- AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
- AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
- AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[5] = BUILD_MTCTR(12); // mtctr r12
- AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
- }
-}
-
-extern "C" void PPC32CompilationCallback();
-extern "C" void PPC64CompilationCallback();
-
-// The first clause of the preprocessor directive looks wrong, but it is
-// necessary when compiling this code on non-PowerPC hosts.
-#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
-void PPC32CompilationCallback() {
- llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
-}
-#elif !defined(__ELF__)
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC32CompilationCallback\n"
-"_PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // PowerPC32 ABI linkage - 24 bytes
- // parameters - 32 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 32 bytes
- "mflr r0\n"
- "stw r0, 8(r1)\n"
- "stwu r1, -208(r1)\n"
- // Save all int arg registers
- "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
- "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
- "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
- "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
- // Save all call-clobbered FP regs.
- "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
- "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
- "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
- "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
- "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
- "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
- "stfd f1, 72(r1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr r3, r0\n"
- "lwz r2, 208(r1)\n" // stub's frame
- "lwz r4, 8(r2)\n" // stub's lr
- "li r5, 0\n" // 0 == 32 bit
- "bl _LLVMPPCCompilationCallback\n"
- "mtctr r3\n"
- // Restore all int arg registers
- "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
- "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
- "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
- "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
- // Restore all FP arg registers
- "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
- "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
- "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
- "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
- "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
- "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
- "lfd f1, 72(r1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz r1, 208(r1)\n"
- "lwz r2, 8(r1)\n"
- "mtlr r2\n"
- "bctr\n"
- );
-
-#else
-// ELF PPC 32 support
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC32CompilationCallback\n"
-"PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // 8 double registers - 64 bytes
- // 8 int registers - 32 bytes
- "mflr 0\n"
- "stw 0, 4(1)\n"
- "stwu 1, -104(1)\n"
- // Save all int arg registers
- "stw 10, 100(1)\n" "stw 9, 96(1)\n"
- "stw 8, 92(1)\n" "stw 7, 88(1)\n"
- "stw 6, 84(1)\n" "stw 5, 80(1)\n"
- "stw 4, 76(1)\n" "stw 3, 72(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 8, 64(1)\n"
- "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
- "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
- "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
- "stfd 1, 8(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr 3, 0\n"
- "lwz 5, 104(1)\n" // stub's frame
- "lwz 4, 4(5)\n" // stub's lr
- "li 5, 0\n" // 0 == 32 bit
- "bl LLVMPPCCompilationCallback\n"
- "mtctr 3\n"
- // Restore all int arg registers
- "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
- "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
- "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
- "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
- // Restore all FP arg registers
- "lfd 8, 64(1)\n"
- "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
- "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
- "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
- "lfd 1, 8(1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz 1, 104(1)\n"
- "lwz 0, 4(1)\n"
- "mtlr 0\n"
- "bctr\n"
- );
-#endif
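
Taken together, the hard-coded offsets in the two wrappers above tile their frames exactly. Below is a minimal standalone consistency check; every constant is copied from the stwu/stw/stfd operands above, nothing else is assumed:

    // frame_layout_check.cpp -- offsets copied from the wrappers above.
    int main() {
      // Darwin PPC32 wrapper: stwu r1, -208(r1); f1..f13 saved at 72..168,
      // r3..r10 saved at 176..204.
      static_assert(72 + 13 * 8 == 176, "FPR area ends where the GPR area begins");
      static_assert(176 + 8 * 4 == 208, "GPR area ends at the frame size");

      // SVR4/ELF PPC32 wrapper: stwu r1, -104(r1); f1..f8 saved at 8..64,
      // r3..r10 saved at 72..100 (LR lives at 4(r1) in the caller's frame).
      static_assert(8 + 8 * 8 == 72, "FPR area ends where the GPR area begins");
      static_assert(72 + 8 * 4 == 104, "GPR area ends at the frame size");
      return 0;
    }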
-
-#if !defined(__powerpc64__) && !defined(__ppc64__)
-void PPC64CompilationCallback() {
- llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
-}
-#else
-# ifdef __ELF__
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC64CompilationCallback\n"
-#if _CALL_ELF == 2
- ".type PPC64CompilationCallback,@function\n"
-"PPC64CompilationCallback:\n"
-#else
- ".section \".opd\",\"aw\",@progbits\n"
- ".align 3\n"
-"PPC64CompilationCallback:\n"
- ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
- ".size PPC64CompilationCallback,24\n"
- ".previous\n"
- ".align 4\n"
- ".type PPC64CompilationCallback,@function\n"
-".L.PPC64CompilationCallback:\n"
-#endif
-# else
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC64CompilationCallback\n"
-"_PPC64CompilationCallback:\n"
-# endif
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13].
- // FIXME: need to save v[0-19] for altivec?
- // Set up a proper stack frame.
- // Layout
- // PowerPC64 ABI linkage - 48 bytes
- // parameters - 64 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 64 bytes
- "mflr 0\n"
- "std 0, 16(1)\n"
- "stdu 1, -280(1)\n"
- // Save all int arg registers
- "std 10, 272(1)\n" "std 9, 264(1)\n"
- "std 8, 256(1)\n" "std 7, 248(1)\n"
- "std 6, 240(1)\n" "std 5, 232(1)\n"
- "std 4, 224(1)\n" "std 3, 216(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
- "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
- "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
- "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
- "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
- "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
- "stfd 1, 112(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 1.
- "mr 3, 0\n" // return address (still in r0)
- "ld 5, 280(1)\n" // stub's frame
- "ld 4, 16(5)\n" // stub's lr
- "li 5, 1\n" // 1 == 64 bit
-# ifdef __ELF__
- "bl LLVMPPCCompilationCallback\n"
- "nop\n"
-# else
- "bl _LLVMPPCCompilationCallback\n"
-# endif
- "mtctr 3\n"
- // Restore all int arg registers
- "ld 10, 272(1)\n" "ld 9, 264(1)\n"
- "ld 8, 256(1)\n" "ld 7, 248(1)\n"
- "ld 6, 240(1)\n" "ld 5, 232(1)\n"
- "ld 4, 224(1)\n" "ld 3, 216(1)\n"
- // Restore all FP arg registers
- "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
- "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
- "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
- "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
- "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
- "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
- "lfd 1, 112(1)\n"
- // Pop 3 frames off the stack and branch to target
- "ld 1, 280(1)\n"
- "ld 0, 16(1)\n"
- "mtlr 0\n"
- // XXX: any special TOC handling in the ELF case for JIT?
- "bctr\n"
- );
-#endif
-
-extern "C" {
-LLVM_LIBRARY_VISIBILITY void *
-LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
- // Adjust the pointer to the address of the call instruction in the stub
- // emitted by emitFunctionStub, rather than the instruction after it.
- unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
- unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
-
- void *Target = JITCompilerFunction(StubCallAddr);
-
- // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
- // it to branch directly to the destination. If so, rewrite it so it does not
- // need to go through the stub anymore.
- unsigned OrigCallInst = *OrigCallAddr;
- if ((OrigCallInst >> 26) == 18) { // Direct call.
- intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- // Clear the original target out.
- OrigCallInst &= (63 << 26) | 3;
- // Fill in the new target.
- OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
- // Replace the call.
- *OrigCallAddr = OrigCallInst;
- }
- }
-
- // Assert that we are coming from a stub that was created with our
- // emitFunctionStub.
- if ((*StubCallAddr >> 26) == 18)
- StubCallAddr -= 3;
- else {
- assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
- StubCallAddr -= is64Bit ? 9 : 6;
- }
-
- // Rewrite the stub with an unconditional branch to the target, for any users
- // who took the address of the stub.
- EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
-
- // Return the address of the target function; the assembly wrapper above
- // restores all registers and then branches to it (mtctr/bctr).
- return Target;
-}
-}
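
For reference, the field manipulation in the callback above targets the PowerPC I-form branch: primary opcode in bits 31-26, the signed 24-bit word displacement (LI) in bits 25-2, and AA/LK in bits 1-0. A minimal round-trip sketch of the same masking (standalone, not LLVM API; assumes the usual arithmetic right shift for the signed decode):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Inst = (18u << 26) | 1;  // "bl <somewhere>": opcode 18, LK = 1
      int32_t WordOffset = -12345;      // branch displacement in 4-byte words
      assert(WordOffset >= -(1 << 23) && WordOffset < (1 << 23)); // in range?

      Inst &= (63u << 26) | 3;          // clear the old LI field, keep opcode/AA/LK
      Inst |= (uint32_t(WordOffset) & ((1u << 24) - 1)) << 2;     // splice new LI

      // Decode: shift LI (bits 25..2) up to the sign bit, then back down.
      int32_t Decoded = int32_t(Inst << 6) >> 8;
      assert(Decoded == WordOffset);
      assert((Inst >> 26) == 18 && (Inst & 1) == 1);  // still a "bl"
      return 0;
    }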
-
-
-
-TargetJITInfo::LazyResolverFn
-PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
- JITCompilerFunction = Fn;
- return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
-}
-
-TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
- // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
- // instructions to save the caller's address if this is a lazy-compilation
- // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
- // into a register and jump through it.
- StubLayout Result = {10*4, 4};
- return Result;
-}
-
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
-defined(__APPLE__)
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-#endif
-
-void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- // If this is just a call to an external function, emit a branch instead of a
- // call. The code is the same except for one bit of the last instruction.
- if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
- Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- void *Addr = (void*)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 7*4);
- return Addr;
- }
-
- void *Addr = (void*)JCE.getCurrentPCValue();
- if (is64Bit) {
- JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (Subtarget.isDarwinABI()){
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
- } else {
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
- }
- intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 10*4);
- return Addr;
-}
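
The hex words emitted above can be cross-checked against the PowerPC D-form layout: primary opcode in bits 31-26, RS in 25-21, RA in 20-16, and a signed 16-bit displacement in the low half. A small sketch (encodeDForm is a hypothetical helper, not part of this file):

    #include <cassert>
    #include <cstdint>

    static uint32_t encodeDForm(uint32_t Opcode, uint32_t RS, uint32_t RA,
                                int16_t D) {
      return (Opcode << 26) | (RS << 21) | (RA << 16) | uint16_t(D);
    }

    int main() {
      assert(encodeDForm(37, 1, 1, -32) == 0x9421ffe0);  // stwu r1, -32(r1)
      assert(encodeDForm(36, 11, 1, 40) == 0x91610028);  // stw r11, 40(r1)
      return 0;
    }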
-
-
-void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
- intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
- switch ((PPC::RelocationType)MR->getRelocationType()) {
- default: llvm_unreachable("Unknown relocation type!");
- case PPC::reloc_pcrel_bx:
- // PC-relative relocation for b and bl instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
- break;
- case PPC::reloc_pcrel_bcx:
- // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
- // bcx instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
- break;
- case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
- case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
- ResultPtr += MR->getConstantVal();
-
- // If this is a high-part access, get the high-part.
- if (MR->getRelocationType() == PPC::reloc_absolute_high) {
- // If the low part will have a carry (really a borrow) from the low
- // 16-bits into the high 16, add a bit to borrow from.
- if (((int)ResultPtr << 16) < 0)
- ResultPtr += 1 << 16;
- ResultPtr >>= 16;
- }
-
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
- unsigned HighBits = *RelocPos & ~65535;
- *RelocPos = LowBits | HighBits; // Slam into low 16-bits
- break;
- }
- case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
- ResultPtr += MR->getConstantVal();
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
- unsigned HighBits = *RelocPos & 0xFFFF0003;
- *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
- break;
- }
- }
- }
-}
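
The reloc_absolute_high adjustment above implements the usual @ha/@l split: the low half is later consumed by a sign-extending instruction, so the high half must pre-absorb a borrow whenever bit 15 of the address is set. A standalone sketch of the identity this preserves (same shift idioms as the code above, so it assumes arithmetic right shifts):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t Addr : {0x00001234u, 0x0000ffffu, 0x12348000u, 0xdeadbeefu}) {
        int32_t Hi = (int32_t)Addr;
        if ((int32_t)((uint32_t)Hi << 16) < 0) // low half sign-extends negative?
          Hi += 1 << 16;                       // then pre-add the borrow
        Hi >>= 16;                             // @ha
        int16_t Lo = (int16_t)(Addr & 0xffff); // @l, sign-extended when consumed
        assert(((uint32_t)Hi << 16) + (uint32_t)(int32_t)Lo == Addr);
      }
      return 0;
    }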
-
-void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Old, 7*4);
-}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
deleted file mode 100644
index b6b37ffb852b..000000000000
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PowerPC implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef POWERPC_JITINFO_H
-#define POWERPC_JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class PPCSubtarget;
-class PPCJITInfo : public TargetJITInfo {
-protected:
- PPCSubtarget &Subtarget;
- bool is64Bit;
-
-public:
- PPCJITInfo(PPCSubtarget &STI);
-
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
- unsigned char *GOTBase) override;
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-};
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 681eda8b9ea4..c417199548ad 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "PPCSubtarget.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPCSubtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -38,9 +38,9 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCContext &Ctx = AP.OutContext;
- bool isDarwin = TM.getSubtarget<PPCSubtarget>().isDarwin();
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
SmallString<128> Name;
StringRef Suffix;
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 9da1b1b5c754..4aff95a8a657 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -11,13 +11,14 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
void PPCFunctionInfo::anchor() { }
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
- const DataLayout *DL = MF.getTarget().getDataLayout();
+ const DataLayout *DL = MF.getSubtarget().getDataLayout();
return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Twine(MF.getFunctionNumber())+"$poff");
}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 9a2cec744274..37b2ff83c45f 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_MACHINE_FUNCTION_INFO_H
-#define PPC_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,6 +35,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// Frame index where the old base pointer is stored.
int BasePointerSaveIndex;
+ /// Frame index where the old PIC base pointer is stored.
+ int PICBasePointerSaveIndex;
+
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
/// PEI.
@@ -103,6 +106,7 @@ public:
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
BasePointerSaveIndex(0),
+ PICBasePointerSaveIndex(0),
HasSpills(false),
HasNonRISpills(false),
SpillsCR(false),
@@ -128,6 +132,9 @@ public:
int getBasePointerSaveIndex() const { return BasePointerSaveIndex; }
void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; }
+ int getPICBasePointerSaveIndex() const { return PICBasePointerSaveIndex; }
+ void setPICBasePointerSaveIndex(int Idx) { PICBasePointerSaveIndex = Idx; }
+
unsigned getMinReservedArea() const { return MinReservedArea; }
void setMinReservedArea(unsigned size) { MinReservedArea = size; }
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 17b836d1ed97..8a1d68011c5f 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 292 entries have cost 1
// 1384 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9895ee6267aa..aa0da7a97b14 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -99,6 +99,14 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
+ if (Subtarget.hasVSX())
+ return CSR_64_AllRegs_VSX_SaveList;
+ if (Subtarget.hasAltivec())
+ return CSR_64_AllRegs_Altivec_SaveList;
+ return CSR_64_AllRegs_SaveList;
+ }
+
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
CSR_Darwin64_Altivec_SaveList :
@@ -117,6 +125,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t*
PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::AnyReg) {
+ if (Subtarget.hasVSX())
+ return CSR_64_AllRegs_VSX_RegMask;
+ if (Subtarget.hasAltivec())
+ return CSR_64_AllRegs_Altivec_RegMask;
+ return CSR_64_AllRegs_RegMask;
+ }
+
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
CSR_Darwin64_Altivec_RegMask :
@@ -138,10 +154,18 @@ PPCRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
+void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
+ unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
+ for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
+ unsigned Reg = PseudoRegs[i];
+ Mask[Reg / 32] &= ~(1u << (Reg % 32));
+ }
+}
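
adjustStackMapLiveOutMask treats the live-out mask as one bit per register spread across an array of 32-bit words, so clearing register N means clearing bit (N % 32) of word (N / 32). A tiny standalone illustration (hypothetical 64-register mask, not the real PPC register numbering):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask[2] = {0xffffffffu, 0xffffffffu};
      for (unsigned Reg : {3u, 35u}) {       // pretend these are pseudo-regs
        Mask[Reg / 32] &= ~(1u << (Reg % 32));
      }
      assert(Mask[0] == ~(1u << 3));
      assert(Mask[1] == ~(1u << 3));         // reg 35 is bit 3 of word 1
      return 0;
    }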
+
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -223,7 +247,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned
PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -287,7 +311,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Determine whether 64-bit pointers are used.
bool LP64 = Subtarget.isPPC64();
DebugLoc dl = MI.getDebugLoc();
@@ -298,7 +322,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -403,7 +430,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -447,7 +474,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -520,7 +547,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -563,7 +590,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -610,7 +637,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -635,7 +662,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -697,7 +724,10 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI,
// Take into account whether it's an add or mem instruction
unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNum-1;
+ OffsetOperandNo = FIOperandNum - 1;
+ else if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ OffsetOperandNo = FIOperandNum + 1;
return OffsetOperandNo;
}
@@ -715,7 +745,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -769,7 +799,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
- bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+ bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
+ OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
@@ -793,8 +824,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
- "This should be handle in a target independent way");
- if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ "This should be handled in a target-independent way");
+ if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+ OpC == TargetOpcode::STACKMAP ||
+ OpC == TargetOpcode::PATCHPOINT)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
return;
}
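
The condition above folds the frame offset directly into the instruction whenever the encoding allows it: any signed 16-bit value for D-form, plus 4-byte alignment for DS-form ("isIXAddr") encodings, whose two low immediate bits are implicit zeros. A minimal sketch of that predicate (function and parameter names are illustrative, not LLVM API):

    #include <cassert>

    static bool fitsImmediateForm(int Offset, bool IsDSForm) {
      bool FitsInt16 = Offset >= -32768 && Offset <= 32767; // isInt<16>(Offset)
      return FitsInt16 && (!IsDSForm || (Offset & 3) == 0);
    }

    int main() {
      assert(fitsImmediateForm(-32768, /*IsDSForm=*/false));
      assert(!fitsImmediateForm(40000, false));  // out of 16-bit range
      assert(fitsImmediateForm(-32768, true));   // aligned and in range
      assert(!fitsImmediateForm(6, true));       // DS-form needs 4-byte alignment
      return 0;
    }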
@@ -840,7 +873,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!Subtarget.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -884,7 +917,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -922,8 +958,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
unsigned StackEst =
PPCFI->determineFrameLayout(MF, false, true);
@@ -957,7 +993,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -982,7 +1018,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.constrainRegClass(BaseReg,
@@ -1002,6 +1038,8 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Offset += MI->getOperand(OffsetOperandNo).getImm();
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 13a35f6309d4..4c2ef9067cb2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC32_REGISTERINFO_H
-#define POWERPC32_REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
#include "PPC.h"
#include "llvm/ADT/DenseMap.h"
@@ -49,6 +49,8 @@ public:
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
+ void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
/// We require the register scavenger.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 0442a941f45a..2fdf04eaaeee 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -131,8 +131,8 @@ foreach Index = 0-31 in {
}
// The representation of r0 when treated as the constant 0.
-def ZERO : GPR<0, "0">;
-def ZERO8 : GP8<ZERO, "0">;
+def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
+def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
// Representations of the frame pointer used by ISD::FRAMEADDR.
def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
@@ -203,7 +203,7 @@ def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
// Carry bit. In the architecture this is really bit 0 of the XER register
// (which really is SPR register 1); this is the only bit interesting to a
// compiler.
-def CARRY: SPR<1, "ca">;
+def CARRY: SPR<1, "ca">, DwarfRegNum<[76]>;
// FP rounding mode: bits 30 and 31 of the FP status and control register
// This is not allocated as a normal register; it appears only in
@@ -212,8 +212,7 @@ def CARRY: SPR<1, "ca">;
// most registers, it has to be done in code; to make this work all the
// return and call instructions are described as Uses of RM, so instructions
// that do nothing but change RM will not get deleted.
-// Also, in the architecture it is not really a SPR; 512 is arbitrary.
-def RM: SPR<512, "**ROUNDING MODE**">;
+def RM: PPCReg<"**ROUNDING MODE**">;
/// Register classes
// Allocate volatiles first
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
deleted file mode 100644
index 0b392f99b6d7..000000000000
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCRELOCATIONS_H
-#define PPCRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-// Hack to rid us of a PPC pre-processor symbol which is erroneously
-// defined in a PowerPC header file (bug in Linux/PPC)
-#ifdef PPC
-#undef PPC
-#endif
-
-namespace llvm {
- namespace PPC {
- enum RelocationType {
- // reloc_vanilla - A standard relocation, where the address of the
- // relocated object completely overwrites the address of the relocation.
- reloc_vanilla,
-
- // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
- reloc_pcrel_bx,
-
- // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
- // and other bcx instructions.
- reloc_pcrel_bcx,
-
- // reloc_absolute_high - Absolute relocation, for the loadhi instruction
- // (which is really addis). Add the high 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_high,
-
- // reloc_absolute_low - Absolute relocation, for the la instruction (which
- // is really an addi). Add the low 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_low,
-
- // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
- // instructions, which have two implicit zero bits.
- reloc_absolute_low_ix
- };
- }
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 1221d4149996..3ae3793f1a8b 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -106,6 +106,7 @@ def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
def IIC_SprSLBMFEE : InstrItinClass;
def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIA : InstrItinClass;
def IIC_SprTLBIEL : InstrItinClass;
def IIC_SprTLBIE : InstrItinClass;
@@ -118,6 +119,7 @@ include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
include "PPCScheduleP7.td"
+include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
index d3e426975ec0..03693cbeada0 100644
--- a/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -380,6 +380,9 @@ def P7Model : SchedMachineModel {
// Itineraries are queried instead.
let MispredictPenalty = 16;
+ // Try to make sure we have at least 10 dispatch groups in a loop.
+ let LoopMicroOpBufferSize = 40;
+
let Itineraries = P7Itineraries;
}
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
new file mode 100644
index 000000000000..07971809c877
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -0,0 +1,397 @@
+//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduling for the P8 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P8_DU1 : FuncUnit;
+def P8_DU2 : FuncUnit;
+def P8_DU3 : FuncUnit;
+def P8_DU4 : FuncUnit;
+def P8_DU5 : FuncUnit;
+def P8_DU6 : FuncUnit;
+def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
+def P8_DU8 : FuncUnit;
+
+// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
+def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
+
+// Load/Store pipelines can handle Stores, fixed-point loads, and simple
+// fixed-point operations.
+def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
+def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
+
+// Fixed Point unit
+def P8_FXU1 : FuncUnit; // FX pipeline 1
+def P8_FXU2 : FuncUnit; // FX pipeline 2
+
+// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
+// are combined on P7 and newer into a Vector Scalar Unit (VSU).
+// The P8 instruction latency documents still refer to the unit as the
+// FPU, so keep in mind that FPU==VSU.
+// In contrast to the P7, the VMX units on P8 are symmetric, so no need to
+// split vector integer ops or 128-bit load/store/perms to the specific units.
+def P8_FPU1 : FuncUnit; // VS pipeline 1
+def P8_FPU2 : FuncUnit; // VS pipeline 2
+
+def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P8_BRU : FuncUnit; // BR unit
+
+def P8Itineraries : ProcessorItineraries<
+ [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
+ P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
+ P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2,
+ P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
+ P8_LU2, P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<15, [P8_FXU1, P8_FXU2]>],
+ [15, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<23, [P8_FXU1, P8_FXU2]>],
+ [23, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
+ InstrStage<1, [P8_BRU]>],
+ [3, 1, 1]>,
+ // FIXME - the Br* groups below are not branch related, so should probably
+ // be renamed.
+ // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
+ // and should be 'First' in dispatch.
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1, 1]>,
+ // IIC_BrMCR consists of the mcrf instruction.
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1, 1]>,
+ // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
+ // should be first in the dispatch group.
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2 ], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 2, 1, 1]>,
+ // Update-Indexed form loads/stores are no longer first and last in the
+ // dispatch group. They are simply cracked, so require DU1,DU2.
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 4, 1, 1]>,
+ // first+last in dispatch group.
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ // first+last
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+// Stores are dual-issued from the issue queue, so may only take up one
+// dispatch slot. The instruction will be broken into two IOPS. The agen
+// op is issued to the LSU, and the data op (register fetch) is issued
+// to either the LU (GPR store) or the VSU (FPR store).
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2]>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 1, 1, 1]>,
+ // First+last
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [3, 1, 1]>
+]>;
+
+//===---------------------------------------------------------------------===//
+// P8 machine model for scheduling and other instruction cost heuristics.
+// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
+// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8Model : SchedMachineModel {
+ let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
+ // up to six non-branch instructions.
+ // up to two branches in a dispatch group.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ // Try to make sure we have at least 10 dispatch groups in a loop.
+ let LoopMicroOpBufferSize = 60;
+
+ let Itineraries = P8Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index b2e7f3b5f2ac..2c1378d5670d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCCSELECTIONDAGINFO_H
-#define POWERPCCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index b51512d335fc..d8ba1c71a0f5 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -32,14 +33,16 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
+static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
+cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
+/// Return the datalayout string for the given target triple.
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
std::string Ret;
// Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
+ if (T.getArch() == Triple::ppc64le)
Ret = "e";
else
Ret = "E";
@@ -48,18 +51,18 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
// PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
// pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ if (!is64Bit || T.getOS() == Triple::Lv2)
Ret += "-p:32:32";
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
+ if (is64Bit || !T.isOSDarwin())
Ret += "-i64:64";
else
Ret += "-f64:32:64";
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
+ if (is64Bit)
Ret += "-n32:64";
else
Ret += "-n32";
@@ -70,47 +73,20 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM,
- bool is64Bit, CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel),
- FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ const std::string &FS, const PPCTargetMachine &TM)
+ : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ DL(getDataLayoutString(TargetTriple)),
+ IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le),
+ TargetABI(PPC_ABI_UNKNOWN),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
TLInfo(TM), TSInfo(&DL) {}
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void PPCSubtarget::SetJITMode() {
- // JIT mode doesn't want lazy resolver stubs, it knows exactly where
- // everything is. This matters for PPC64, which codegens in PIC mode without
- // stubs.
- HasLazyResolverStubs = false;
-
- // Calls to external functions need to use indirect calls
- IsJITCodeModel = true;
-}
-
-void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
DarwinDirective = PPC::DIR_NONE;
@@ -119,8 +95,10 @@ void PPCSubtarget::initializeEnvironment() {
Use64BitRegs = false;
UseCRBits = false;
HasAltivec = false;
+ HasSPE = false;
HasQPX = false;
HasVSX = false;
+ HasP8Vector = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -134,15 +112,19 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
+ HasOnlyMSYNC = false;
+ IsPPC4xx = false;
+ IsPPC6xx = false;
+ IsE500 = false;
DeprecatedMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
- IsJITCodeModel = false;
}
-void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
@@ -156,35 +138,13 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- // Make sure 64-bit features are available when CPUname is generic
- std::string FullFS = FS;
-
- // If we are generating code for ppc64, verify that options make sense.
- if (IsPPC64) {
- Has64BitSupport = true;
- // Silently force 64-bit register use on ppc64.
- Use64BitRegs = true;
- if (!FullFS.empty())
- FullFS = "+64bit," + FullFS;
- else
- FullFS = "+64bit";
- }
-
- // At -O2 and above, track CR bits as individual registers.
- if (OptLevel >= CodeGenOpt::Default) {
- if (!FullFS.empty())
- FullFS = "+crbits," + FullFS;
- else
- FullFS = "+crbits";
- }
-
// Parse features string.
- ParseSubtargetFeatures(CPUName, FullFS);
+ ParseSubtargetFeatures(CPUName, FS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
- if (use64BitRegs() && !has64BitSupport())
- Use64BitRegs = false;
+ if (IsPPC64 && has64BitSupport())
+ Use64BitRegs = true;
// Set up darwin-specific properties.
if (isDarwin())
@@ -199,10 +159,15 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine endianness.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
- // FIXME: For now, we disable VSX in little-endian mode until endian
- // issues in those instructions can be addressed.
- if (IsLittleEndian)
- HasVSX = false;
+ // Determine default ABI.
+ if (TargetABI == PPC_ABI_UNKNOWN) {
+ if (!isDarwin() && IsPPC64) {
+ if (IsLittleEndian)
+ TargetABI = PPC_ABI_ELFv2;
+ else
+ TargetABI = PPC_ABI_ELFv1;
+ }
+ }
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -213,9 +178,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->isDeclaration();
if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
return false;
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
@@ -275,3 +238,7 @@ bool PPCSubtarget::useAA() const {
return needsAggressiveScheduling(DarwinDirective);
}
+bool PPCSubtarget::enableSubRegLiveness() const {
+ return UseSubRegLiveness;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index a3cedafb5ef2..a1b644d26858 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCSUBTARGET_H
-#define POWERPCSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
#include "PPCFrameLowering.h"
-#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
-#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -66,6 +65,12 @@ class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ // Calculates type size & alignment
+ const DataLayout DL;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -83,8 +88,10 @@ protected:
bool UseCRBits;
bool IsPPC64;
bool HasAltivec;
+ bool HasSPE;
bool HasQPX;
bool HasVSX;
+ bool HasP8Vector;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -95,24 +102,26 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
+ bool HasOnlyMSYNC;
+ bool IsE500;
+ bool IsPPC4xx;
+ bool IsPPC6xx;
bool DeprecatedMFTB;
bool DeprecatedDST;
bool HasLazyResolverStubs;
- bool IsJITCodeModel;
bool IsLittleEndian;
- /// TargetTriple - What processor and OS we're targeting.
- Triple TargetTriple;
-
- /// OptLevel - What default optimization level we're emitting code for.
- CodeGenOpt::Level OptLevel;
+ enum {
+ PPC_ABI_UNKNOWN,
+ PPC_ABI_ELFv1,
+ PPC_ABI_ELFv2
+ } TargetABI;
PPCFrameLowering FrameLowering;
- const DataLayout DL;
PPCInstrInfo InstrInfo;
- PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
@@ -121,17 +130,12 @@ public:
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
- CodeGenOpt::Level OptLevel);
+ const std::string &FS, const PPCTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
@@ -143,24 +147,32 @@ public:
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-
- const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const DataLayout *getDataLayout() const { return &DL; }
- const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- PPCJITInfo *getJITInfo() { return &JITInfo; }
- const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
- const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ const PPCFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the PowerPC target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
@@ -186,9 +198,6 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
- // isJITCodeModel - True if we're generating code for the JIT
- bool isJITCodeModel() const { return IsJITCodeModel; }
-
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -205,13 +214,20 @@ public:
bool hasFPRND() const { return HasFPRND; }
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasSPE() const { return HasSPE; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
+ bool hasP8Vector() const { return HasP8Vector; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
+ bool isPPC4xx() const { return IsPPC4xx; }
+ bool isPPC6xx() const { return IsPPC6xx; }
+ bool isE500() const { return IsE500; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -227,9 +243,7 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
- /// FIXME: Should use a command-line option.
- bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
- isLittleEndian(); }
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
bool enableEarlyIfConversion() const override { return hasISEL(); }
@@ -245,6 +259,8 @@ public:
MachineInstr *end,
unsigned NumRegionInstrs) const override;
bool useAA() const override;
+
+ bool enableSubRegLiveness() const override;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9563b9045c39..e07fd0575b50 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,16 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
+#include "PPCTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static cl::
@@ -30,6 +33,11 @@ static cl::opt<bool>
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+static cl::opt<bool>
+EnableGEPOpt("ppc-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(true));
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -37,15 +45,54 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
+                                      StringRef TT) {
+ std::string FullFS = FS;
+ Triple TargetTriple(TT);
+
+  // Make sure 64-bit features are available when the CPU name is generic.
+ if (TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le) {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ if (OL >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+ return FullFS;
+}
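
For a ppc64le triple built at -O2 with FS = "+vsx", the composed string comes out as "+crbits,+64bit,+vsx": "+64bit" is prepended first and "+crbits" second. A minimal standalone sketch of the same composition, using plain std::string in place of StringRef and Triple, with hypothetical bool parameters standing in for the triple and optimization-level checks:

#include <iostream>
#include <string>

// Mirrors computeFSAdditions: prepend "+64bit" for 64-bit triples, then
// "+crbits" at -O2 and above.
static std::string composeFS(std::string FS, bool Is64Bit, bool AtLeastO2) {
  if (Is64Bit)
    FS = FS.empty() ? "+64bit" : "+64bit," + FS;
  if (AtLeastO2)
    FS = FS.empty() ? "+crbits" : "+crbits," + FS;
  return FS;
}

int main() {
  std::cout << composeFS("+vsx", true, true) << "\n"; // +crbits,+64bit,+vsx
}
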
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+  // If it isn't a Mach-O file, then it's going to be a Linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return make_unique<TargetLoweringObjectFileMachO>();
+
+ return make_unique<PPC64LinuxTargetObjectFile>();
+}
+
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function-specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in
+// the Subtarget constructor below it.
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
+ CM, OL),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, TargetFS, *this) {
initAsmInfo();
}
+PPCTargetMachine::~PPCTargetMachine() {}
+
void PPC32TargetMachine::anchor() { }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
@@ -53,7 +100,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
void PPC64TargetMachine::anchor() { }
@@ -63,9 +110,34 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
+const PPCSubtarget *
+PPCTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
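
Each distinct (CPU, FS) pair seen across a module's functions therefore gets exactly one lazily constructed PPCSubtarget. The caching pattern in isolation, with a generic struct standing in for PPCSubtarget (all names hypothetical; std::make_unique stands in for llvm::make_unique):

#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Subtarget {
  std::string CPU, FS;
  Subtarget(std::string C, std::string F)
      : CPU(std::move(C)), FS(std::move(F)) {}
};

std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;

const Subtarget *getSubtarget(const std::string &CPU, const std::string &FS) {
  auto &I = SubtargetMap[CPU + FS]; // one entry per distinct CPU+FS pair
  if (!I)
    I = std::make_unique<Subtarget>(CPU, FS);
  return I.get();
}

int main() {
  const Subtarget *A = getSubtarget("pwr8", "+vsx");
  const Subtarget *B = getSubtarget("pwr8", "+vsx");
  std::cout << (A == B) << "\n"; // 1: the cached instance is reused
}
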
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
@@ -86,12 +158,13 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -99,6 +172,25 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
+void PPCPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ // Call SeparateConstOffsetFromGEP pass to extract constants within indices
+ // and lower a GEP with multiple indices to either arithmetic operations or
+ // multiple GEPs with single index.
+ addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+ // Call EarlyCSE pass to find and remove subexpressions in the lowered
+ // result.
+ addPass(createEarlyCSEPass());
+ // Do loop invariant code motion in case part of the lowered result is
+ // invariant.
+ addPass(createLICMPass());
+ }
+
+ TargetPassConfig::addIRPasses();
+}
+
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -124,40 +216,24 @@ bool PPCPassConfig::addInstSelector() {
return false;
}
-bool PPCPassConfig::addPreRegAlloc() {
+void PPCPassConfig::addPreRegAlloc() {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
- return false;
}
-bool PPCPassConfig::addPreSched2() {
- addPass(createPPCVSXCopyCleanupPass());
+void PPCPassConfig::addPreSched2() {
+ addPass(createPPCVSXCopyCleanupPass(), false);
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
-
- return true;
}
-bool PPCPassConfig::addPreEmitPass() {
+void PPCPassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createPPCEarlyReturnPass());
+ addPass(createPPCEarlyReturnPass(), false);
// Must run branch selection immediately preceding the asm printer.
- addPass(createPPCBranchSelectionPass());
- return false;
-}
-
-bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
- // writing?
- Subtarget.SetJITMode();
-
- // Machine code emitter pass for PowerPC.
- PM.add(createPPCJITCodeEmitterPass(*this, JCE));
-
- return false;
+ addPass(createPPCBranchSelectionPass(), false);
}
void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 4c7029ca7a36..5095d736a651 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_TARGETMACHINE_H
-#define PPC_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
@@ -24,46 +24,30 @@ namespace llvm {
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
class PPCTargetMachine : public LLVMTargetMachine {
- PPCSubtarget Subtarget;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ PPCSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit);
+ CodeGenOpt::Level OL);
- const PPCInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const PPCFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
- const PPCTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
+ ~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) override;
/// \brief Register PPC analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 3e71bbc67379..cd84da222751 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 73fb69101353..6493713bfbad 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETSTREAMER_H
-#define PPCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 007901b23e0c..a7bb2526d9d4 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -38,6 +38,7 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
+ const TargetMachine *TM;
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
@@ -47,16 +48,16 @@ public:
}
PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
pushTTIStack(this);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -64,7 +65,7 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -79,33 +80,31 @@ public:
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
- virtual PopcntSupportKind
- getPopcntSupport(unsigned TyWidth) const override;
- virtual void getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const override;
+ PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+ void getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
- virtual unsigned getNumberOfRegisters(bool Vector) const override;
- virtual unsigned getRegisterBitWidth(bool Vector) const override;
- virtual unsigned getMaximumUnrollFactor() const override;
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind,
- OperandValueKind) const override;
- virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const override;
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaxInterleaveFactor() const override;
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const override;
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const override;
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const override;
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const override;
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const override;
/// @}
};
@@ -184,6 +183,15 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
return TCC_Free;
break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
}
return PPCTTI::getIntImmCost(Imm, Ty);
}
@@ -271,12 +279,15 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTI::getIntImmCost(Imm, Ty);
}
-void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
- if (ST->getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
}
+
+ TargetTransformInfo::getUnrollingPreferences(F, L, UP);
}
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
@@ -297,7 +308,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTI::getMaximumUnrollFactor() const {
+unsigned PPCTTI::getMaxInterleaveFactor() const {
unsigned Directive = ST->getDarwinDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -318,14 +329,15 @@ unsigned PPCTTI::getMaximumUnrollFactor() const {
return 2;
}
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned PPCTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
}
unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 514f8407c972..4132b045e5a0 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -5,38 +5,6 @@ TODO:
===-------------------------------------------------------------------------===
-On PPC64, this:
-
-long f2 (long x) { return 0xfffffff000000000UL; }
-long f3 (long x) { return 0x1ffffffffUL; }
-
-could compile into:
-
-_f2:
- li r3,-1
- rldicr r3,r3,0,27
- blr
-_f3:
- li r3,-1
- rldicl r3,r3,0,31
- blr
-
-we produce:
-
-_f2:
- lis r2, 4095
- ori r2, r2, 65535
- sldi r3, r2, 36
- blr
-_f3:
- li r2, 1
- sldi r2, r2, 32
- oris r2, r2, 65535
- ori r3, r2, 65535
- blr
-
-===-------------------------------------------------------------------------===
-
This code:
unsigned add32carry(unsigned sum, unsigned x) {
@@ -63,40 +31,6 @@ Ick.
===-------------------------------------------------------------------------===
-Support 'update' load/store instructions. These are cracked on the G5, but are
-still a codesize win.
-
-With preinc enabled, this:
-
-long *%test4(long *%X, long *%dest) {
- %Y = getelementptr long* %X, int 4
- %A = load long* %Y
- store long %A, long* %dest
- ret long* %Y
-}
-
-compiles to:
-
-_test4:
- mr r2, r3
- lwzu r5, 32(r2)
- lwz r3, 36(r3)
- stw r5, 0(r4)
- stw r3, 4(r4)
- mr r3, r2
- blr
-
-with -sched=list-burr, I get:
-
-_test4:
- lwz r2, 36(r3)
- lwzu r5, 32(r3)
- stw r2, 4(r4)
- stw r5, 0(r4)
- blr
-
-===-------------------------------------------------------------------------===
-
We compile the hottest inner loop of viterbi to:
li r6, 0
@@ -184,33 +118,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
===-------------------------------------------------------------------------===
-Compile offsets from allocas:
-
-int *%test() {
- %X = alloca { int, int }
- %Y = getelementptr {int,int}* %X, int 0, uint 1
- ret int* %Y
-}
-
-into a single add, not two:
-
-_test:
- addi r2, r1, -8
- addi r3, r2, 4
- blr
-
---> important for C++.
-
-===-------------------------------------------------------------------------===
-
-No loads or stores of the constants should be needed:
-
-struct foo { double X, Y; };
-void xxx(struct foo F);
-void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
-
-===-------------------------------------------------------------------------===
-
Darwin Stub removal:
We still generate calls to foo$stub, and stubs, on Darwin. This is not
@@ -269,57 +176,6 @@ just fastcc.
===-------------------------------------------------------------------------===
-Compile this:
-
-int foo(int a) {
- int b = (a < 8);
- if (b) {
- return b * 3; // ignore the fact that this is always 3.
- } else {
- return 2;
- }
-}
-
-into something not this:
-
-_foo:
-1) cmpwi cr7, r3, 8
- mfcr r2, 1
- rlwinm r2, r2, 29, 31, 31
-1) cmpwi cr0, r3, 7
- bgt cr0, LBB1_2 ; UnifiedReturnBlock
-LBB1_1: ; then
- rlwinm r2, r2, 0, 31, 31
- mulli r3, r2, 3
- blr
-LBB1_2: ; UnifiedReturnBlock
- li r3, 2
- blr
-
-In particular, the two compares (marked 1) could be shared by reversing one.
-This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the
-same operands (but backwards) exists. In this case, this wouldn't save us
-anything though, because the compares still wouldn't be shared.
-
-===-------------------------------------------------------------------------===
-
-We should custom expand setcc instead of pretending that we have it. That
-would allow us to expose the access of the crbit after the mfcr, allowing
-that access to be trivially folded into other ops. A simple example:
-
-int foo(int a, int b) { return (a < b) << 4; }
-
-compiles into:
-
-_foo:
- cmpw cr7, r3, r4
- mfcr r2, 1
- rlwinm r2, r2, 29, 31, 31
- slwi r3, r2, 4
- blr
-
-===-------------------------------------------------------------------------===
-
Fold add and sub with constant into non-extern, non-weak addresses so this:
static int a;
@@ -347,48 +203,6 @@ _foo:
===-------------------------------------------------------------------------===
-We generate really bad code for this:
-
-int f(signed char *a, _Bool b, _Bool c) {
- signed char t = 0;
- if (b) t = *a;
- if (c) *a = t;
-}
-
-===-------------------------------------------------------------------------===
-
-This:
-int test(unsigned *P) { return *P >> 24; }
-
-Should compile to:
-
-_test:
- lbz r3,0(r3)
- blr
-
-not:
-
-_test:
- lwz r2, 0(r3)
- srwi r3, r2, 24
- blr
-
-===-------------------------------------------------------------------------===
-
-On the G5, logical CR operations are more expensive in their three
-address form: ops that read/write the same register are half as expensive as
-those that read from two registers that are different from their destination.
-
-We should model this with two separate instructions. The isel should generate
-the "two address" form of the instructions. When the register allocator
-detects that it needs to insert a copy due to the two-addresness of the CR
-logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point
-we can convert to the "three address" instruction, to save code space.
-
-This only matters when we start generating cr logical ops.
-
-===-------------------------------------------------------------------------===
-
We should compile these two functions to the same thing:
#include <stdlib.h>
@@ -474,27 +288,6 @@ http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
===-------------------------------------------------------------------------===
-float foo(float X) { return (int)(X); }
-
-Currently produces:
-
-_foo:
- fctiwz f0, f1
- stfd f0, -8(r1)
- lwz r2, -4(r1)
- extsw r2, r2
- std r2, -16(r1)
- lfd f0, -16(r1)
- fcfid f0, f0
- frsp f1, f0
- blr
-
-We could use a target dag combine to turn the lwz/extsw into an lwa when the
-lwz has a single use. Since LWA is cracked anyway, this would be a codesize
-win only.
-
-===-------------------------------------------------------------------------===
-
We generate ugly code for this:
void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
@@ -552,32 +345,6 @@ _foo:
===-------------------------------------------------------------------------===
-We compile:
-
-unsigned test6(unsigned x) {
- return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16);
-}
-
-into:
-
-_test6:
- lis r2, 255
- rlwinm r3, r3, 16, 0, 31
- ori r2, r2, 255
- and r3, r3, r2
- blr
-
-GCC gets it down to:
-
-_test6:
- rlwinm r0,r3,16,8,15
- rlwinm r3,r3,16,24,31
- or r3,r3,r0
- blr
-
-
-===-------------------------------------------------------------------------===
-
Consider a function like this:
float foo(float X) { return X + 1234.4123f; }
@@ -674,48 +441,6 @@ _bar:
===-------------------------------------------------------------------------===
-We currently compile 32-bit bswap:
-
-declare i32 @llvm.bswap.i32(i32 %A)
-define i32 @test(i32 %A) {
- %B = call i32 @llvm.bswap.i32(i32 %A)
- ret i32 %B
-}
-
-to:
-
-_test:
- rlwinm r2, r3, 24, 16, 23
- slwi r4, r3, 24
- rlwimi r2, r3, 8, 24, 31
- rlwimi r4, r3, 8, 8, 15
- rlwimi r4, r2, 0, 16, 31
- mr r3, r4
- blr
-
-it would be more efficient to produce:
-
-_foo: mr r0,r3
- rlwinm r3,r3,8,0xffffffff
- rlwimi r3,r0,24,0,7
- rlwimi r3,r0,24,16,23
- blr
-
-===-------------------------------------------------------------------------===
-
-test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to:
-
-__ZNK4llvm5APInt17countLeadingZerosEv:
- ld r2, 0(r3)
- cntlzd r2, r2
- or r2, r2, r2 <<-- silly.
- addi r3, r2, -64
- blr
-
-The dead or is a 'truncate' from 64- to 32-bits.
-
-===-------------------------------------------------------------------------===
-
We generate horrible ppc code for this:
#define N 2000000
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index d7e94f75e123..fcf9eca80e96 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_H
-#define AMDGPU_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPU_H
+#define LLVM_LIB_TARGET_R600_AMDGPU_H
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -38,21 +38,31 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
+FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
+
+void initializeSIFoldOperandsPass(PassRegistry &);
+extern char &SIFoldOperandsID;
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
+void initializeSILoadStoreOptimizerPass(PassRegistry &);
+extern char &SILoadStoreOptimizerID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+ModulePass *createAMDGPUAlwaysInlinePass();
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
ImmutablePass *
@@ -63,6 +73,7 @@ extern char &SIFixSGPRLiveRangesID;
extern Target TheAMDGPUTarget;
+extern Target TheGCNTarget;
namespace AMDGPU {
enum TargetIndex {
@@ -127,4 +138,4 @@ enum AddressSpaces {
} // namespace AMDGPUAS
-#endif // AMDGPU_H
+#endif
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 5645f1a2322e..8a5ca613dc80 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -81,6 +81,17 @@ def FeatureCFALUBug : SubtargetFeature<"cfalubug",
"true",
"GPU has CF_ALU bug">;
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+ "EnableLoadStoreOpt",
+ "true",
+ "Enable SI load/store optimizer pass">;
+
+def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
+ "FlatAddressSpace",
+ "true",
+ "Support flat address space">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
@@ -135,16 +146,28 @@ def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64]>;
+ FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+
+def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
let guessInstructionProperties = 1;
}
+def AMDGPUAsmParser : AsmParser {
+ // Some of the R600 registers have the same name, so this crashes.
+ // For example T0_XYZW and T0_XY both have the asm name T0.
+ let ShouldEmitMatchRegisterName = 0;
+}
+
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyParsers = [AMDGPUAsmParser];
}
// Dummy Instruction itineraries for pseudo instructions
diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
new file mode 100644
index 000000000000..b545b456161f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
@@ -0,0 +1,66 @@
+//===-- AMDGPUAlwaysInlinePass.cpp - Force always_inline ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAlwaysInline : public ModulePass {
+
+ static char ID;
+
+public:
+ AMDGPUAlwaysInline() : ModulePass(ID) { }
+ bool runOnModule(Module &M) override;
+ const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+};
+
+} // End anonymous namespace
+
+char AMDGPUAlwaysInline::ID = 0;
+
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+
+ std::vector<Function*> FuncsToClone;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
+ FuncsToClone.push_back(&F);
+ }
+
+ for (Function *F : FuncsToClone) {
+ ValueToValueMapTy VMap;
+ Function *NewFunc = CloneFunction(F, VMap, false);
+ NewFunc->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(NewFunc);
+ F->replaceAllUsesWith(NewFunc);
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (F.hasLocalLinkage()) {
+ F.addFnAttr(Attribute::AlwaysInline);
+ }
+ }
+ return false;
+}
+
+ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
+ return new AMDGPUAlwaysInline();
+}
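
Assuming the target wires this in through its pass configuration (the actual scheduling point is not shown in this hunk), a hedged usage sketch would pair it with LLVM's always-inliner, which consumes the always_inline attributes the pass adds. The wrapper function name here is hypothetical:

#include "AMDGPU.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"

// Illustrative only: run the cloning/attribute pass first, then the
// always-inliner to actually fold the internalized duplicates away.
void addAMDGPUInlinePasses(llvm::PassManagerBase &PM) {
  PM.add(llvm::createAMDGPUAlwaysInlinePass());
  PM.add(llvm::createAlwaysInlinerPass());
}
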
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 73faaa183581..624f3919b409 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
@@ -79,6 +80,7 @@ static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
}
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
@@ -97,9 +99,13 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+  // The starting address of all shader programs must be 256-byte aligned.
+ MF.setAlignment(8);
+
SetupMachineFunction(MF);
- OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
+ EmitFunctionHeader();
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
@@ -109,7 +115,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (STM.isAmdHsaOS()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ getSIProgramInfo(KernelInfo, MF);
+ EmitAmdKernelCodeT(MF, KernelInfo);
+ OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+ } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
} else {
@@ -151,23 +162,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (STM.dumpCode()) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- MF.dump();
-#endif
+ if (STM.dumpCode() && DisasmEnabled) {
- if (DisasmEnabled) {
- OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
- ELF::SHT_NOTE, 0,
- SectionKind::getReadOnly()));
+ OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
+ ELF::SHT_NOTE, 0,
+ SectionKind::getReadOnly()));
- for (size_t i = 0; i < DisasmLines.size(); ++i) {
- std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
- Comment += " ; " + HexLines[i] + "\n";
+ for (size_t i = 0; i < DisasmLines.size(); ++i) {
+ std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+ Comment += " ; " + HexLines[i] + "\n";
- OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
- OutStreamer.EmitBytes(StringRef(Comment));
- }
+ OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer.EmitBytes(StringRef(Comment));
}
}
@@ -177,8 +183,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const R600RegisterInfo *RI
- = static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
+ const R600RegisterInfo *RI = static_cast<const R600RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
@@ -236,12 +242,15 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) const {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
- const SIRegisterInfo *RI
- = static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ bool FlatUsed = false;
+ const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
@@ -262,6 +271,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
reg == AMDGPU::VCC_HI) {
VCCUsed = true;
continue;
+ } else if (reg == AMDGPU::FLAT_SCR ||
+ reg == AMDGPU::FLAT_SCR_LO ||
+ reg == AMDGPU::FLAT_SCR_HI) {
+ FlatUsed = true;
+ continue;
}
switch (reg) {
@@ -275,7 +289,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (AMDGPU::SReg_32RegClass.contains(reg)) {
isSGPR = true;
width = 1;
- } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
isSGPR = false;
width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
@@ -322,9 +336,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (VCCUsed)
MaxSGPR += 2;
- ProgInfo.NumVGPR = MaxVGPR;
- ProgInfo.NumSGPR = MaxSGPR;
+ if (FlatUsed)
+ MaxSGPR += 2;
+
+ // We found the maximum register index. They start at 0, so add one to get the
+ // number of registers.
+ ProgInfo.NumVGPR = MaxVGPR + 1;
+ ProgInfo.NumSGPR = MaxSGPR + 1;
+ ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
+ ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
ProgInfo.FloatMode = getFPMode(MF);
@@ -338,22 +359,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
+ ProgInfo.FlatUsed = FlatUsed;
+ ProgInfo.VCCUsed = VCCUsed;
ProgInfo.CodeLen = CodeSize;
-}
-
-void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- unsigned RsrcReg;
- switch (MFI->getShaderType()) {
- default: // Fall through
- case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
- case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
- case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
- case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
- }
unsigned LDSAlignShift;
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
@@ -364,52 +372,188 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
LDSAlignShift = 9;
}
- unsigned LDSBlocks =
- RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
+ MFI->getMaximumWorkGroupSize(MF);
+
+ ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+ ProgInfo.LDSBlocks =
+ RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
// Scratch is allocated in 256 dword blocks.
unsigned ScratchAlignShift = 10;
// We need to program the hardware with the amount of scratch memory that
- // is used by the entire wave. KernelInfo.ScratchSize is the amount of
+ // is used by the entire wave. ProgInfo.ScratchSize is the amount of
// scratch memory used per thread.
- unsigned ScratchBlocks =
- RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
+ ProgInfo.ScratchBlocks =
+ RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
1 << ScratchAlignShift) >> ScratchAlignShift;
+ ProgInfo.ComputePGMRSrc1 =
+ S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
+ S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
+ S_00B848_PRIORITY(ProgInfo.Priority) |
+ S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+ S_00B848_PRIV(ProgInfo.Priv) |
+ S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
+ S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
+ S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+
+ ProgInfo.ComputePGMRSrc2 =
+ S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
+ S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
+ S_00B84C_TGID_X_EN(1) |
+ S_00B84C_TGID_Y_EN(1) |
+ S_00B84C_TGID_Z_EN(1) |
+ S_00B84C_TG_SIZE_EN(1) |
+ S_00B84C_TIDIG_COMP_CNT(2) |
+ S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
+}
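
The granularities above are easy to sanity-check: VGPRs are allocated in groups of 4 and SGPRs in groups of 8 (hence the (N - 1) / 4 and (N - 1) / 8 block encodings), LDS in 256-byte blocks before Sea Islands and 512-byte blocks from Sea Islands on, and scratch in 256-dword (1024-byte) blocks scaled by the wavefront size. A standalone arithmetic check with sample values; roundUpToAlignment stands in for LLVM's RoundUpToAlignment:

#include <cstdio>

static unsigned roundUpToAlignment(unsigned V, unsigned A) {
  return (V + A - 1) / A * A;
}

int main() {
  // 10 VGPRs / 20 SGPRs, encoded as (N - 1) / granularity:
  printf("VGPRBlocks = %u\n", (10u - 1) / 4); // 2 -> hardware allocates 12
  printf("SGPRBlocks = %u\n", (20u - 1) / 8); // 2 -> hardware allocates 24

  // 1000 bytes of LDS on Southern Islands (LDSAlignShift = 8, 256-byte blocks):
  printf("LDSBlocks = %u\n", roundUpToAlignment(1000, 1 << 8) >> 8); // 4

  // 68 bytes of per-thread scratch, 64-thread wavefront,
  // ScratchAlignShift = 10 (1024-byte / 256-dword blocks):
  printf("ScratchBlocks = %u\n",
         roundUpToAlignment(68 * 64, 1 << 10) >> 10); // 5
}
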
+
+static unsigned getRsrcReg(unsigned ShaderType) {
+ switch (ShaderType) {
+ default: // Fall through
+ case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
+ case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
+ case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
+ case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
+ }
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
+
if (MFI->getShaderType() == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
- const uint32_t ComputePGMRSrc1 =
- S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
- S_00B848_PRIORITY(KernelInfo.Priority) |
- S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
- S_00B848_PRIV(KernelInfo.Priv) |
- S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
- S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
- S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
-
- OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
+ OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- const uint32_t ComputePGMRSrc2 =
- S_00B84C_LDS_SIZE(LDSBlocks) |
- S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
-
- OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
+ OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
- OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
+ OutStreamer.EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+
+ // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
+ // 0" comment but I don't see a corresponding field in the register spec.
} else {
OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
+ S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
}
if (MFI->getShaderType() == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
+ OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
}
+
+void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ amd_kernel_code_t header;
+
+ memset(&header, 0, sizeof(header));
+
+ header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
+ header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
+
+ header.struct_byte_size = sizeof(amd_kernel_code_t);
+
+ header.target_chip = STM.getAmdKernelCodeChipID();
+
+ header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+
+ header.compute_pgm_resource_registers =
+ KernelInfo.ComputePGMRSrc1 |
+ (KernelInfo.ComputePGMRSrc2 << 32);
+
+ // Code Properties:
+ header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+ AMD_CODE_PROPERTY_IS_PTR64;
+
+ if (KernelInfo.FlatUsed)
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+
+ if (KernelInfo.ScratchBlocks)
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
+ header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+
+ // MFI->ABIArgOffset is the number of bytes for the kernel arguments
+  // plus 36. 36 is the number of bytes reserved at the beginning of the
+ // input buffer to store work-group size information.
+ // FIXME: We should be adding the size of the implicit arguments
+ // to this value.
+ header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+
+ header.wavefront_sgpr_count = KernelInfo.NumSGPR;
+ header.workitem_vgpr_count = KernelInfo.NumVGPR;
+
+  // FIXME: What values do I put for these alignments?
+ header.kernarg_segment_alignment = 0;
+ header.group_segment_alignment = 0;
+ header.private_segment_alignment = 0;
+
+ header.code_type = 1; // HSA_EXT_CODE_KERNEL
+
+ header.wavefront_size = STM.getWavefrontSize();
+
+ if (isVerbose()) {
+ OutStreamer.emitRawComment("amd_code_version_major = " +
+ Twine(header.amd_code_version_major), false);
+ OutStreamer.emitRawComment("amd_code_version_minor = " +
+ Twine(header.amd_code_version_minor), false);
+ OutStreamer.emitRawComment("struct_byte_size = " +
+ Twine(header.struct_byte_size), false);
+ OutStreamer.emitRawComment("target_chip = " +
+ Twine(header.target_chip), false);
+ OutStreamer.emitRawComment(" compute_pgm_rsrc1: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc1), false);
+ OutStreamer.emitRawComment(" compute_pgm_rsrc2: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc2), false);
+ OutStreamer.emitRawComment("enable_sgpr_private_segment_buffer = " +
+ Twine((bool)(header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
+ OutStreamer.emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
+ Twine((bool)(header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
+ OutStreamer.emitRawComment("private_element_size = 2 ", false);
+ OutStreamer.emitRawComment("is_ptr64 = " +
+ Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
+ OutStreamer.emitRawComment("workitem_private_segment_byte_size = " +
+ Twine(header.workitem_private_segment_byte_size),
+ false);
+ OutStreamer.emitRawComment("workgroup_group_segment_byte_size = " +
+ Twine(header.workgroup_group_segment_byte_size),
+ false);
+ OutStreamer.emitRawComment("gds_segment_byte_size = " +
+ Twine(header.gds_segment_byte_size), false);
+ OutStreamer.emitRawComment("kernarg_segment_byte_size = " +
+ Twine(header.kernarg_segment_byte_size), false);
+ OutStreamer.emitRawComment("wavefront_sgpr_count = " +
+ Twine(header.wavefront_sgpr_count), false);
+ OutStreamer.emitRawComment("workitem_vgpr_count = " +
+ Twine(header.workitem_vgpr_count), false);
+ OutStreamer.emitRawComment("code_type = " + Twine(header.code_type), false);
+ OutStreamer.emitRawComment("wavefront_size = " +
+ Twine((int)header.wavefront_size), false);
+ OutStreamer.emitRawComment("optimization_level = " +
+ Twine(header.optimization_level), false);
+ OutStreamer.emitRawComment("hsail_profile = " +
+ Twine(header.hsail_profile), false);
+ OutStreamer.emitRawComment("hsail_machine_model = " +
+ Twine(header.hsail_machine_model), false);
+ OutStreamer.emitRawComment("hsail_version_major = " +
+ Twine(header.hsail_version_major), false);
+ OutStreamer.emitRawComment("hsail_version_minor = " +
+ Twine(header.hsail_version_minor), false);
+ }
+
+ OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
+}
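
The setAlignment(8) at the top of runOnMachineFunction, the EmitCodeAlignment(2 << (Align - 1)) call, and the kernel_code_entry_byte_offset written here all encode the same 256-byte requirement, since MachineFunction alignment is stored as a log2 value. A one-line check of the arithmetic:

#include <cassert>

int main() {
  unsigned LogAlign = 8;                 // MF.setAlignment(8) stores log2(256)
  assert((1ULL << LogAlign) == 256);     // kernel_code_entry_byte_offset
  assert((2u << (LogAlign - 1)) == 256); // EmitCodeAlignment argument
}
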
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 19907cfd013e..b360ae88f1e6 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_ASMPRINTER_H
-#define AMDGPU_ASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
#include <vector>
@@ -24,8 +24,8 @@ class AMDGPUAsmPrinter : public AsmPrinter {
private:
struct SIProgramInfo {
SIProgramInfo() :
- NumVGPR(0),
- NumSGPR(0),
+ VGPRBlocks(0),
+ SGPRBlocks(0),
Priority(0),
FloatMode(0),
Priv(0),
@@ -33,11 +33,19 @@ private:
DebugMode(0),
IEEEMode(0),
ScratchSize(0),
+ ComputePGMRSrc1(0),
+ LDSBlocks(0),
+ ScratchBlocks(0),
+ ComputePGMRSrc2(0),
+ NumVGPR(0),
+ NumSGPR(0),
+ FlatUsed(false),
+ VCCUsed(false),
CodeLen(0) {}
// Fields set in PGM_RSRC1 pm4 packet.
- uint32_t NumVGPR;
- uint32_t NumSGPR;
+ uint32_t VGPRBlocks;
+ uint32_t SGPRBlocks;
uint32_t Priority;
uint32_t FloatMode;
uint32_t Priv;
@@ -46,7 +54,21 @@ private:
uint32_t IEEEMode;
uint32_t ScratchSize;
+ uint64_t ComputePGMRSrc1;
+
+ // Fields set in PGM_RSRC2 pm4 packet.
+ uint32_t LDSBlocks;
+ uint32_t ScratchBlocks;
+
+ uint64_t ComputePGMRSrc2;
+
+ uint32_t NumVGPR;
+ uint32_t NumSGPR;
+ uint32_t LDSSize;
+ bool FlatUsed;
+
// Bonus information for debugging.
+ bool VCCUsed;
uint64_t CodeLen;
};
@@ -59,6 +81,8 @@ private:
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+ void EmitAmdKernelCodeT(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) const;
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
@@ -82,4 +106,4 @@ protected:
} // End anonymous llvm
-#endif //AMDGPU_ASMPRINTER_H
+#endif
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index 3586c8826908..6ffa7a083583 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -59,16 +59,24 @@ def CC_AMDGPU_Kernel : CallingConv<[
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
- "getShaderType() == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
- "getShaderType() == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >="
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_SI>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_R600>>
]>;
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
index d18ede5004e1..15a6636a1aea 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.h
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -12,8 +12,8 @@
/// machine.
//
//===----------------------------------------------------------------------===//
-#ifndef AMDILFRAME_LOWERING_H
-#define AMDILFRAME_LOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -42,4 +42,4 @@ public:
bool hasFP(const MachineFunction &MF) const override;
};
} // namespace llvm
-#endif // AMDILFRAME_LOWERING_H
+#endif
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index cc17b7ec6183..eaa506db96c3 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -65,6 +65,7 @@ private:
static bool checkPrivateAddress(const MachineMemOperand *Op);
static bool isGlobalStore(const StoreSDNode *N);
+ static bool isFlatStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);
@@ -72,30 +73,49 @@ private:
bool isCPLoad(const LoadSDNode *N) const;
bool isConstantLoad(const LoadSDNode *N, int cbID) const;
bool isGlobalLoad(const LoadSDNode *N) const;
+ bool isFlatLoad(const LoadSDNode *N) const;
bool isParamLoad(const LoadSDNode *N) const;
bool isPrivateLoad(const LoadSDNode *N) const;
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
- /// \returns True if the current basic block being selected is at control
- /// flow depth 0. Meaning that the current block dominates the
- // exit block.
- bool isCFDepth0() const;
-
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
- SDValue &ImmOffset) const;
+ bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &GLC, SDValue &SLC,
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset, SDValue &GLC) const;
+ SDNode *SelectAddrSpaceCast(SDNode *N);
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Omod) const;
+ bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp,
+ SDValue &Omod) const;
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
@@ -135,7 +155,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
switch (N->getMachineOpcode()) {
default: {
- const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
+ const MCInstrDesc &Desc =
+ TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
return nullptr;
@@ -143,15 +164,17 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (RegClass == -1)
return nullptr;
- return TM.getRegisterInfo()->getRegClass(RegClass);
+ return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);
+ const TargetRegisterClass *SuperRC =
+ TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
+ return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
+ SuperRC, SubRegIdx);
}
}
}
@@ -239,10 +262,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPURegisterInfo *TRI =
- static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
@@ -263,7 +286,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
}
switch(NumVectorElts) {
- case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
+ case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
AMDGPU::SReg_32RegClassID;
break;
case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
@@ -470,7 +493,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::DIV_SCALE: {
return SelectDIV_SCALE(N);
}
+ case ISD::CopyToReg: {
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+ Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ break;
+ }
+ case ISD::ADDRSPACECAST:
+ return SelectAddrSpaceCast(N);
}
+
return SelectCode(N);
}
@@ -508,6 +540,10 @@ bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
+bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
@@ -539,6 +575,10 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
+bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
@@ -568,23 +608,16 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
const Value *MemVal = N->getMemOperand()->getValue();
if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)){
+ !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
return true;
}
return false;
}
-bool AMDGPUDAGToDAGISel::isCFDepth0() const {
- // FIXME: Figure out a way to use DominatorTree analysis here.
- const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
- const Function *Fn = FuncInfo->Fn;
- return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
-}
-
-
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -687,14 +720,9 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
- unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- if (!isCFDepth0()) {
- Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
- CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
- }
-
SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
SDValue Carry(AddLo, 1);
SDNode *AddHi
@@ -721,34 +749,134 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
-
+ const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
SDValue Ops[] = {
- N->getOperand(0),
- N->getOperand(1),
- N->getOperand(2),
- Zero,
- Zero,
- Zero,
- Zero
+ Zero, // src0_modifiers
+ N->getOperand(0), // src0
+ Zero, // src1_modifiers
+ N->getOperand(1), // src1
+ Zero, // src2_modifiers
+ N->getOperand(2), // src2
+ False, // clamp
+ Zero // omod
};
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
-static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
- return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
- Ptr), 0);
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+ (OffsetBits == 8 && !isUInt<8>(Offset)))
+ return false;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ return true;
+
+ // On Southern Islands, instructions with a negative base value and an
+ // offset don't seem to work.
+ return CurDAG->SignBitIsZero(Base);
+}
+
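
A standalone sketch of the rule isDSOffsetLegal encodes (helper names here are illustrative, not LLVM API): the offset must fit the instruction's unsigned offset field, and on Southern Islands the base must additionally be provably non-negative.

    #include <cstdint>

    // Sketch, not the LLVM implementation: DS offset legality.
    bool dsOffsetFits(uint64_t Offset, unsigned OffsetBits) {
      return Offset < (UINT64_C(1) << OffsetBits); // isUInt<16> / isUInt<8>
    }

    bool dsOffsetLegal(bool SeaIslandsOrLater, bool BaseSignBitKnownZero,
                       uint64_t Offset, unsigned OffsetBits) {
      if (!dsOffsetFits(Offset, OffsetBits))
        return false;
      // CI+ copes with a negative base; SI needs the sign bit known zero.
      return SeaIslandsOrLater || BaseSignBitKnownZero;
    }
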
+bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+ // (add n0, c0)
+ Base = N0;
+ Offset = N1;
+ return true;
+ }
+ }
+
+ // If we have a constant address, prefer to put the constant into the
+ // offset. This can save moves to load the constant address since multiple
+ // operations can share the zero base address register, and enables merging
+ // into read2 / write2 instructions.
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isUInt<16>(CAddr->getZExtValue())) {
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ SDLoc(Addr), MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ unsigned DWordOffset0 = C1->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ // (add n0, c0)
+ if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ Base = N0;
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ return true;
+ }
+ }
+
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+ if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ MachineSDNode *MovZero
+ = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ SDLoc(Addr), MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ return true;
}
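
ds_read2/ds_write2 encode two 8-bit offsets in units of the 4-byte element size; a sketch of the byte-offset-to-slot conversion assumed above (hypothetical helper, C++17):

    #include <cstdint>
    #include <optional>
    #include <utility>

    // Sketch: turn a byte offset into the pair of dword-granular offsets used
    // by ds_read2_b32 / ds_write2_b32; fails if the second slot exceeds 8 bits.
    std::optional<std::pair<uint8_t, uint8_t>> read2Offsets(uint64_t ByteOff) {
      uint64_t Slot0 = ByteOff / 4; // first dword
      uint64_t Slot1 = Slot0 + 1;  // the adjacent dword
      if (Slot1 > 0xff)
        return std::nullopt;
      return std::make_pair(uint8_t(Slot0), uint8_t(Slot1));
    }
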
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
return isUInt<12>(Imm->getZExtValue());
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
- SDValue &Offset,
- SDValue &ImmOffset) const {
+void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
SDLoc DL(Addr);
+ GLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, MVT::i1);
+
+ Idxen = CurDAG->getTargetConstant(0, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -757,57 +885,69 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
if (isLegalMUBUFImmOffset(C1)) {
if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1)
+ // (add (add N2, N3), C1) -> addr64
SDValue N2 = N0.getOperand(0);
SDValue N3 = N0.getOperand(1);
- Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
- Offset = N3;
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return true;
+ Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return;
}
- // (add N0, C1)
- Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
- Offset = N0;
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return true;
+ // (add N0, C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ Ptr = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return;
}
}
if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1)
+ // (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
- Offset = N1;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
- return true;
+ Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Ptr = N0;
+ VAddr = N1;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ return;
}
- // default case
- Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
- Offset = Addr;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
- return true;
+ // default case -> offset
+ VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ Ptr = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+
}
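
Condensed, the decision SelectMUBUF makes looks roughly like this (illustrative types, not the selector's own): a register-plus-register base selects the addr64 form, and a constant that fits the 12-bit field goes into the immediate offset.

    #include <cstdint>

    struct MUBUFAddrMode {
      bool Addr64;        // use the 64-bit VGPR address (addr64) form
      uint16_t ImmOffset; // 12-bit unsigned immediate offset
    };

    // Sketch of the classification: RegPlusReg says whether the base itself is
    // (add n2, n3); Imm is any constant peeled off the top-level add. An
    // unencodable constant is simply not folded (offset stays zero).
    MUBUFAddrMode classifyMUBUF(bool RegPlusReg, uint64_t Imm) {
      MUBUFAddrMode M = {RegPlusReg, 0};
      if (Imm < (1u << 12)) // isLegalMUBUFImmOffset
        M.ImmOffset = uint16_t(Imm);
      return M;
    }
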
-/// \brief Return a resource descriptor with the 'Add TID' bit enabled
-/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
-/// of the resource descriptor) to create an offset, which is added to the
-/// resource ponter.
-static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr,
+ SDValue &Offset) const {
+ SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
+
+ ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
+ if (C->getSExtValue()) {
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
+ return true;
+ }
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff;
+ return false;
+}
- SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
- SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
- SDValue DataLo = DAG->getTargetConstant(
- Rsrc & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
- SDValue DataHi = DAG->getTargetConstant(Rsrc >> 32, MVT::i32);
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, MVT::i1);
- const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
- return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
- MVT::v4i32, Ops), 0);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -816,16 +956,23 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
-
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
unsigned ScratchPtrReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
unsigned ScratchOffsetReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+ Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
+ ScratchOffsetReg, MVT::i32);
- Rsrc = buildScratchRSRC(CurDAG, DL, CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64));
+ SDValue ScratchPtr =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+ Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
@@ -863,20 +1010,150 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &SOffset, SDValue &Offset,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
+ SDValue Ptr, VAddr, Offen, Idxen, Addr64;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
- GLC = CurDAG->getTargetConstant(0, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
- TFE = CurDAG->getTargetConstant(0, MVT::i1);
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
- Idxen = CurDAG->getTargetConstant(0, MVT::i1);
- Offen = CurDAG->getTargetConstant(1, MVT::i1);
+ if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
+ uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
+ APInt::getAllOnesValue(32).getZExtValue(); // Size
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset,
+ SDValue &GLC) const {
+ SDValue SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
+// FIXME: This is incorrect and only enough to be able to compile.
+SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
+ AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
+ SDLoc DL(N);
+
+ assert(Subtarget.hasFlatAddressSpace() &&
+ "addrspacecast only supported with flat address space!");
+
+ assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
+ "Cannot cast address space to / from constant address!");
+
+ assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+ ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
+ "Can only cast to / from flat address space!");
+
+ // The flat instructions read the address as the index of the VGPR holding
+ // the address, so casting is just a reinterpretation of the base VGPR;
+ // insert a trunc / bitcast / zext as needed.
+
+ SDValue Src = ASC->getOperand(0);
+ EVT DestVT = ASC->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+
+ if (SrcSize > DestSize) {
+ assert(SrcSize == 64 && DestSize == 32);
+ return CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG,
+ DL,
+ DestVT,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+ }
+
+ if (DestSize > SrcSize) {
+ assert(SrcSize == 32 && DestSize == 64);
+
+ // FIXME: This is probably wrong; we should never be defining
+ // a register class with both VGPRs and SGPRs.
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, MVT::i32);
+
+ const SDValue Ops[] = {
+ RC,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(0, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ SDLoc(N), N->getValueType(0), Ops);
+ }
+
+ assert(SrcSize == 64 && DestSize == 64);
+ return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
+}
+
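
Since a flat address is just the value in the VGPR pair, the cast reduces to integer resizing; a scalar sketch of the intended semantics (hypothetical helpers):

    #include <cstdint>

    // flat (64-bit) -> local/private (32-bit): keep the low half, which is
    // what the EXTRACT_SUBREG of sub0 above does.
    uint32_t flatToSegment(uint64_t Flat) { return uint32_t(Flat); }

    // local/private (32-bit) -> flat (64-bit): zero-extend, matching the
    // REG_SEQUENCE with a zero high half in the FIXME'd code above.
    uint64_t segmentToFlat(uint32_t Seg) { return uint64_t(Seg); }
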
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+
+ unsigned Mods = 0;
+
+ Src = In;
+
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(0);
+ }
+
+ if (Src.getOpcode() == ISD::FABS) {
+ Mods |= SISrcMods::ABS;
+ Src = Src.getOperand(0);
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+
+ return true;
+}
+
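
For reference, how the matched modifier bits behave once the hardware applies them: the matcher peels FNEG first (the outermost node), so NEG|ABS together encode -|x|. A sketch (bit values are illustrative):

    #include <cmath>

    enum SrcModBits { NEG = 1, ABS = 2 }; // illustrative values

    // How a VALU input behaves with the matched modifier bits applied:
    // abs is applied before neg, so NEG|ABS yields -|x|.
    float applySrcMods(float X, unsigned Mods) {
      if (Mods & ABS) X = std::fabs(X);
      if (Mods & NEG) X = -X;
      return X;
    }
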
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ // FIXME: Handle Clamp and Omod
+ Clamp = CurDAG->getTargetConstant(0, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Omod) const {
+ // FIXME: Handle Omod
+ Omod = CurDAG->getTargetConstant(0, MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
- return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Clamp,
+ SDValue &Omod) const {
+ Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ return SelectVOP3Mods(In, Src, SrcMods);
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 5a46297b6032..206050d54a02 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -103,7 +103,7 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
}
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
- TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
@@ -130,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FREM, MVT::f32, Custom);
+ setOperationAction(ISD::FREM, MVT::f64, Custom);
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -213,18 +216,28 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
+ // There are no 64-bit extloads. These should be done as a 32-bit extload and
+ // an extension to 64-bit.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
+ }
+
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
+ }
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -243,7 +256,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -282,6 +296,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
if (!Subtarget->hasFFBH())
@@ -310,7 +327,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SUB, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
- // TODO: Implement custom UREM / SREM routines.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
@@ -342,12 +358,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
for (MVT VT : FloatVectorTypes) {
setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -370,22 +389,29 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
+ // SI at least has hardware support for floating point exceptions, but no
+ // way of using or handling them is implemented. They are also optional in
+ // OpenCL (Section 7.3).
+ setHasFloatingPointExceptions(false);
+
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
// There are no integer divide instructions, and these expand to a pretty
// large sequence of instructions.
setIntDivIsCheap(false);
- setPow2DivIsCheap(false);
-
- // TODO: Investigate this when 64-bit divides are implemented.
- addBypassSlowDiv(64, 32);
+ setPow2SDivIsCheap(false);
+ setFsqrtIsCheap(true);
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
@@ -418,6 +444,29 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}
+bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
+ ISD::LoadExtType,
+ EVT NewVT) const {
+
+ unsigned NewSize = NewVT.getStoreSizeInBits();
+
+ // If we are reducing to a 32-bit load, this is always better.
+ if (NewSize == 32)
+ return true;
+
+ EVT OldVT = N->getValueType(0);
+ unsigned OldSize = OldVT.getStoreSizeInBits();
+
+ // Don't produce extloads from sub-32-bit types. SI doesn't have scalar
+ // extloads, so doing one requires using a buffer_load. In cases where we
+ // still couldn't use a scalar load, using the wider load shouldn't really
+ // hurt anything.
+
+ // If the old load was already an extload, there's no harm in continuing
+ // to reduce the width.
+ return (OldSize < 32);
+}
+
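
The policy above, restated as a pure function over bit sizes (a sketch, not the LLVM hook itself):

    // Sketch: shrinking to a 32-bit load is always profitable; otherwise only
    // keep shrinking loads that were already extloads (narrower than 32 bits).
    bool reduceLoadWidthProfitable(unsigned OldBits, unsigned NewBits) {
      if (NewBits == 32)
        return true;
      return OldBits < 32;
    }
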
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
@@ -431,18 +480,30 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
(LScalarSize < 32));
}
+// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably
+// also profitable with the 64-bit expansion, since speculating avoids a
+// branch.
+// FIXME: These should really have the size as a parameter.
+bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
+ return true;
+}
+
+bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
+ return true;
+}
+
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32;
+ return VT == MVT::f32 || VT == MVT::f64;
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32;
+ return VT == MVT::f32 || VT == MVT::f64;
}
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
@@ -542,16 +603,18 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FREM: return LowerFREM(Op, DAG);
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
}
return Op;
}
@@ -606,7 +669,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getDataLayout();
+ const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
@@ -679,22 +742,35 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
llvm_unreachable("Unhandled constant initializer");
}
+static bool hasDefinedInitializer(const GlobalValue *GV) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar || !GVar->hasInitializer())
+ return false;
+
+ if (isa<UndefValue>(GVar->getInitializer()))
+ return false;
+
+ return true;
+}
+
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getDataLayout();
+ const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
switch (G->getAddressSpace()) {
- default: llvm_unreachable("Global Address lowering not implemented for this "
- "address space");
case AMDGPUAS::LOCAL_ADDRESS: {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
+ // TODO: We could emit code to handle the initialization somewhere.
+ if (hasDefinedInitializer(GV))
+ break;
+
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
@@ -706,7 +782,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
Offset = MFI->LocalMemoryObjects[GV];
}
- return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+ return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
@@ -748,6 +824,12 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}
}
+
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+ DiagnosticInfoUnsupported BadInit(Fn,
+ "initializer for address space");
+ DAG.getContext()->diagnose(BadInit);
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
@@ -778,8 +860,8 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
@@ -821,13 +903,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// first parameter must be the same as the first instruction.
SDValue Numerator = Op.getOperand(1);
SDValue Denominator = Op.getOperand(2);
+
+ // Note this order is the opposite of the machine instruction's operand
+ // order, which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
+ // intrinsic has the numerator as the first operand to match a normal
+ // division operation.
+
SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
- return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
- Src0, Denominator, Numerator);
+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
+ Denominator, Numerator);
}
case Intrinsic::AMDGPU_div_fmas:
+ // FIXME: Dropping bool parameter. Work is needed to support the implicit
+ // read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -849,7 +939,23 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
case Intrinsic::AMDGPU_rsq_clamped:
- return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ Type *Type = VT.getTypeForEVT(*DAG.getContext());
+ APFloat Max = APFloat::getLargest(Type->getFltSemantics());
+ APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
+
+ SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+ SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
+ DAG.getConstantFP(Max, VT));
+ return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
+ DAG.getConstantFP(Min, VT));
+ } else {
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ }
+
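
A scalar sketch of the VI expansion emitted here: rsq followed by a clamp to the largest finite floats, relying on the NaN-ignoring semantics of FMINNUM/FMAXNUM (std::fmin/std::fmax behave the same way):

    #include <cfloat>
    #include <cmath>

    // Sketch of rsq_clamped on VI: clamp the reciprocal square root to
    // [-FLT_MAX, FLT_MAX]; fmin/fmax ignore a NaN operand like FMINNUM/FMAXNUM.
    float rsqClamped(float X) {
      float R = 1.0f / std::sqrt(X);
      R = std::fmin(R, FLT_MAX);
      return std::fmax(R, -FLT_MAX);
    }
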
+ case Intrinsic::AMDGPU_ldexp:
+ return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
@@ -918,6 +1024,10 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDGPU_brev:
return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
+ case Intrinsic::AMDGPU_class:
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
@@ -956,22 +1066,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
- SelectionDAG &DAG) const {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- SDValue CC = N->getOperand(4);
+SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return SDValue();
- if (VT != MVT::f32 ||
- !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
- }
+ SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
@@ -986,24 +1095,49 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
- llvm_unreachable("Operation should already be optimised!");
+ break;
case ISD::SETULE:
- case ISD::SETULT:
+ case ISD::SETULT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ }
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ // Ordered. Assume ordered for undefined.
+
+ // Only do this after legalization to avoid interfering with other combines
+ // which might occur.
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ // We need to permute the operands to get the correct NaN behavior. When a
+ // compare involving NaN fails, the hardware selects the second operand, so
+ // permute the operands based on the compare type the hardware uses.
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
- case ISD::SETUGE:
case ISD::SETOGE:
- case ISD::SETUGT:
case ISD::SETOGT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
@@ -1011,12 +1145,53 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
return SDValue();
}
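
Why the operand permutation matters: the legacy min/max return the second operand whenever the compare fails, including on any NaN input. A scalar sketch of the semantics being matched:

    // Legacy hardware semantics (sketch): the compare decides, and a failed
    // compare (including any NaN input) selects the second operand.
    float fminLegacy(float A, float B) { return (A < B) ? A : B; }
    float fmaxLegacy(float A, float B) { return (A > B) ? A : B; }

    // So select(setolt(x, y), x, y) maps to fminLegacy(x, y): if either input
    // is NaN the compare fails and y, the select's false value, is returned.
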
-SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
- SelectionDAG &DAG) const {
- LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
- EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETULE:
+ case ISD::SETULT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT MemVT = Load->getMemoryVT();
+ EVT MemEltVT = MemVT.getVectorElementType();
+
EVT LoadVT = Op.getValueType();
- EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT EltVT = LoadVT.getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
@@ -1024,17 +1199,19 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SmallVector<SDValue, 8> Chains;
SDLoc SL(Op);
+ unsigned MemEltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
- for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ for (unsigned i = 0; i < NumElts; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
- DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+ DAG.getConstant(i * MemEltSize, PtrVT));
SDValue NewLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
Load->getChain(), Ptr,
- MachinePointerInfo(Load->getMemOperand()->getValue()),
+ SrcValue.getWithOffset(i * MemEltSize),
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment());
+ Load->isInvariant(), Load->getAlignment());
Loads.push_back(NewLoad.getValue(0));
Chains.push_back(NewLoad.getValue(1));
}
@@ -1047,6 +1224,55 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
return DAG.getMergeValues(Ops, SL);
}
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // If this is a 2-element vector, we really want to scalarize and not create
+ // weird 1-element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorLoad(Op, DAG);
+
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ SDValue BasePtr = Load->getBasePtr();
+ EVT PtrVT = BasePtr.getValueType();
+ EVT MemVT = Load->getMemoryVT();
+ SDLoc SL(Op);
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+ SDValue LoLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
+ Load->getChain(), BasePtr,
+ SrcValue,
+ LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+ SDValue HiLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
+ Load->getChain(), HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+ LoLoad.getValue(1), HiLoad.getValue(1))
+ };
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
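
A sketch of the split in plain memory terms: the high half is loaded at the base plus the store size of the low half, then the two halves are concatenated.

    #include <cstdint>
    #include <cstring>

    struct V4i32 { int32_t E[4]; };
    struct V2i32 { int32_t E[2]; };

    // Sketch: a <4 x i32> load done as two <2 x i32> loads, with the high half
    // at base + 8 bytes (the store size of the low half).
    V4i32 splitLoad(const void *P) {
      V2i32 Lo, Hi;
      std::memcpy(&Lo, P, sizeof Lo);
      std::memcpy(&Hi, static_cast<const char *>(P) + sizeof Lo, sizeof Hi);
      V4i32 R;
      std::memcpy(R.E, Lo.E, sizeof Lo);
      std::memcpy(R.E + 2, Hi.E, sizeof Hi);
      return R;
    }
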
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1105,8 +1331,8 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
Store->getAlignment());
}
-SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
EVT EltVT = Store->getValue().getValueType().getVectorElementType();
@@ -1116,21 +1342,77 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SmallVector<SDValue, 8> Chains;
+ unsigned EltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Store->getValue(), DAG.getConstant(i, MVT::i32));
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
- Store->getBasePtr(),
- DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
- PtrVT));
- Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
- MachinePointerInfo(Store->getMemOperand()->getValue()),
- MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
- Store->getAlignment()));
+ Store->getValue(),
+ DAG.getConstant(i, MVT::i32));
+
+ SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
+ SDValue NewStore =
+ DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+ SrcValue.getWithOffset(i * EltSize),
+ MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
+ Store->getAlignment());
+ Chains.push_back(NewStore);
}
+
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
}
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ SDValue Val = Store->getValue();
+ EVT VT = Val.getValueType();
+
+ // If this is a 2-element vector, we really want to scalarize and not create
+ // weird 1-element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorStore(Op, DAG);
+
+ EVT MemVT = Store->getMemoryVT();
+ SDValue Chain = Store->getChain();
+ SDValue BasePtr = Store->getBasePtr();
+ SDLoc SL(Op);
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+
+ EVT PtrVT = BasePtr.getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+ SDValue LoStore
+ = DAG.getTruncStore(Chain, SL, Lo,
+ BasePtr,
+ SrcValue,
+ LoMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+ SDValue HiStore
+ = DAG.getTruncStore(Chain, SL, Hi,
+ HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
+}
+
SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1138,24 +1420,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT MemVT = Load->getMemoryVT();
- if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
- // We can do the extload to 32-bits, and then need to separately extend to
- // 64-bits.
-
- SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
- Load->getChain(),
- Load->getBasePtr(),
- MemVT,
- Load->getMemOperand());
-
- SDValue Ops[] = {
- DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
- ExtLoad32.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, DL);
- }
-
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
assert(VT == MVT::i1 && "Only i1 non-extloads expected");
// FIXME: Copied from PPC
@@ -1228,7 +1492,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
- return SplitVectorStore(Op, DAG);
+ return ScalarizeVectorStore(Op, DAG);
}
EVT MemVT = Store->getMemoryVT();
@@ -1273,249 +1537,179 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+// This is a shortcut for integer division because we have fast i32<->f32
+// conversions, and fast f32 reciprocal instructions. The 24-bit significand
+// of a float is enough to accurately represent integers of up to 24 bits.
+SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
+                                            bool sign) const {
SDLoc DL(Op);
- EVT OVT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+ MVT IntVT = MVT::i32;
+ MVT FltVT = MVT::f32;
+
+ ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+ ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+
+ if (VT.isVector()) {
+ unsigned NElts = VT.getVectorNumElements();
+ IntVT = MVT::getVectorVT(MVT::i32, NElts);
+ FltVT = MVT::getVectorVT(MVT::f32, NElts);
+ }
+
+ unsigned BitSize = VT.getScalarType().getSizeInBits();
+
+ SDValue jq = DAG.getConstant(1, IntVT);
+
+ if (sign) {
+ // char|short jq = ia ^ ib;
+ jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
+ }
// int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+ SDValue ia = sign ?
+ DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
  // int ib = (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+ SDValue ib = sign ?
+ DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
// float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+ SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
// float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+ SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
// float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(ISD::FMUL, DL, FLTTY,
- fa, DAG.getNode(AMDGPUISD::RCP, DL, FLTTY, fb));
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
// fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
// float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
- DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
+ DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
// int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+ SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
// fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+ fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
// fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-
- // int cv = fr >= fb;
- SDValue cv;
- if (INTTY == MVT::i32) {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- } else {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- }
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
-
-SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, OVT),
- DAG.getConstant(0, OVT),
- ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, OVT),
- DAG.getConstant(0, OVT),
- ISD::SETLT);
+ fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+ // dst = trunc/extend to legal type
+ iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ // dst = iq + jq;
+ SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
+ // Rem needs compensation; it's easier to recompute it from Div.
+ SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
-SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
}
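
The whole f32 shortcut in scalar form; a sketch of the algorithm for unsigned inputs that fit in 24 bits (the signed path additionally computes the jq sign correction shown above):

    #include <cmath>
    #include <cstdint>

    // Sketch of the unsigned 24-bit path: divide in f32 via the reciprocal,
    // truncate, then add back the one the truncated quotient may be short by.
    void udivrem24(uint32_t N, uint32_t D, uint32_t &Q, uint32_t &R) {
      float FA = float(N);                     // float fa = (float)ia
      float FB = float(D);                     // float fb = (float)ib
      float FQ = std::trunc(FA * (1.0f / FB)); // fq = trunc(fa * rcp(fb))
      float FR = std::fabs(std::fma(-FQ, FB, FA)); // fr = |mad(fqneg, fb, fa)|
      Q = uint32_t(FQ);
      if (FR >= std::fabs(FB)) // cv = fr >= fb; jq = cv ? 1 : 0
        ++Q;
      R = N - Q * D; // remainder recomputed from the quotient
    }
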
-SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType().getScalarType();
-
- if (OVT == MVT::i64)
- return LowerSDIV64(Op, DAG);
-
- if (OVT.getScalarType() == MVT::i32)
- return LowerSDIV32(Op, DAG);
+void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const {
+ assert(Op.getValueType() == MVT::i64);
- if (OVT == MVT::i16 || OVT == MVT::i8) {
- // FIXME: We should be checking for the masked bits. This isn't reached
- // because i8 and i16 are not legal types.
- return LowerSDIV24(Op, DAG);
- }
-
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
- // mov r0, LHS
- SDValue r0 = LHS;
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
- // mov r1, RHS
- SDValue r1 = RHS;
+ // Hi/Lo split
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+  // Get speculative values for the high half (used when RHS_Hi == 0).
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+    // Get the current bit of LHS_Lo, from the MSB down.
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
+    SDValue BIT = DAG.getConstant(1u << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
-SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
+ // Update REM
- if (OVT.getScalarType() == MVT::i64)
- return LowerSREM64(Op, DAG);
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
- if (OVT.getScalarType() == MVT::i32)
- return LowerSREM32(Op, DAG);
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
- return SDValue(Op.getNode(), 0);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
}
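LowerUDIVREM64 above is a classic restoring long division: the high half is divided speculatively with 32-bit hardware ops, then the 32 low dividend bits are shifted into the remainder one at a time. A scalar sketch of the same structure (names hypothetical; the top bit shifted out of REM, which the DAG version threads through the REM_Hi/Carry split, is made an explicit guard here):

    #include <cstdint>

    static void udivrem64(uint64_t lhs, uint64_t rhs,
                          uint64_t &div, uint64_t &rem) {
      uint32_t lhsLo = static_cast<uint32_t>(lhs);
      uint32_t lhsHi = static_cast<uint32_t>(lhs >> 32);
      uint32_t rhsLo = static_cast<uint32_t>(rhs);
      uint32_t rhsHi = static_cast<uint32_t>(rhs >> 32);

      // Speculative 32-bit DIV/REM of the high half, valid when RHS_Hi == 0.
      uint64_t divHi = (rhsHi == 0) ? lhsHi / rhsLo : 0;
      rem = (rhsHi == 0) ? lhsHi % rhsLo : lhsHi;
      uint64_t divLo = 0;

      // Restoring division over the 32 low dividend bits, MSB first.
      for (int i = 31; i >= 0; --i) {
        uint64_t carry = rem >> 63;              // bit shifted out of REM
        rem = (rem << 1) | ((lhsLo >> i) & 1);   // shift in the next bit (HBit)
        if (carry || rem >= rhs) {               // REM >= RHS: set quotient bit
          divLo |= uint64_t(1) << i;
          rem -= rhs;
        }
      }
      div = (divHi << 32) | divLo;
    }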
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
@@ -1523,15 +1717,31 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ if (VT == MVT::i64) {
+ SmallVector<SDValue, 2> Results;
+ LowerUDIVREM64(Op, DAG, Results);
+ return DAG.getMergeValues(Results, DL);
+ }
+
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
+ if (VT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
+ DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
+ // TODO: We technically could do this for i64, but shouldn't that just be
+      // handled by something that generally reduces 64-bit division on
+      // 32-bit values to 32-bit?
+ return LowerDIVREM24(Op, DAG, false);
+ }
+ }
+
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
- // RCP_LO = umulo(RCP, Den) */
- SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+  // RCP_LO = mul(RCP, Den)
+ SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
  // RCP_HI = mulhu (RCP, Den)
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
@@ -1562,7 +1772,7 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
// Num_S_Remainder = Quotient * Den
- SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+ SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
// Remainder = Num - Num_S_Remainder
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
@@ -1627,12 +1837,22 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue NegOne = DAG.getConstant(-1, VT);
-
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+ if (VT == MVT::i32) {
+ if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
+ DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
+ // TODO: We technically could do this for i64, but shouldn't that just be
+      // handled by something that generally reduces 64-bit division on
+      // 32-bit values to 32-bit?
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ }
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
@@ -1660,6 +1880,20 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
return DAG.getMergeValues(Res, DL);
}
+// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
+SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ EVT VT = Op.getValueType();
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
+  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
+  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Trunc, Y);
+
+ return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
+}
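A scalar equivalent of this expansion, for reference (the result matches fmodf up to the rounding of the intermediate division):

    #include <cmath>

    // frem(x, y) = x - trunc(x / y) * y
    static float fremSketch(float x, float y) {
      return x - std::trunc(x / y) * y;
    }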
+
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -1702,7 +1936,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
const unsigned ExpBits = 11;
// Extract the exponent.
- SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32,
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
Hi,
DAG.getConstant(FractBits - 32, MVT::i32),
DAG.getConstant(ExpBits, MVT::i32));
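The BFE_I32 to BFE_U32 change fixes a sign bug: the biased f64 exponent is an unsigned 11-bit field starting at bit 52, i.e. bit 20 of the high word, so it must be zero-extended, never sign-extended. The same extraction in plain C++ (a sketch, not the DAG code):

    #include <cstdint>

    static uint32_t extractBiasedExp(uint32_t hi) {
      // Offset = FractBits - 32 = 20, Width = ExpBits = 11.
      return (hi >> 20) & 0x7ff;
    }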
@@ -1793,13 +2027,43 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
+SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
+ SL, MVT::f64, Hi);
+
+ SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
+
+ SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
+ DAG.getConstant(32, MVT::i32));
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
+}
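The conversion above splits the i64 into 32-bit halves, converts each half exactly to f64, and recombines them with LDEXP; only the final FADD can round. A scalar sketch (hypothetical name):

    #include <cmath>
    #include <cstdint>

    static double i64ToF64(uint64_t v, bool isSigned) {
      uint32_t lo = static_cast<uint32_t>(v);
      // The high half carries the sign in the signed case; the low half is
      // always converted unsigned (two's-complement split).
      double hi = isSigned
                      ? static_cast<double>(static_cast<int32_t>(v >> 32))
                      : static_cast<double>(static_cast<uint32_t>(v >> 32));
      return std::ldexp(hi, 32) + static_cast<double>(lo);  // LDEXP + FADD
    }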
+
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
- SDLoc DL(Op);
- if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ if (S0.getValueType() != MVT::i64)
return SDValue();
+ EVT DestVT = Op.getValueType();
+ if (DestVT == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, false);
+
+ assert(DestVT == MVT::f32);
+
+ SDLoc DL(Op);
+
// f32 uint_to_fp i64
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(0, MVT::i32));
@@ -1812,16 +2076,62 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
-SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
- SDLoc DL(Op);
- SDValue Shift = DAG.getConstant(BitsDiff, VT);
- // Shift left by 'Shift' bits.
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Op.getOperand(0), Shift);
- // Signed shift Right by 'Shift' bits.
- return DAG.getNode(ISD::SRA, DL, VT, Shl, Shift);
+SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+
+ SDValue Src = Op.getOperand(0);
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ SDValue K0
+ = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
+ SDValue K1
+ = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
+
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
+
+ SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
+
+ SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
+
+ SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
+ MVT::i32, FloorMul);
+ SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
+}
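K0 (0x3df0000000000000) is 2^-32 and K1 (0xc1f0000000000000) is -2^32, so the sequence computes Hi = floor(trunc(x) * 2^-32) and recovers the low word exactly with a single FMA. A scalar sketch of the unsigned case (hypothetical name; the signed case only changes the conversion of the high part, as the Signed flag above does):

    #include <cmath>
    #include <cstdint>

    static uint64_t f64ToU64(double src) {
      const double K0 = 1.0 / 4294967296.0;  // 2^-32
      const double K1 = -4294967296.0;       // -2^32

      double t = std::trunc(src);
      double hiF = std::floor(t * K0);
      double loF = std::fma(hiF, K1, t);     // exact: t - hi * 2^32
      uint32_t hi = static_cast<uint32_t>(hiF);  // FP_TO_UINT of the high part
      uint32_t lo = static_cast<uint32_t>(loF);
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }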
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, false);
+
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
@@ -1887,7 +2197,8 @@ template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
uint32_t Offset, uint32_t Width) {
if (Width + Offset < 32) {
- IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width);
+ uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
+ IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
return DAG.getConstant(Result, MVT::i32);
}
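The rewritten fold sidesteps undefined behaviour: left-shifting a negative IntTy is UB in C++, so the left shift now happens on uint32_t and only the right shift, which supplies the sign or zero extension, happens in IntTy. The same fold in isolation:

    #include <cstdint>

    template <typename IntTy>
    static IntTy bfe(IntTy src, uint32_t offset, uint32_t width) {
      // Place the field at the top of the word (unsigned, so no UB), then
      // shift back down; the right shift's signedness does the extension.
      uint32_t shl = static_cast<uint32_t>(src) << (32 - offset - width);
      return static_cast<IntTy>(shl) >> (32 - width);
    }

    // bfe<int32_t>(0x0000ff00, 8, 8) == -1
    // bfe<uint32_t>(0x0000ff00, 8, 8) == 0xff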
@@ -1916,7 +2227,8 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SDValue Value = SN->getValue();
EVT VT = Value.getValueType();
- if (isTypeLegal(VT) || SN->isVolatile() || !ISD::isNormalLoad(Value.getNode()))
+ if (isTypeLegal(VT) || SN->isVolatile() ||
+ !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
return SDValue();
LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
@@ -1992,9 +2304,30 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT_CC: {
- return CombineMinMax(N, DAG);
+ case ISD::SELECT: {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+ if (VT == MVT::f32)
+ return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+
+ // TODO: Implement min / max Evergreen instructions.
+ if (VT == MVT::i32 &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
}
+
+ break;
+ }
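The new ISD::SELECT case fires on select-of-setcc shapes and hands them to the min/max combiners. For the integer path, the shape CombineIMinMax is expected to match looks like this sketch:

    // select(setcc(a, b, slt), a, b)  ->  smin(a, b)
    // select(setcc(a, b, slt), b, a)  ->  smax(a, b)
    static int32_t selectAsMin(int32_t a, int32_t b) {
      return (a < b) ? a : b;
    }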
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -2039,37 +2372,40 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
}
- if (ConstantSDNode *Val = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
if (Signed) {
return constantFoldBFE<int32_t>(DAG,
- Val->getSExtValue(),
+ CVal->getSExtValue(),
OffsetVal,
WidthVal);
}
return constantFoldBFE<uint32_t>(DAG,
- Val->getZExtValue(),
+ CVal->getZExtValue(),
OffsetVal,
WidthVal);
}
- APInt Demanded = APInt::getBitsSet(32,
- OffsetVal,
- OffsetVal + WidthVal);
-
if ((OffsetVal + WidthVal) >= 32) {
SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
}
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
- TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (BitsFrom.hasOneUse()) {
+ APInt Demanded = APInt::getBitsSet(32,
+ OffsetVal,
+ OffsetVal + WidthVal);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+ KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
}
break;
@@ -2167,12 +2503,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(MAD)
+ NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
- NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(FMAX3)
+ NODE_NAME_CASE(SMAX3)
+ NODE_NAME_CASE(UMAX3)
+ NODE_NAME_CASE(FMIN3)
+ NODE_NAME_CASE(SMIN3)
+ NODE_NAME_CASE(UMIN3)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
@@ -2182,6 +2525,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RSQ_LEGACY)
NODE_NAME_CASE(RSQ_CLAMPED)
+ NODE_NAME_CASE(LDEXP)
+ NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
@@ -2213,6 +2558,46 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = Operand.getValueType();
+
+ if (VT == MVT::f32) {
+ RefinementSteps = 0;
+ return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
+ }
+
+  // TODO: There is also an f64 rsq instruction, but the documentation is less
+ // clear on its precision.
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = Operand.getValueType();
+
+ if (VT == MVT::f32) {
+ // Reciprocal, < 1 ulp error.
+ //
+ // This reciprocal approximation converges to < 0.5 ulp error with one
+    // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
+
+ RefinementSteps = 0;
+ return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
+ }
+
+  // TODO: There is also an f64 rcp instruction, but the documentation is less
+ // clear on its precision.
+
+ return SDValue();
+}
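The "< 0.5 ulp with one Newton-Raphson iteration and two FMAs" comment refers to the standard reciprocal refinement x' = x + x * (1 - d * x). A sketch of one such step:

    #include <cmath>

    static float refineRcp(float d, float x /* ~ 1/d */) {
      float e = std::fma(-d, x, 1.0f);   // e  = 1 - d*x
      return std::fma(x, e, x);          // x' = x + x*e = x*(2 - d*x)
    }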
+
static void computeKnownBitsForMinMax(const SDValue Op0,
const SDValue Op1,
APInt &KnownZero,
@@ -2276,17 +2661,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
unsigned BitWidth = 32;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
- if (Width == 0) {
- KnownZero = APInt::getAllOnesValue(BitWidth);
- KnownOne = APInt::getNullValue(BitWidth);
- return;
- }
- // FIXME: This could do a lot more. If offset is 0, should be the same as
- // sign_extend_inreg implementation, but that involves duplicating it.
- if (Opc == AMDGPUISD::BFE_I32)
- KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
- else
+ if (Opc == AMDGPUISD::BFE_U32)
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 624d4e0c1967..15f529c40a31 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUISELLOWERING_H
-#define AMDGPUISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
#include "llvm/Target/TargetLowering.h"
@@ -43,25 +43,22 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -73,12 +70,25 @@ protected:
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
- /// \brief Split a vector load into multiple scalar loads.
- SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into a scalar load of each component.
+ SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into 2 loads of half the vector.
+ SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into a scalar store of each component.
+ SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
+ void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
@@ -114,8 +124,14 @@ public:
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool ShouldShrinkFPConstant(EVT VT) const override;
+ bool shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtType,
+ EVT ExtVT) const override;
bool isLoadBitCastBeneficial(EVT, EVT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -132,9 +148,33 @@ public:
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const;
+ SDValue CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const;
+ SDValue CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const;
+
const char* getTargetNodeName(unsigned Opcode) const override;
+ SDValue getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
+
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const {
return N;
@@ -149,10 +189,8 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- virtual unsigned ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
@@ -176,17 +214,24 @@ enum {
DWORDADDR,
FRACT,
CLAMP,
+  MAD, // Multiply + add with the same result as the separate operations.
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
// Denormals handled on some parts.
COS_HW,
SIN_HW,
- FMAX,
+ FMAX_LEGACY,
SMAX,
UMAX,
- FMIN,
+ FMIN_LEGACY,
SMIN,
UMIN,
+ FMAX3,
+ SMAX3,
+ UMAX3,
+ FMIN3,
+ SMIN3,
+ UMIN3,
URECIP,
DIV_SCALE,
DIV_FMAS,
@@ -199,6 +244,8 @@ enum {
RSQ,
RSQ_LEGACY,
RSQ_CLAMPED,
+ LDEXP,
+ FP_CLASS,
DOT4,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
@@ -248,4 +295,4 @@ enum {
} // End namespace llvm
-#endif // AMDGPUISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index fef5b8cac5ba..5beaa6841c94 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -86,21 +86,6 @@ AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// TODO: Implement this function
return nullptr;
}
-bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const {
- while (iter != MBB.end()) {
- switch (iter->getOpcode()) {
- default:
- break;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32:
- case AMDGPU::BRANCH:
- return true;
- };
- ++iter;
- }
- return false;
-}
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -147,7 +132,6 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
} else if (isRegisterStore(*MI)) {
int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::val);
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
@@ -215,15 +199,30 @@ AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return 0;
}
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const {
- assert(Offset2 > Offset1
- && "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 16,
- // then schedule together.
- // TODO: Make the loads schedule near if it fits in a cacheline
- return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+bool AMDGPUInstrInfo::enableClusterLoads() const {
+ return true;
+}
+
+// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split into two 16-store batches.
+//
+// Loads are clustered until this returns false, rather than trying to
+// schedule groups of stores. This also means we have to decide whether loads
+// from different address spaces should be clustered, and whether some of them
+// might cause bank conflicts.
+//
+// This might be deprecated so it might not be worth that much effort to fix.
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+ int64_t Offset0, int64_t Offset1,
+ unsigned NumLoads) const {
+ assert(Offset1 > Offset0 &&
+ "Second offset should be larger than first offset!");
+  // If we have 16 or fewer loads in a row, and the offsets are within 64
+  // bytes, then schedule them together.
+
+ // A cacheline is 64 bytes (for global memory).
+ return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
bool
@@ -320,7 +319,10 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getTarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+ Offset = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -335,12 +337,12 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
}
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
-// header files, so we need to wrap it in a function that takes unsigned
+// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
- return getMCOpcode(Opcode);
+ return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
}
}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index d5041f558163..da9833d25a52 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -13,10 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUINSTRUCTIONINFO_H
-#define AMDGPUINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
-#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <map>
@@ -41,8 +40,6 @@ class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPURegisterInfo RI;
- bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const;
virtual void anchor();
protected:
const AMDGPUSubtarget &ST;
@@ -74,11 +71,6 @@ public:
LiveVariables *LV) const override;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const = 0;
-
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -101,6 +93,7 @@ protected:
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const override;
+public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexBegin(const MachineFunction &MF) const;
@@ -109,7 +102,6 @@ protected:
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexEnd(const MachineFunction &MF) const;
-public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -120,6 +112,9 @@ public:
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = nullptr) const override;
+
+ bool enableClusterLoads() const override;
+
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
@@ -196,4 +191,4 @@ namespace AMDGPU {
#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
-#endif // AMDGPUINSTRINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 820f1a80d75e..0e34392bd50d 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -23,6 +23,14 @@ def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;
+def AMDGPULdExpOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPUFPClassOp : SDTypeProfile<1, 2,
+ [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
+>;
+
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -52,12 +60,20 @@ def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
-// out = max(a, b) a and b are floats
-def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+
+def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
+
+// out = max(a, b), where a and b are floats and a comparison with NaN fails.
+// This is not commutative because this gives the second operand:
+// x < nan ? x : nan -> nan
+// nan < x ? nan : x -> x
+def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
+ []
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+def AMDGPUmad : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp, []>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
@@ -69,12 +85,12 @@ def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-// out = min(a, b) a and b are floats
-def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+// out = min(a, b), where a and b are floats and a comparison with NaN fails.
+def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
+ []
>;
-// out = min(a, b) a snd b are signed ints
+// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@@ -84,6 +100,37 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+// FIXME: TableGen doesn't like commutative instructions with more
+// than 2 operands.
+// out = max(a, b, c) a, b and c are floats
+def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b, and c are signed ints
+def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b and c are unsigned ints
+def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are floats
+def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are signed ints
+def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are unsigned ints
+def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
@@ -130,7 +177,7 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
-// src1: dst - rat offset (aka pointer) in dwords
+// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
SDTypeProfile<0, 2, []>,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index cd3560378e57..4e536c37b0bd 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -23,6 +23,8 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
let Pattern = pattern;
let Itinerary = NullALU;
+ let isCodeGenOnly = 1;
+
let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;
}
@@ -71,6 +73,11 @@ def COND_OEQ : PatLeaf <
[{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;
+def COND_ONE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
+>;
+
def COND_OGT : PatLeaf <
(cond),
[{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
@@ -91,23 +98,28 @@ def COND_OLE : PatLeaf <
[{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;
-def COND_UNE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
->;
def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
//===----------------------------------------------------------------------===//
-// PatLeafs for unsigned comparisons
+// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//
+def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
+def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
+// XXX - For some reason the R600 version prefers to use unordered
+// for setne?
+def COND_UNE_NE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
+>;
+
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//
@@ -133,7 +145,7 @@ def COND_NE : PatLeaf <
def COND_NULL : PatLeaf <
(cond),
- [{return false;}]
+ [{(void)N; return false;}]
>;
//===----------------------------------------------------------------------===//
@@ -195,6 +207,14 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -223,6 +243,14 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -248,6 +276,11 @@ def az_extloadi32_global : PatFrag<(ops node:$ptr),
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi32_flat : PatFrag<(ops node:$ptr),
+ (az_extloadi32 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi32_constant : PatFrag<(ops node:$ptr),
(az_extloadi32 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
@@ -263,6 +296,16 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
+def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
@@ -282,6 +325,17 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
+}]>;
+
+def local_load_aligned8bytes : Aligned8Bytes <
+ (ops node:$ptr), (local_load node:$ptr)
+>;
+
+def local_store_aligned8bytes : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
+>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
@@ -307,6 +361,7 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
+
def atomic_cmp_swap_32_local :
PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
@@ -323,6 +378,45 @@ def atomic_cmp_swap_64_local :
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def flat_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+}]>;
+
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
+def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
+def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
+def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
+def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+
+//===----------------------------------------------------------------------===//
+// Misc Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def fmad : PatFrag <
+ (ops node:$src0, node:$src1, node:$src2),
+ (fadd (fmul node:$src0, node:$src1), node:$src2)
+>;
class Constants {
int TWO_PI = 0x40c90fdb;
@@ -442,8 +536,9 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
// BFI_INT patterns
-multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
-
+multiclass BFIPatterns <Instruction BFI_INT,
+ Instruction LoadImm32,
+ RegisterClass RC64> {
// Definition from ISA doc:
// (y & x) | (z & ~x)
def : Pat <
@@ -465,8 +560,8 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
- (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+ (REG_SEQUENCE RC64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 0x7fffffff),
(i32 (EXTRACT_SUBREG $src0, sub1)),
(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
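The REG_SEQUENCE pattern above builds f64 fcopysign from a single BFI_INT on the high words: src0's low word is kept, and src1's sign bit is inserted into src0's high word using the 0x7fffffff mask. A scalar sketch of the same bit manipulation:

    #include <cstdint>
    #include <cstring>

    static double copysignSketch(double src0, double src1) {
      uint64_t a, b;
      std::memcpy(&a, &src0, sizeof a);
      std::memcpy(&b, &src1, sizeof b);
      uint32_t hi0 = static_cast<uint32_t>(a >> 32);
      uint32_t hi1 = static_cast<uint32_t>(b >> 32);
      // BFI_INT(x = 0x7fffffff, y = hi0, z = hi1) = (y & x) | (z & ~x)
      uint32_t hi = (hi0 & 0x7fffffffu) | (hi1 & 0x80000000u);
      uint64_t r = (static_cast<uint64_t>(hi) << 32)
                   | static_cast<uint32_t>(a);
      double res;
      std::memcpy(&res, &r, sizeof res);
      return res;
    }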
diff --git a/lib/Target/R600/AMDGPUIntrinsicInfo.cpp b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
index 58916a995496..e94bb6013d83 100644
--- a/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
: TargetIntrinsicInfo() {}
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
diff --git a/lib/Target/R600/AMDGPUIntrinsicInfo.h b/lib/Target/R600/AMDGPUIntrinsicInfo.h
index 5be68a217da5..4c95b5ec0974 100644
--- a/lib/Target/R600/AMDGPUIntrinsicInfo.h
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.h
@@ -11,8 +11,8 @@
/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
-#ifndef AMDGPU_INTRINSICINFO_H
-#define AMDGPU_INTRINSICINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
#include "llvm/IR/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -33,7 +33,7 @@ enum ID {
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
- AMDGPUIntrinsicInfo(TargetMachine *tm);
+ AMDGPUIntrinsicInfo();
std::string getName(unsigned IntrId, Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned lookupName(const char *Name, unsigned Len) const override;
@@ -45,4 +45,4 @@ public:
} // end namespace llvm
-#endif // AMDGPU_INTRINSICINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index ce5c41ceb267..1995ef2b0c9e 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -40,8 +40,13 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
{ }
enum AMDGPUMCInstLower::SISubtarget
-AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned) const {
- return AMDGPUMCInstLower::SI;
+AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
+ switch (Gen) {
+ default:
+ return AMDGPUMCInstLower::SI;
+ case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ return AMDGPUMCInstLower::VI;
+ }
}
unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {
@@ -63,13 +68,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
- case MachineOperand::MO_FPImmediate: {
- const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
- assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
- "Only floating point immediates are supported at the moment.");
- MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
- break;
- }
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
@@ -104,7 +102,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#ifdef _DEBUG
StringRef Err;
- if (!TM.getInstrInfo()->verifyInstruction(MI, Err)) {
+ if (!TM.getSubtargetImpl()->getInstrInfo()->verifyInstruction(MI, Err)) {
errs() << "Warning: Illegal instruction detected: " << Err << "\n";
MI->dump();
}
@@ -128,8 +126,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
std::string &DisasmLine = DisasmLines.back();
raw_string_ostream DisasmStream(DisasmLine);
- AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *TM.getInstrInfo(),
- *TM.getRegisterInfo());
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
+ *TM.getSubtargetImpl()->getInstrInfo(),
+ *TM.getSubtargetImpl()->getRegisterInfo());
InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
// Disassemble instruction/operands to hex representation.
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
index 58fe34d32d31..0ae4d11bf1d1 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_MCINSTLOWER_H
-#define AMDGPU_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
namespace llvm {
@@ -22,7 +22,8 @@ class AMDGPUMCInstLower {
// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
- SI = 0
+ SI = 0,
+ VI = 1
};
MCContext &Ctx;
@@ -45,4 +46,4 @@ public:
} // End namespace llvm
-#endif //AMDGPU_MCINSTLOWER_H
+#endif
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
index 90af80113ece..0f3f9e26528b 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -12,7 +12,9 @@ void AMDGPUMachineFunction::anchor() {}
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
ShaderType(ShaderType::COMPUTE),
- LDSSize(0) {
+ LDSSize(0),
+ ScratchSize(0),
+ IsKernel(true) {
AttributeSet Set = MF.getFunction()->getAttributes();
Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
ShaderTypeAttribute);
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
index 0854d588eeba..f5e4694e76f6 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUMACHINEFUNCTION_H
-#define AMDGPUMACHINEFUNCTION_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
@@ -30,10 +30,16 @@ public:
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
+  /// Start of the implicit kernel arguments.
+ unsigned ABIArgOffset;
+
unsigned getShaderType() const {
return ShaderType;
}
+
+ unsigned ScratchSize;
+ bool IsKernel;
};
}
-#endif // AMDGPUMACHINEFUNCTION_H
+#endif
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 218750d445e6..b81fef47d55a 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -36,11 +36,9 @@ class AMDGPUPromoteAlloca : public FunctionPass,
public:
AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
LocalMemAvailable(0) { }
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual const char *getPassName() const {
- return "AMDGPU Promote Alloca";
- }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
void visitAlloca(AllocaInst &I);
};
@@ -107,14 +105,16 @@ static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
ArrayTy->getArrayNumElements());
}
-static Value* calculateVectorIndex(Value *Ptr,
- std::map<GetElementPtrInst*, Value*> GEPIdx) {
+static Value *
+calculateVectorIndex(Value *Ptr,
+ const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
if (isa<AllocaInst>(Ptr))
return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
- return GEPIdx[GEP];
+ auto I = GEPIdx.find(GEP);
+ return I == GEPIdx.end() ? nullptr : I->second;
}
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
@@ -234,7 +234,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
return true;
}
-static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+ bool Success = true;
for (User *User : Val->users()) {
if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
continue;
@@ -242,11 +243,20 @@ static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
WorkList.push_back(User);
continue;
}
+
+ // FIXME: Correctly handle ptrtoint instructions.
+ Instruction *UseInst = dyn_cast<Instruction>(User);
+ if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
+ return false;
+
if (!User->getType()->isPointerTy())
continue;
+
WorkList.push_back(User);
- collectUsesWithPtrTypes(User, WorkList);
+
+ Success &= collectUsesWithPtrTypes(User, WorkList);
}
+ return Success;
}
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
@@ -274,6 +284,13 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
return;
}
+ std::vector<Value*> WorkList;
+
+ if (!collectUsesWithPtrTypes(&I, WorkList)) {
+ DEBUG(dbgs() << " Do not know how to convert all uses\n");
+ return;
+ }
+
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
@@ -320,10 +337,6 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
- std::vector<Value*> WorkList;
-
- collectUsesWithPtrTypes(Offset, WorkList);
-
for (std::vector<Value*>::iterator i = WorkList.begin(),
e = WorkList.end(); i != e; ++i) {
Value *V = *i;
@@ -331,6 +344,13 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
if (!Call) {
Type *EltTy = V->getType()->getPointerElementType();
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
+      // The operand's value is corrected by the addrspacecast itself.
+ if (isa<AddrSpaceCastInst>(V))
+ continue;
+
+ // FIXME: It doesn't really make sense to try to do this for all
+ // instructions.
V->mutateType(NewTy);
continue;
}
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 34332808f865..57b054bc2a61 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -42,8 +42,7 @@ void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- assert(!"Subroutines not supported yet");
- return 0;
+ return AMDGPU::NoRegister;
}
unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 46aa7a17dfca..f27576ab9736 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUREGISTERINFO_H
-#define AMDGPUREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -51,7 +51,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
unsigned getSubRegFromChannel(unsigned Channel) const;
const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
unsigned getFrameRegister(const MachineFunction &MF) const override;
@@ -62,4 +62,4 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
} // End namespace llvm
-#endif // AMDIDSAREGISTERINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index e3c2a50ab828..597e558e6634 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,12 +13,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallString.h"
-
using namespace llvm;
#define DEBUG_TYPE "amdgpu-subtarget"
@@ -28,26 +29,23 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
- AMDGPUGenSubtargetInfo(TT, GPU, FS),
- DevName(GPU),
- Is64bit(false),
- DumpCode(false),
- R600ALUInst(false),
- HasVertexCache(false),
- TexVTXClauseSize(0),
- Gen(AMDGPUSubtarget::R600),
- FP64(false),
- FP64Denormals(false),
- FP32Denormals(false),
- CaymanISA(false),
- EnableIRStructurizer(true),
- EnablePromoteAlloca(false),
- EnableIfCvt(true),
- WavefrontSize(0),
- CFALUBug(false),
- LocalMemorySize(0),
- InstrItins(getInstrItineraryForCPU(GPU)) {
+static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
+ std::string Ret = "e-p:32:32";
+
+ if (ST.is64bit()) {
+ // 32-bit private, local, and region pointers. 64-bit global and constant.
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+ }
+
+ Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+ "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+ return Ret;
+}
+
+AMDGPUSubtarget &
+AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
+ // Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
// double precision rate, so don't enable by default.
@@ -61,16 +59,37 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
ParseSubtargetFeatures(GPU, FullFS);
+  // FIXME: I don't think Evergreen has any useful support for
+ // denormals, but should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
-
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere if
- // someone tries to enable these?
FP32Denormals = false;
FP64Denormals = false;
+ }
+ return *this;
+}
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
+ TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
+ DumpCode(false), R600ALUInst(false), HasVertexCache(false),
+ TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
+ FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
+ FlatAddressSpace(false), EnableIRStructurizer(true),
+ EnablePromoteAlloca(false), EnableIfCvt(true),
+ EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ 64 * 16, // Maximum stack alignment (long16)
+ 0),
+ InstrItins(getInstrItineraryForCPU(GPU)),
+ TargetTriple(TT) {
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ InstrInfo.reset(new R600InstrInfo(*this));
+ TLInfo.reset(new R600TargetLowering(TM));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
+ TLInfo.reset(new SITargetLowering(TM));
}
}
@@ -87,3 +106,10 @@ unsigned AMDGPUSubtarget::getStackEntrySize() const {
llvm_unreachable("Illegal wavefront size.");
}
}
+
+unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
+  switch (getGeneration()) {
+ default: llvm_unreachable("ChipID unknown");
+ case SEA_ISLANDS: return 12;
+ }
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index a844b37b6be5..90179d79d25d 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -12,25 +12,26 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUSUBTARGET_H
-#define AMDGPUSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
-#define MAX_CB_SIZE (1 << 16)
-
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
-
public:
enum Generation {
R600 = 0,
@@ -38,7 +39,8 @@ public:
EVERGREEN,
NORTHERN_ISLANDS,
SOUTHERN_ISLANDS,
- SEA_ISLANDS
+ SEA_ISLANDS,
+ VOLCANIC_ISLANDS,
};
private:
@@ -53,24 +55,41 @@ private:
bool FP64Denormals;
bool FP32Denormals;
bool CaymanISA;
+ bool FlatAddressSpace;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
bool EnableIfCvt;
+ bool EnableLoadStoreOpt;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
+ const DataLayout DL;
+ AMDGPUFrameLowering FrameLowering;
+ std::unique_ptr<AMDGPUTargetLowering> TLInfo;
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
+ Triple TargetTriple;
public:
- AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
+ AMDGPUSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS);
- const AMDGPUInstrInfo *getInstrInfo() const {
+ const AMDGPUFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const AMDGPUInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
-
- const InstrItineraryData &getInstrItineraryData() const {
- return InstrItins;
+ const AMDGPURegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo->getRegisterInfo();
+ }
+ AMDGPUTargetLowering *getTargetLowering() const override {
+ return TLInfo.get();
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
}
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -107,6 +126,10 @@ public:
return FP64Denormals;
}
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
@@ -158,6 +181,10 @@ public:
return EnableIfCvt;
}
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
unsigned getWavefrontSize() const {
return WavefrontSize;
}
@@ -173,6 +200,8 @@ public:
return LocalMemorySize;
}
+ unsigned getAmdKernelCodeChipID() const;
+
bool enableMachineScheduler() const override {
return getGeneration() <= NORTHERN_ISLANDS;
}
@@ -192,8 +221,11 @@ public:
bool r600ALUEncoding() const {
return R600ALUInst;
}
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
+ }
};
} // End namespace llvm
-#endif // AMDGPUSUBTARGET_H
+#endif
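
With these accessors moved onto the subtarget, clients query them through the
target machine's subtarget instead of the target machine itself. A minimal
sketch of the pattern, using only calls this patch itself makes elsewhere (TM
is an AMDGPUTargetMachine *):

    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
    const AMDGPUInstrInfo *TII = ST.getInstrInfo();
    const AMDGPURegisterInfo *TRI = ST.getRegisterInfo();
    const DataLayout *DL = ST.getDataLayout();
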
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 56ba719e6863..a1da7172d53f 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"
@@ -38,6 +39,7 @@ using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
// Register the target
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+ RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
@@ -48,46 +50,20 @@ static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
-static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
- std::string Ret = "e-p:32:32";
-
- if (ST.is64bit()) {
- // 32-bit local, and region pointers. 64-bit private, global, and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
- }
-
- Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
- "-v512:512-v1024:1024-v2048:2048-n32:64";
-
- return Ret;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OptLevel
-)
-:
- LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- Subtarget(TT, CPU, FS),
- Layout(computeDataLayout(Subtarget)),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- 64 * 16 // Maximum stack alignment (long16)
- , 0),
- IntrinsicInfo(this),
- InstrItins(&Subtarget.getInstrItineraryData()) {
- // TLInfo uses InstrInfo so it must be initialized after.
- if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- TLInfo.reset(new R600TargetLowering(*this));
- } else {
- TLInfo.reset(new SITargetLowering(*this));
- }
+ StringRef CPU, StringRef FS,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()),
+ Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+ delete TLOF;
}
namespace {
@@ -108,13 +84,14 @@ public:
return nullptr;
}
- virtual void addCodeGenPrepare();
+ void addIRPasses() override;
+ void addCodeGenPrepare() override;
bool addPreISel() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // End of anonymous namespace
@@ -134,6 +111,19 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
PM.add(createAMDGPUTargetTransformInfoPass(this));
}
+void AMDGPUPassConfig::addIRPasses() {
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+ // inlining pass will cause all of the PassConfig's passes to be run
+ // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
+ TargetPassConfig::addIRPasses();
+}
+
void AMDGPUPassConfig::addCodeGenPrepare() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.isPromoteAllocaEnabled()) {
@@ -161,61 +151,82 @@ AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
- addPass(createSILowerI1CopiesPass());
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(createSIFoldOperandsPass());
+ }
+
return false;
}
-bool AMDGPUPassConfig::addPreRegAlloc() {
+void AMDGPUPassConfig::addPreRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createR600VectorRegMerger(*TM));
} else {
- addPass(createSIFixSGPRCopiesPass(*TM));
- // SIFixSGPRCopies can generate a lot of duplicate instructions,
- // so we need to run MachineCSE afterwards.
- addPass(&MachineCSEID);
- addPass(createSIShrinkInstructionsPass());
- initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
- insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ }
+
+ addPass(createSIShrinkInstructionsPass(), false);
+ addPass(createSIFixSGPRLiveRangesPass(), false);
}
- return false;
}
-bool AMDGPUPassConfig::addPostRegAlloc() {
+void AMDGPUPassConfig::addPostRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- addPass(createSIShrinkInstructionsPass());
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createSIInsertWaits(*TM));
+ addPass(createSIPrepareScratchRegs(), false);
+ addPass(createSIShrinkInstructionsPass(), false);
}
- return false;
}
-bool AMDGPUPassConfig::addPreSched2() {
+void AMDGPUPassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600EmitClauseMarkers());
+ addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled())
- addPass(&IfConverterID);
+ addPass(&IfConverterID, false);
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600ClauseMergePass(*TM));
- return false;
+ addPass(createR600ClauseMergePass(*TM), false);
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ addPass(createSIInsertWaits(*TM), false);
+ }
}
-bool AMDGPUPassConfig::addPreEmitPass() {
+void AMDGPUPassConfig::addPreEmitPass() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createAMDGPUCFGStructurizerPass());
- addPass(createR600ExpandSpecialInstrsPass(*TM));
- addPass(&FinalizeMachineBundlesID);
- addPass(createR600Packetizer(*TM));
- addPass(createR600ControlFlowFinalizer(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(), false);
+ addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+ addPass(&FinalizeMachineBundlesID, false);
+ addPass(createR600Packetizer(*TM), false);
+ addPass(createR600ControlFlowFinalizer(*TM), false);
} else {
- addPass(createSILowerControlFlowPass(*TM));
+ addPass(createSILowerControlFlowPass(*TM), false);
}
-
- return false;
}
+
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) :
+ AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 3bb15beb6bf1..66b30700d883 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_TARGET_MACHINE_H
-#define AMDGPU_TARGET_MACHINE_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
@@ -24,48 +24,48 @@
namespace llvm {
-class AMDGPUTargetMachine : public LLVMTargetMachine {
+//===----------------------------------------------------------------------===//
+// AMDGPU Target Machine (R600+)
+//===----------------------------------------------------------------------===//
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+protected:
+ TargetLoweringObjectFile *TLOF;
AMDGPUSubtarget Subtarget;
- const DataLayout Layout;
- AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- const InstrItineraryData *InstrItins;
public:
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
- return &IntrinsicInfo;
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const AMDGPUSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return InstrItins;
+ const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
+ return &IntrinsicInfo;
}
- const DataLayout *getDataLayout() const override { return &Layout; }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
/// \brief Register R600 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+class GCNTargetMachine : public AMDGPUTargetMachine {
+
+public:
+ GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
};
} // End namespace llvm
-#endif // AMDGPU_TARGET_MACHINE_H
+#endif
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 88934b65876e..e7bc00635f75 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -52,7 +52,7 @@ public:
AMDGPUTTI(const AMDGPUTargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
@@ -74,16 +74,14 @@ public:
bool hasBranchDivergence() const override;
- void getUnrollingPreferences(Loop *L,
+ void getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const override;
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
unsigned getNumberOfRegisters(bool Vector) const override;
unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
-
- /// @}
+ unsigned getMaxInterleaveFactor() const override;
};
} // end anonymous namespace
@@ -99,8 +97,14 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
bool AMDGPUTTI::hasBranchDivergence() const { return true; }
-void AMDGPUTTI::getUnrollingPreferences(Loop *L,
+void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
UnrollingPreferences &UP) const {
+ UP.Threshold = 300; // Twice the default.
+ UP.Count = UINT_MAX;
+ UP.Partial = true;
+
+ // TODO: Do we want runtime unrolling?
+
for (const BasicBlock *BB : L->getBlocks()) {
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
@@ -120,7 +124,7 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L,
//
// Don't use the maximum allowed value here as it will make some
// programs way too big.
- UP.Threshold = 500;
+ UP.Threshold = 800;
}
}
}
@@ -147,7 +151,7 @@ unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
return 32;
}
-unsigned AMDGPUTTI::getMaximumUnrollFactor() const {
+unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
// Semi-arbitrary large amount.
return 64;
}
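
The raised 800 threshold above favors fully unrolling loops whose GEPs index
private (scratch) arrays: once unrolled, the indices become constants and the
array can be promoted to registers instead of living in scratch. A
hypothetical device-side loop of the shape the heuristic rewards:

    // Hypothetical kernel body; 'tmp' sits in the private address space.
    void body(float *out, int tid) {
      float tmp[8];
      for (int i = 0; i < 8; ++i)   // GEP into 'tmp' varies with 'i'
        tmp[i] = tid * i * 0.5f;
      for (int i = 0; i < 8; ++i)   // after full unroll the indices are
        out[tid * 8 + i] = tmp[i];  // constants, so 'tmp' can be in registers
    }
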
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index f3a03914391c..ee6e8ecfb29d 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -11,6 +11,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,7 +161,7 @@ public:
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
@@ -337,7 +338,7 @@ protected:
void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
- /// This is work around solution for findNearestCommonDominator not avaiable
+ /// This is a workaround for findNearestCommonDominator not being available
/// for post dominators; a proper fix should go into Dominators.h.
MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2);
diff --git a/lib/Target/R600/AMDKernelCodeT.h b/lib/Target/R600/AMDKernelCodeT.h
new file mode 100644
index 000000000000..4d3041ff3db8
--- /dev/null
+++ b/lib/Target/R600/AMDKernelCodeT.h
@@ -0,0 +1,704 @@
+//===-- AMDKernelCodeT.h - AMD Kernel Code Object definitions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file AMDKernelCodeT.h
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDKERNELCODET_H
+#define AMDKERNELCODET_H
+
+#include <cstddef>
+#include <cstdint>
+
+//---------------------------------------------------------------------------//
+// AMD Kernel Code, and its dependencies //
+//---------------------------------------------------------------------------//
+
+typedef uint8_t hsa_powertwo8_t;
+typedef uint32_t hsa_ext_code_kind_t;
+typedef uint8_t hsa_ext_brig_profile8_t;
+typedef uint8_t hsa_ext_brig_machine_model8_t;
+typedef uint64_t hsa_ext_control_directive_present64_t;
+typedef uint16_t hsa_ext_exception_kind16_t;
+typedef uint32_t hsa_ext_code_kind32_t;
+
+typedef struct hsa_dim3_s {
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+} hsa_dim3_t;
+
+/// The version of the amd_*_code_t struct. Minor versions must be
+/// backward compatible.
+typedef uint32_t amd_code_version32_t;
+enum amd_code_version_t {
+ AMD_CODE_VERSION_MAJOR = 0,
+ AMD_CODE_VERSION_MINOR = 1
+};
+
+/// The values used to define the number of bytes to use for the
+/// swizzle element size.
+enum amd_element_byte_size_t {
+ AMD_ELEMENT_2_BYTES = 0,
+ AMD_ELEMENT_4_BYTES = 1,
+ AMD_ELEMENT_8_BYTES = 2,
+ AMD_ELEMENT_16_BYTES = 3
+};
+
+/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+/// COMPUTE_PGM_RSRC2 registers.
+typedef uint64_t amd_compute_pgm_resource_register64_t;
+
+/// Every amd_*_code_t has the following properties, which are composed of
+/// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
+/// bit width (AMD_CODE_PROPERTY_*_WIDTH), and bit shift amount
+/// (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.
+///
+/// (Note that bit fields cannot be used as their layout is
+/// implementation defined in the C standard and so cannot be used to
+/// specify an ABI)
+typedef uint32_t amd_code_property32_t;
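+
+/// Illustrative use (hypothetical 'Props' and 'Val'): a field is read by
+/// masking then shifting,
+///   Val = (Props & AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
+///         AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
+/// and written by clearing the field and or-ing in the shifted value,
+///   Props = (Props & ~AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) |
+///           (Val << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);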
+enum amd_code_property_mask_t {
+
+ /// Enable the setup of the SGPR user data registers
+ /// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
+ /// for initial register state.
+ ///
+ /// The total number of SGPR user data registers requested must not
+ /// exceed 16. Any requests beyond 16 will be ignored.
+ ///
+ /// Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
+ /// SGPR user data registers enabled up to 16).
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
+
+ /// Control wave ID base counter for GDS ordered-append. Used to set
+ /// COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
+ /// ORDERED_APPEND_MODE also needs to be settable)
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 10,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
+
+ /// The interleave (swizzle) element size in bytes required by the
+ /// code for private memory. This must be 2, 4, 8 or 16. This value
+ /// is provided to the finalizer when it is invoked and is recorded
+ /// here. The hardware will interleave the memory requests of each
+ /// lane of a wavefront by this element size to ensure each
+ /// work-item gets a distinct memory location. Therefore, the
+ /// finalizer ensures that all load and store operations done to
+ /// private memory do not exceed this size. For example, if the
+ /// element size is 4 (32-bits or dword) and a 64-bit value must be
+ /// loaded, the finalizer will generate two 32-bit loads. This
+ /// ensures that the interleaving will get the work-item
+ /// specific dword for both halves of the 64-bit value. If it just
+ /// did a 64-bit load then it would get one dword which belonged to
+ /// its own work-item, but the second dword would belong to the
+ /// adjacent lane work-item since the interleaving is in dwords.
+ ///
+ /// The value used must match the value that the runtime configures
+ /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
+ /// is generally DWORD.
+ ///
+ /// Use values from the amd_element_byte_size_t enum.
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
+
+ /// Are global memory addresses 64 bits. Must match
+ /// amd_kernel_code_t.hsail_machine_model ==
+ /// HSA_MACHINE_LARGE. Must also match
+ /// SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
+ /// SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
+ AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 13,
+ AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
+
+ /// Indicate if the generated ISA is using a dynamically sized call
+ /// stack. This can happen if calls are implemented using a call
+ /// stack and recursion, alloca or calls to indirect functions are
+ /// present. In these cases the Finalizer cannot compute the total
+ /// private segment size at compile time. In this case the
+ /// workitem_private_segment_byte_size only specifies the statically
+ /// know private segment size, and additional space must be added
+ /// for the call stack.
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 14,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
+
+ /// Indicate if code generated has support for debugging.
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+};
+
+/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
+/// control directives. These control how the finalizer generates code. This
+/// struct is used both as an argument to hsaFinalizeKernel to specify values for
+/// the control directives, and is used in HsaKernelCode to record the values of
+/// the control directives that the finalizer used when generating the code, which
+/// either came from the finalizer argument or explicit HSAIL control
+/// directives. See the definition of the control directives in HSA Programmer's
+/// Reference Manual which also defines how the values specified as finalizer
+/// arguments have to agree with the control directives in the HSAIL code.
+typedef struct hsa_ext_control_directives_s {
+ /// This is a bit set indicating which control directives have been
+ /// specified. If the value is 0 then there are no control directives specified
+ /// and the rest of the fields can be ignored. The bits are accessed using the
+ /// hsa_ext_control_directives_present_mask_t. Any control directive that is not
+ /// enabled in this bit set must have the value of all 0s.
+ hsa_ext_control_directive_present64_t enabled_control_directives;
+
+ /// If enableBreakExceptions is not enabled then must be 0, otherwise must be
+ /// non-0 and specifies the set of HSAIL exceptions that must have the BREAK
+ /// policy enabled. If this set is not empty then the generated code may have
+ /// lower performance than if the set is empty. If the kernel being finalized
+ /// has any enablebreakexceptions control directives, then the values specified
+ /// by this argument are unioned with the values in these control
+ /// directives. If any of the functions the kernel calls have an
+ /// enablebreakexceptions control directive, then they must be equal to, or a
+ /// subset of, this union.
+ hsa_ext_exception_kind16_t enable_break_exceptions;
+
+ /// If enableDetectExceptions is not enabled then must be 0, otherwise must be
+ /// non-0 and specifies the set of HSAIL exceptions that must have the DETECT
+ /// policy enabled. If this set is not empty then the generated code may have
+ /// lower performance than if the set is empty. However, an implementation
+ /// should endeavour to make the performance impact small. If the kernel being
+ /// finalized has any enabledetectexceptions control directives, then the
+ /// values specified by this argument are unioned with the values in these
+ /// control directives. If any of the functions the kernel calls have an
+ /// enabledetectexceptions control directive, then they must be equal to, or a
+ /// subset of, this union.
+ hsa_ext_exception_kind16_t enable_detect_exceptions;
+
+ /// If maxDynamicGroupSize is not enabled then must be 0, and any amount of
+ /// dynamic group segment can be allocated for a dispatch, otherwise the value
+ /// specifies the maximum number of bytes of dynamic group segment that can be
+ /// allocated for a dispatch. If the kernel being finalized has any
+ /// maxdynamicsize control directives, then the values must be the same, and
+ /// must be the same as this argument if it is enabled. This value can be used
+ /// by the finalizer to determine the maximum number of bytes of group memory
+ /// used by each work-group by adding this value to the group memory required
+ /// for all group segment variables used by the kernel and all functions it
+ /// calls, and group memory used to implement other HSAIL features such as
+ /// fbarriers and the detect exception operations. This can allow the finalizer
+ /// to determine the expected number of work-groups that can be executed by a
+ /// compute unit and allow more resources to be allocated to the work-items if
+ /// it is known that fewer work-groups can be executed due to group memory
+ /// limitations.
+ uint32_t max_dynamic_group_size;
+
+ /// If maxFlatGridSize is not enabled then must be 0, otherwise must be greater
+ /// than 0. See HSA Programmer's Reference Manual description of
+ /// maxflatgridsize control directive.
+ uint32_t max_flat_grid_size;
+
+ /// If maxFlatWorkgroupSize is not enabled then must be 0, otherwise must be
+ /// greater than 0. See HSA Programmer's Reference Manual description of
+ /// maxflatworkgroupsize control directive.
+ uint32_t max_flat_workgroup_size;
+
+ /// If requestedWorkgroupsPerCu is not enabled then must be 0, and the
+ /// finalizer is free to generate ISA that may result in any number of
+ /// work-groups executing on a single compute unit. Otherwise, the finalizer
+ /// should attempt to generate ISA that will allow the specified number of
+ /// work-groups to execute on a single compute unit. This is only a hint and
+ /// can be ignored by the finalizer. If the kernel being finalized, or any of
+ /// the functions it calls, has a requested control directive, then the values
+ /// must be the same. This can be used to determine the number of resources
+ /// that should be allocated to a single work-group and work-item. For example,
+ /// a low value may allow more resources to be allocated, resulting in higher
+ /// per work-item performance, as it is known there will never be more than the
+ /// specified number of work-groups actually executing on the compute
+ /// unit. Conversely, a high value may allocate fewer resources, resulting in
+ /// lower per work-item performance, which is offset by the fact it allows more
+ /// work-groups to actually execute on the compute unit.
+ uint32_t requested_workgroups_per_cu;
+
+ /// If not enabled then all elements for Dim3 must be 0, otherwise every
+ /// element must be greater than 0. See HSA Programmer's Reference Manual
+ /// description of requiredgridsize control directive.
+ hsa_dim3_t required_grid_size;
+
+ /// If requiredWorkgroupSize is not enabled then all elements for Dim3 must be
+ /// 0, and the produced code can be dispatched with any legal work-group range
+ /// consistent with the dispatch dimensions. Otherwise, the code produced must
+ /// always be dispatched with the specified work-group range. No element of the
+ /// specified range must be 0. It must be consistent with required_dimensions
+ /// and max_flat_workgroup_size. If the kernel being finalized, or any of the
+ /// functions it calls, has a requiredworkgroupsize control directive, then the
+ /// values must be the same. Specifying a value can allow the finalizer to
+ /// optimize work-group id operations, and if the number of work-items in the
+ /// work-group is less than the WAVESIZE then barrier operations can be
+ /// optimized to just a memory fence.
+ hsa_dim3_t required_workgroup_size;
+
+ /// If requiredDim is not enabled then must be 0 and the produced kernel code
+ /// can be dispatched with 1, 2 or 3 dimensions. If enabled then the value is
+ /// 1..3 and the code produced must only be dispatched with a dimension that
+ /// matches. Other values are illegal. If the kernel being finalized, or any of
+ /// the functions it calls, has a requireddimsize control directive, then the
+ /// values must be the same. This can be used to optimize the code generated to
+ /// compute the absolute and flat work-group and work-item id, and the dim
+ /// HSAIL operations.
+ uint8_t required_dim;
+
+ /// Reserved. Must be 0.
+ uint8_t reserved[75];
+} hsa_ext_control_directives_t;
+
+/// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
+/// Code Object to set up the hardware to execute the kernel dispatch.
+///
+/// Initial Kernel Register State.
+///
+/// Initial kernel register state will be set up by CP/SPI prior to the start
+/// of execution of every wavefront. This is limited by the constraints of the
+/// current hardware.
+///
+/// The order of the SGPR registers is defined, but the Finalizer can specify
+/// which ones are actually setup in the amd_kernel_code_t object using the
+/// enable_sgpr_* bit fields. The register numbers used for enabled registers
+/// are dense starting at SGPR0: the first enabled register is SGPR0, the next
+/// enabled register is SGPR1 etc.; disabled registers do not have an SGPR
+/// number.
+///
+/// The initial SGPRs comprise up to 16 User SGPRs that are set up by CP and
+/// apply to all waves of the grid. It is possible to specify more than 16 User
+/// SGPRs using the enable_sgpr_* bit fields, in which case only the first 16
+/// are actually initialized. These are then immediately followed by the System
+/// SGPRs that are set up by ADC/SPI and can have different values for each wave
+/// of the grid dispatch.
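+///
+/// For example (illustrative): if only Private Segment Buffer (4 registers)
+/// and Kernarg Segment Ptr (2 registers) are enabled, the buffer V# occupies
+/// SGPR0-SGPR3 and the kernarg pointer SGPR4-SGPR5; the disabled Dispatch
+/// Ptr and Queue Ptr consume no SGPR numbers.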
+///
+/// SGPR register initial state is defined as follows:
+///
+/// Private Segment Buffer (enable_sgpr_private_segment_buffer):
+/// Number of User SGPR registers: 4. V# that can be used, together with
+/// Scratch Wave Offset as an offset, to access the Private/Spill/Arg
+/// segments using a segment address. It must be set as follows:
+/// - Base address: of the scratch memory area used by the dispatch. It
+/// does not include the scratch wave offset. It will be the per process
+/// SH_HIDDEN_PRIVATE_BASE_VMID plus any offset from this dispatch (for
+/// example there may be a per pipe offset, or per AQL Queue offset).
+/// - Stride + data_format: Element Size * Index Stride (???)
+/// - Cache swizzle: ???
+/// - Swizzle enable: SH_STATIC_MEM_CONFIG.SWIZZLE_ENABLE (must be 1 for
+/// scratch)
+/// - Num records: Flat Scratch Work Item Size / Element Size (???)
+/// - Dst_sel_*: ???
+/// - Num_format: ???
+/// - Element_size: SH_STATIC_MEM_CONFIG.ELEMENT_SIZE (will be DWORD, must
+/// agree with amd_kernel_code_t.privateElementSize)
+/// - Index_stride: SH_STATIC_MEM_CONFIG.INDEX_STRIDE (will be 64 as must
+/// be number of wavefront lanes for scratch, must agree with
+/// amd_kernel_code_t.wavefrontSize)
+/// - Add tid enable: 1
+/// - ATC: from SH_MEM_CONFIG.PRIVATE_ATC,
+/// - Hash_enable: ???
+/// - Heap: ???
+/// - Mtype: from SH_STATIC_MEM_CONFIG.PRIVATE_MTYPE
+/// - Type: 0 (a buffer) (???)
+///
+/// Dispatch Ptr (enable_sgpr_dispatch_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of AQL dispatch packet
+/// for kernel actually executing.
+///
+/// Queue Ptr (enable_sgpr_queue_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of AmdQueue object for
+/// AQL queue on which the dispatch packet was queued.
+///
+/// Kernarg Segment Ptr (enable_sgpr_kernarg_segment_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of Kernarg segment. This
+/// is directly copied from the kernargPtr in the dispatch packet. Having CP
+/// load it once avoids loading it at the beginning of every wavefront.
+///
+/// Dispatch Id (enable_sgpr_dispatch_id):
+/// Number of User SGPR registers: 2. 64 bit Dispatch ID of the dispatch
+/// packet being executed.
+///
+/// Flat Scratch Init (enable_sgpr_flat_scratch_init):
+/// Number of User SGPR registers: 2. These two SGPRs are set up as follows:
+///
+/// For CI/VI:
+/// The first SGPR is a 32 bit byte offset from SH_MEM_HIDDEN_PRIVATE_BASE
+/// to base of memory for scratch for this dispatch. This is the same offset
+/// used in computing the Scratch Segment Buffer base address. The value of
+/// Scratch Wave Offset must be added by the kernel code and moved to
+/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
+///
+/// The second SGPR is 32 bit byte size of a single work-item’s scratch
+/// memory usage. This is directly loaded from the dispatch packet Private
+/// Segment Byte Size and rounded up to a multiple of DWORD.
+///
+/// \todo [Does CP need to round this to >4 byte alignment?]
+///
+/// The kernel code must move to SGPRn-3 for use as the FLAT SCRATCH SIZE in
+/// flat memory instructions. Having CP load it once avoids loading it at
+/// the beginning of every wavefront.
+///
+/// For PI:
+/// This is the 64 bit base address of the scratch backing memory
+/// allocated by CP for this dispatch.
+///
+/// Private Segment Size (enable_sgpr_private_segment_size):
+/// Number of User SGPR registers: 1. The 32 bit byte size of a single
+/// work-item’s scratch memory allocation. This is the value from the dispatch
+/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
+///
+/// \todo [Does CP need to round this to >4 byte alignment?]
+///
+/// Having CP load it once avoids loading it at the beginning of every
+/// wavefront.
+///
+/// \todo [This will not be used for CI/VI since it is the same value as
+/// the second SGPR of Flat Scratch Init. However, it is needed for PI,
+/// which changes the meaning of Flat Scratch Init.]
+///
+/// Grid Work-Group Count X (enable_sgpr_grid_workgroup_count_x):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the X dimension for the grid being executed. Computed from
+/// the fields in the HsaDispatchPacket as
+/// ((gridSize.x+workgroupSize.x-1)/workgroupSize.x).
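+///   (Illustrative: gridSize.x = 1000 with workgroupSize.x = 256 yields
+///   (1000 + 255) / 256 = 4 work-groups.)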
+///
+/// Grid Work-Group Count Y (enable_sgpr_grid_workgroup_count_y):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the Y dimension for the grid being executed. Computed from
+/// the fields in the HsaDispatchPacket as
+/// ((gridSize.y+workgroupSize.y-1)/workgroupSize.y).
+///
+/// Only initialized if <16 previous SGPRs initialized.
+///
+/// Grid Work-Group Count Z (enable_sgpr_grid_workgroup_count_z):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the Z dimension for the grid being executed. Computed
+/// from the fields in the HsaDispatchPacket as
+/// ((gridSize.z+workgroupSize.z-1)/workgroupSize.z).
+///
+/// Only initialized if <16 previous SGPRs initialized.
+///
+/// Work-Group Id X (enable_sgpr_workgroup_id_x):
+/// Number of System SGPR registers: 1. 32 bit work group id in X dimension
+/// of grid for wavefront. Always present.
+///
+/// Work-Group Id Y (enable_sgpr_workgroup_id_y):
+/// Number of System SGPR registers: 1. 32 bit work group id in Y dimension
+/// of grid for wavefront.
+///
+/// Work-Group Id Z (enable_sgpr_workgroup_id_z):
+/// Number of System SGPR registers: 1. 32 bit work group id in Z dimension
+/// of grid for wavefront. If present then Work-group Id Y will also be
+/// present
+///
+/// Work-Group Info (enable_sgpr_workgroup_info):
+/// Number of System SGPR registers: 1. {first_wave, 14’b0000,
+/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
+///
+/// Private Segment Wave Byte Offset
+/// (enable_sgpr_private_segment_wave_byte_offset):
+/// Number of System SGPR registers: 1. 32 bit byte offset from base of
+/// dispatch scratch base. Must be used as an offset with Private/Spill/Arg
+/// segment address when using Scratch Segment Buffer. It must be added to
+/// Flat Scratch Offset if setting up FLAT SCRATCH for flat addressing.
+///
+///
+/// The order of the VGPR registers is defined, but the Finalizer can specify
+/// which ones are actually setup in the amd_kernel_code_t object using the
+/// enableVgpr* bit fields. The register numbers used for enabled registers
+/// are dense starting at VGPR0: the first enabled register is VGPR0, the next
+/// enabled register is VGPR1 etc.; disabled registers do not have a VGPR
+/// number.
+///
+/// VGPR register initial state is defined as follows:
+///
+/// Work-Item Id X (always initialized):
+/// Number of registers: 1. 32 bit work item id in X dimension of work-group
+/// for wavefront lane.
+///
+/// Work-Item Id Y (enable_vgpr_workitem_id > 0):
+/// Number of registers: 1. 32 bit work item id in Y dimension of work-group
+/// for wavefront lane.
+///
+/// Work-Item Id Z (enable_vgpr_workitem_id > 1):
+/// Number of registers: 1. 32 bit work item id in Z dimension of work-group
+/// for wavefront lane.
+///
+///
+/// The setting of registers is being done by existing GPU hardware as follows:
+/// 1) SGPRs before the Work-Group Ids are set by CP using the 16 User Data
+/// registers.
+/// 2) Work-group Id registers X, Y, Z are set by SPI which supports any
+/// combination including none.
+/// 3) Scratch Wave Offset is also set by SPI which is why its value cannot
+/// be added into the value Flat Scratch Offset which would avoid the
+/// Finalizer generated prolog having to do the add.
+/// 4) The VGPRs are set by SPI which only supports specifying either (X),
+/// (X, Y) or (X, Y, Z).
+///
+/// Flat Scratch Dispatch Offset and Flat Scratch Size are adjacent SGPRs so
+/// they can be moved as a 64 bit value to the hardware required SGPRn-3 and
+/// SGPRn-4 respectively using the Finalizer ?FLAT_SCRATCH? Register.
+///
+/// The global segment can be accessed either using flat operations or buffer
+/// operations. If buffer operations are used then the Global Buffer used to
+/// access HSAIL Global/Readonly/Kernarg (which are combined) segments using a
+/// segment address is not passed into the kernel code by CP since its base
+/// address is always 0. Instead the Finalizer generates prolog code to
+/// initialize 4 SGPRs with a V# that has the following properties, and then
+/// uses that in the buffer instructions:
+/// - base address of 0
+/// - no swizzle
+/// - ATC=1
+/// - MTYPE set to support memory coherence specified in
+/// amd_kernel_code_t.globalMemoryCoherence
+///
+/// When the Global Buffer is used to access the Kernarg segment, the kernel
+/// code must add the dispatch packet kernArgPtr to a kernarg segment address
+/// before using this V#.
+/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
+/// the kernarg segment is constant for the duration of the kernel execution.
+///
+typedef struct amd_kernel_code_s {
+ /// The AMD major version of the Code Object. Must be the value
+ /// AMD_CODE_VERSION_MAJOR.
+ amd_code_version32_t amd_code_version_major;
+
+ /// The AMD minor version of the Code Object. Minor versions must be
+ /// backward compatible. Must be the value
+ /// AMD_CODE_VERSION_MINOR.
+ amd_code_version32_t amd_code_version_minor;
+
+ /// The byte size of this struct. Must be set to
+ /// sizeof(amd_kernel_code_t). Used for backward
+ /// compatibility.
+ uint32_t struct_byte_size;
+
+ /// The target chip instruction set for which code has been
+ /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
+ /// in sc/Interface/SCCommon.h.
+ uint32_t target_chip;
+
+ /// Byte offset (possibly negative) from start of amd_kernel_code_t
+ /// object to kernel's entry point instruction. The actual code for
+ /// the kernel is required to be 256 byte aligned to match hardware
+ /// requirements (SQ cache line is 16). The code must be position
+ /// independent code (PIC) for AMD devices to give runtime the
+ /// option of copying code to discrete GPU memory or APU L2
+ /// cache. The Finalizer should endeavour to allocate all kernel
+ /// machine code in contiguous memory pages so that a device
+ /// pre-fetcher will tend to only pre-fetch Kernel Code objects,
+ /// improving cache performance.
+ int64_t kernel_code_entry_byte_offset;
+
+ /// Range of bytes to consider prefetching expressed as an offset
+ /// and size. The offset is from the start (possibly negative) of
+ /// amd_kernel_code_t object. Set both to 0 if no prefetch
+ /// information is available.
+ ///
+ /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
+ /// not make the size a uint64_t as prefetching more than 4GiB seems
+ /// excessive.
+ int64_t kernel_code_prefetch_byte_offset;
+ uint64_t kernel_code_prefetch_byte_size;
+
+ /// Number of bytes of scratch backing memory required for full
+ /// occupancy of target chip. This takes into account the number of
+ /// bytes of scratch per work-item, the wavefront size, the maximum
+ /// number of wavefronts per CU, and the number of CUs. This is an
+ /// upper limit on scratch. If the grid being dispatched is small it
+ /// may only need less than this. If the kernel uses no scratch, or
+ /// the Finalizer has not computed this value, it must be 0.
+ uint64_t max_scratch_backing_memory_byte_size;
+
+ /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+ /// COMPUTE_PGM_RSRC2 registers.
+ amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+
+ /// Code properties. See amd_code_property_mask_t for a full list of
+ /// properties.
+ amd_code_property32_t code_properties;
+
+ /// The amount of memory required for the combined private, spill
+ /// and arg segments for a work-item in bytes. If
+ /// is_dynamic_callstack is 1 then additional space must be added to
+ /// this value for the call stack.
+ uint32_t workitem_private_segment_byte_size;
+
+ /// The amount of group segment memory required by a work-group in
+ /// bytes. This does not include any dynamically allocated group
+ /// segment memory that may be added when the kernel is
+ /// dispatched.
+ uint32_t workgroup_group_segment_byte_size;
+
+ /// Number of bytes of GDS required by kernel dispatch. Must be 0 if
+ /// not using GDS.
+ uint32_t gds_segment_byte_size;
+
+ /// The size in bytes of the kernarg segment that holds the values
+ /// of the arguments to the kernel. This could be used by CP to
+ /// prefetch the kernarg segment pointed to by the dispatch packet.
+ uint64_t kernarg_segment_byte_size;
+
+ /// Number of fbarrier's used in the kernel and all functions it
+ /// calls. If the implementation uses group memory to allocate the
+ /// fbarriers then that amount must already be included in the
+ /// workgroup_group_segment_byte_size total.
+ uint32_t workgroup_fbarrier_count;
+
+ /// Number of scalar registers used by a wavefront. This includes
+ /// the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
+ /// and XNACK (for GFX8 (VI)). It does not include the 16 SGPRs added if a
+ /// trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
+ uint16_t wavefront_sgpr_count;
+
+ /// Number of vector registers used by each work-item. Used to set
+ /// COMPUTE_PGM_RSRC1.VGPRS.
+ uint16_t workitem_vgpr_count;
+
+ /// If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
+ /// first fixed VGPR number reserved.
+ uint16_t reserved_vgpr_first;
+
+ /// The number of consecutive VGPRs reserved by the client. If
+ /// is_debug_supported then this count includes VGPRs reserved
+ /// for debugger use.
+ uint16_t reserved_vgpr_count;
+
+ /// If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
+ /// first fixed SGPR number reserved.
+ uint16_t reserved_sgpr_first;
+
+ /// The number of consecutive SGPRs reserved by the client. If
+ /// is_debug_supported then this count includes SGPRs reserved
+ /// for debugger use.
+ uint16_t reserved_sgpr_count;
+
+ /// If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ /// fixed SGPR number used to hold the wave scratch offset for the
+ /// entire kernel execution, or uint16_t(-1) if the register is not
+ /// used or not known.
+ uint16_t debug_wavefront_private_segment_offset_sgpr;
+
+ /// If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ /// fixed SGPR number of the first of 4 SGPRs used to hold the
+ /// scratch V# used for the entire kernel execution, or uint16_t(-1)
+ /// if the registers are not used or not known.
+ uint16_t debug_private_segment_buffer_sgpr;
+
+ /// The maximum byte alignment of variables used by the kernel in
+ /// the specified memory segment. Expressed as a power of two. Must
+ /// be at least HSA_POWERTWO_16.
+ hsa_powertwo8_t kernarg_segment_alignment;
+ hsa_powertwo8_t group_segment_alignment;
+ hsa_powertwo8_t private_segment_alignment;
+
+ uint8_t reserved3;
+
+ /// Type of code object.
+ hsa_ext_code_kind32_t code_type;
+
+ /// Reserved for code properties if any are defined in the future.
+ /// There are currently no code properties so this field must be 0.
+ uint32_t reserved4;
+
+ /// Wavefront size expressed as a power of two. Must be a power of 2
+ /// in range 1..64 inclusive. Used to support runtime query that
+ /// obtains wavefront size, which may be used by the application to
+ /// allocate dynamic group memory and set the dispatch work-group
+ /// size.
+ hsa_powertwo8_t wavefront_size;
+
+ /// The optimization level specified when the kernel was
+ /// finalized.
+ uint8_t optimization_level;
+
+ /// The HSAIL profile defines which features are used. This
+ /// information is from the HSAIL version directive. If this
+ /// amd_kernel_code_t is not generated from an HSAIL compilation
+ /// unit then must be 0.
+ hsa_ext_brig_profile8_t hsail_profile;
+
+ /// The HSAIL machine model gives the address sizes used by the
+ /// code. This information is from the HSAIL version directive. If
+ /// not generated from an HSAIL compilation unit then must still
+ /// indicate for what machine mode the code is generated.
+ hsa_ext_brig_machine_model8_t hsail_machine_model;
+
+ /// The HSAIL major version. This information is from the HSAIL
+ /// version directive. If this amd_kernel_code_t is not
+ /// generated from an HSAIL compilation unit then must be 0.
+ uint32_t hsail_version_major;
+
+ /// The HSAIL minor version. This information is from the HSAIL
+ /// version directive. If this amd_kernel_code_t is not
+ /// generated from an HSAIL compilation unit then must be 0.
+ uint32_t hsail_version_minor;
+
+ /// Reserved for HSAIL target options if any are defined in the
+ /// future. There are currently no target options so this field
+ /// must be 0.
+ uint16_t reserved5;
+
+ /// Reserved. Must be 0.
+ uint16_t reserved6;
+
+ /// The values should be the actual values used by the finalizer
+ /// in generating the code. This may be the union of values
+ /// specified as finalizer arguments and explicit HSAIL control
+ /// directives. If the finalizer chooses to ignore a control
+ /// directive, and not generate constrained code, then the control
+ /// directive should not be marked as enabled even though it was
+ /// present in the HSAIL or finalizer argument. The values are
+ /// intended to reflect the constraints that the code actually
+ /// requires to correctly execute, not the values that were
+ /// actually specified at finalize time.
+ hsa_ext_control_directives_t control_directive;
+
+ /// The code can immediately follow the amd_kernel_code_t, or can
+ /// come after subsequent amd_kernel_code_t structs when there are
+ /// multiple kernels in the compilation unit.
+
+} amd_kernel_code_t;
+
+#endif // AMDKERNELCODET_H
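
A minimal sketch (hypothetical producer code, not part of this patch) of
setting the invariant fields the comments above require; the memset supplies
the zeros that every reserved field must hold:

    #include <cstring>

    void initKernelCode(amd_kernel_code_t &Hdr) {
      std::memset(&Hdr, 0, sizeof(Hdr)); // reserved fields must be 0
      Hdr.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
      Hdr.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
      Hdr.struct_byte_size = sizeof(amd_kernel_code_t);
      Hdr.code_properties = AMD_CODE_PROPERTY_IS_PTR64; // e.g. 64-bit pointers
    }
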
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
new file mode 100644
index 000000000000..3b4ba1a8e8e9
--- /dev/null
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -0,0 +1,320 @@
+//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+};
+
+class AMDGPUOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate
+ } Kind;
+
+public:
+ AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ int64_t Val;
+ };
+
+ union {
+ TokOp Tok;
+ ImmOp Imm;
+ };
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ }
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("addRegOperands");
+ }
+ StringRef getToken() const {
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ bool isToken() const override {
+ return Kind == Token;
+ }
+
+ bool isImm() const override {
+ return Kind == Immediate;
+ }
+
+ int64_t getImm() const {
+ return Imm.Val;
+ }
+
+ bool isReg() const override {
+ return false;
+ }
+
+ unsigned getReg() const override {
+ return 0;
+ }
+
+ bool isMem() const override {
+ return false;
+ }
+
+ SMLoc getStartLoc() const override {
+ return SMLoc();
+ }
+
+ SMLoc getEndLoc() const override {
+ return SMLoc();
+ }
+
+ void print(raw_ostream &OS) const override { }
+
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ bool isSWaitCnt() const;
+};
+
+}
+
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                    SMLoc &EndLoc) {
+ return true;
+}
+
+bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+ }
+ return Error(IDLoc, "invalid operand for instruction");
+ }
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ // Try to parse with a custom parser.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // If we successfully parsed the operand or if there was an error parsing,
+ // we are done.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ case AsmToken::Integer: {
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ return MatchOperand_Success;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+}
+
+bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ // Add the instruction mnemonic
+ Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+ switch (Res) {
+ case MatchOperand_Success: return false;
+ case MatchOperand_ParseFail: return Error(NameLoc, "Failed parsing operand");
+ case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// s_waitcnt
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
+ StringRef CntName = Parser.getTok().getString();
+ int64_t CntVal;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return true;
+
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
+ Parser.Lex();
+
+ int CntShift;
+ int CntMask;
+
+ if (CntName == "vmcnt") {
+ CntMask = 0xf;
+ CntShift = 0;
+ } else if (CntName == "expcnt") {
+ CntMask = 0x7;
+ CntShift = 4;
+ } else if (CntName == "lgkmcnt") {
+ CntMask = 0x7;
+ CntShift = 8;
+ } else {
+ return true;
+ }
+
+ IntVal &= ~(CntMask << CntShift);
+ IntVal |= (CntVal << CntShift);
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
+ // Disable all counters by default.
+ // vmcnt [3:0]
+ // expcnt [6:4]
+ // lgkmcnt [10:8]
+ int64_t CntVal = 0x77f;
+
+ switch (getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer:
+ // The operand can be an integer value.
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Identifier:
+ do {
+ if (parseCnt(CntVal))
+ return MatchOperand_ParseFail;
+ } while (getLexer().isNot(AsmToken::EndOfStatement));
+ break;
+ }
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isSWaitCnt() const {
+ return isImm();
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeR600AsmParser() {
+ RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
+ RegisterMCAsmParser<AMDGPUAsmParser> B(TheGCNTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AMDGPUGenAsmMatcher.inc"
+
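The s_waitcnt immediate built by parseCnt and parseSWaitCntOps above packs
three independent counters into one simm16. A minimal standalone sketch of
that packing (an illustration assuming the field layout documented in the
comments above; this is not code from the patch):

  #include <cassert>
  #include <cstdint>

  // vmcnt lives in bits [3:0], expcnt in [6:4], lgkmcnt in [10:8].
  static uint16_t encodeWaitcnt(unsigned Vmcnt, unsigned Expcnt,
                                unsigned Lgkmcnt) {
    return (Vmcnt & 0xf) | ((Expcnt & 0x7) << 4) | ((Lgkmcnt & 0x7) << 8);
  }

  int main() {
    // The parser's default of 0x77f is every counter at its disabled maximum.
    assert(encodeWaitcnt(0xf, 0x7, 0x7) == 0x77f);
    // "s_waitcnt vmcnt(0) lgkmcnt(0)" leaves expcnt disabled.
    assert(encodeWaitcnt(0x0, 0x7, 0x0) == 0x70);
  }

Once the parser is registered, this path can be exercised with something like
"llvm-mc -arch=amdgcn -mcpu=SI -show-encoding"; the exact arch and cpu
spellings here are an assumption, not taken from the patch.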
diff --git a/lib/Target/R600/AsmParser/CMakeLists.txt b/lib/Target/R600/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..1b42af73740e
--- /dev/null
+++ b/lib/Target/R600/AsmParser/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMR600AsmParser
+ AMDGPUAsmParser.cpp
+ )
diff --git a/lib/Target/R600/AsmParser/LLVMBuild.txt b/lib/Target/R600/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..940e4cee6dfd
--- /dev/null
+++ b/lib/Target/R600/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600AsmParser
+parent = R600
+required_libraries = MC MCParser R600Desc R600Info Support
+add_to_library_groups = R600
diff --git a/lib/Target/R600/AsmParser/Makefile b/lib/Target/R600/AsmParser/Makefile
new file mode 100644
index 000000000000..e6689b54b6ba
--- /dev/null
+++ b/lib/Target/R600/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/R600/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600AsmParser
+
+# Hack: we need to include the 'main' R600 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/CIInstructions.td b/lib/Target/R600/CIInstructions.td
new file mode 100644
index 000000000000..fdb58bbebd54
--- /dev/null
+++ b/lib/Target/R600/CIInstructions.td
@@ -0,0 +1,42 @@
+//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Instruction definitions for CI and newer.
+//===----------------------------------------------------------------------===//
+
+def isCIVI : Predicate <
+ "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+ "Subtarget.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isCIVI in {
+
+defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
+ VOP_F64_F64, ftrunc
+>;
+defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
+ VOP_F64_F64, fceil
+>;
+defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
+ VOP_F64_F64, ffloor
+>;
+defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
+ VOP_F64_F64, frint
+>;
+defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
+ VOP_F32_F32
+>;
+defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
+ VOP_F32_F32
+>;
+} // End SubtargetPredicate = isCIVI
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 49a7f8aa18c8..5a4bae2f93cc 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -6,13 +6,15 @@ tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(R600CodeGen
AMDILCFGStructurizer.cpp
+ AMDGPUAlwaysInlinePass.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
AMDGPUIntrinsicInfo.cpp
@@ -41,17 +43,21 @@ add_llvm_target(R600CodeGen
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
SIFixSGPRLiveRanges.cpp
+ SIFoldOperands.cpp
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
+ SILoadStoreOptimizer.cpp
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
+ SIPrepareScratchRegs.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
)
+add_subdirectory(AsmParser)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td
index 26303452c101..58b5ce24b4a3 100644
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -46,6 +46,8 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
+defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 484e52250d1b..f24f76b7fe16 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -69,6 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
@@ -256,6 +257,12 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
let Predicates = [isEGorCayman] in {
+// Should be predicated on FeatureFP64
+// def FMA_64 : R600_3OP <
+// 0xA, "FMA_64",
+// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+// >;
+
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
@@ -295,7 +302,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
def : Pat<(i32 (sext_inreg i32:$src, i16)),
(BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
@@ -312,6 +319,7 @@ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+def FMA_eg : FMA_Common<0x7>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
@@ -466,21 +474,47 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
let DisableEncoding = "$dst";
}
-class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
+class R600_LDS_1A2D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst = ""> :
R600_LDS <
- lds_op,
- (outs),
+ lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
- " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
+ " "#name# "$last "#dst#"$src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
pattern> {
+
+ field string BaseOp;
+
+ let LDS_1A1D = 0;
let LDS_1A2D = 1;
}
+class R600_LDS_1A2D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A2D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs R600_Reg32:$dst), name, pattern> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+}
+
def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
+def LDS_AND : R600_LDS_1A1D_NORET <0x9, "LDS_AND", [] >;
+def LDS_OR : R600_LDS_1A1D_NORET <0xa, "LDS_OR", [] >;
+def LDS_XOR : R600_LDS_1A1D_NORET <0xb, "LDS_XOR", [] >;
+def LDS_WRXCHG : R600_LDS_1A1D_NORET <0xd, "LDS_WRXCHG", [] >;
+def LDS_CMPST : R600_LDS_1A2D_NORET <0x10, "LDS_CMPST", [] >;
+def LDS_MIN_INT : R600_LDS_1A1D_NORET <0x5, "LDS_MIN_INT", [] >;
+def LDS_MAX_INT : R600_LDS_1A1D_NORET <0x6, "LDS_MAX_INT", [] >;
+def LDS_MIN_UINT : R600_LDS_1A1D_NORET <0x7, "LDS_MIN_UINT", [] >;
+def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
@@ -496,6 +530,33 @@ def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
[(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;
+def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND",
+ [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))]
+>;
+def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR",
+ [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))]
+>;
+def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR",
+ [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT",
+ [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
+ [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT",
+ [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
+ [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
+>;
+def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG",
+ [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))]
+>;
+def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST",
+ [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))]
+>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
@@ -529,7 +590,7 @@ def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
-def : FROUNDPat <CNDGE_eg>;
+def : FROUNDPat <CNDGE_eg, CNDGT_eg>;
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 0927040cb5bc..8271c6f45fb9 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -10,8 +10,10 @@
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
@@ -40,6 +42,81 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
+void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
+}
+
+void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
+}
+
+void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " offen";
+}
+
+void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " idxen";
+}
+
+void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " addr64";
+}
+
+void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset:";
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " offset:";
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " glc";
+}
+
+void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " slc";
+}
+
+void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " tfe";
+}
+
void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
switch (reg) {
case AMDGPU::VCC:
@@ -54,6 +131,27 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
case AMDGPU::M0:
O << "m0";
return;
+ case AMDGPU::FLAT_SCR:
+ O << "flat_scratch";
+ return;
+ case AMDGPU::VCC_LO:
+ O << "vcc_lo";
+ return;
+ case AMDGPU::VCC_HI:
+ O << "vcc_hi";
+ return;
+ case AMDGPU::EXEC_LO:
+ O << "exec_lo";
+ return;
+ case AMDGPU::EXEC_HI:
+ O << "exec_hi";
+ return;
+ case AMDGPU::FLAT_SCR_LO:
+ O << "flat_scratch_lo";
+ return;
+ case AMDGPU::FLAT_SCR_HI:
+ O << "flat_scratch_hi";
+ return;
default:
break;
}
@@ -110,26 +208,62 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
-void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
return;
}
- if (Imm == FloatToBits(1.0f) ||
- Imm == FloatToBits(-1.0f) ||
- Imm == FloatToBits(0.5f) ||
- Imm == FloatToBits(-0.5f) ||
- Imm == FloatToBits(2.0f) ||
- Imm == FloatToBits(-2.0f) ||
- Imm == FloatToBits(4.0f) ||
- Imm == FloatToBits(-4.0f)) {
- O << BitsToFloat(Imm);
+ if (Imm == FloatToBits(0.0f))
+ O << "0.0";
+ else if (Imm == FloatToBits(1.0f))
+ O << "1.0";
+ else if (Imm == FloatToBits(-1.0f))
+ O << "-1.0";
+ else if (Imm == FloatToBits(0.5f))
+ O << "0.5";
+ else if (Imm == FloatToBits(-0.5f))
+ O << "-0.5";
+ else if (Imm == FloatToBits(2.0f))
+ O << "2.0";
+ else if (Imm == FloatToBits(-2.0f))
+ O << "-2.0";
+ else if (Imm == FloatToBits(4.0f))
+ O << "4.0";
+ else if (Imm == FloatToBits(-4.0f))
+ O << "-4.0";
+ else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
+ int64_t SImm = static_cast<int64_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
return;
}
- O << formatHex(static_cast<uint64_t>(Imm));
+ if (Imm == DoubleToBits(0.0))
+ O << "0.0";
+ else if (Imm == DoubleToBits(1.0))
+ O << "1.0";
+ else if (Imm == DoubleToBits(-1.0))
+ O << "-1.0";
+ else if (Imm == DoubleToBits(0.5))
+ O << "0.5";
+ else if (Imm == DoubleToBits(-0.5))
+ O << "-0.5";
+ else if (Imm == DoubleToBits(2.0))
+ O << "2.0";
+ else if (Imm == DoubleToBits(-2.0))
+ O << "-2.0";
+ else if (Imm == DoubleToBits(4.0))
+ O << "4.0";
+ else if (Imm == DoubleToBits(-4.0))
+ O << "-4.0";
+ else
+ llvm_unreachable("64-bit literal constants not supported");
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -147,27 +281,55 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
}
} else if (Op.isImm()) {
- printImmediate(Op.getImm(), O);
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ if (RCID != -1) {
+ const MCRegisterClass &ImmRC = MRI.getRegClass(RCID);
+ if (ImmRC.getSize() == 4)
+ printImmediate32(Op.getImm(), O);
+ else if (ImmRC.getSize() == 8)
+ printImmediate64(Op.getImm(), O);
+ else
+ llvm_unreachable("Invalid register class size");
+ } else {
+ // We hit this for the immediate instruction bits that don't yet have a
+ // custom printer.
+ // TODO: Eventually this should be unnecessary.
+ O << formatDec(Op.getImm());
+ }
} else if (Op.isFPImm()) {
- O << Op.getFPImm();
+ // We special case 0.0 because otherwise it will be printed as an integer.
+ if (Op.getFPImm() == 0.0)
+ O << "0.0";
+ else {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass);
+
+ if (ImmRC.getSize() == 4)
+ printImmediate32(FloatToBits(Op.getFPImm()), O);
+ else if (ImmRC.getSize() == 8)
+ printImmediate64(DoubleToBits(Op.getFPImm()), O);
+ else
+ llvm_unreachable("Invalid register class size");
+ }
} else if (Op.isExpr()) {
const MCExpr *Exp = Op.getExpr();
Exp->print(O);
} else {
- assert(!"unknown operand type in printOperand");
+ llvm_unreachable("unknown operand type in printOperand");
}
}
void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
- if (InputModifiers & 0x1)
- O << "-";
- if (InputModifiers & 0x2)
- O << "|";
+ if (InputModifiers & SISrcMods::NEG)
+ O << '-';
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
printOperand(MI, OpNo + 1, O);
- if (InputModifiers & 0x2)
- O << "|";
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
}
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
@@ -181,7 +343,7 @@ void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
} else if (Imm == 0) {
O << "P10";
} else {
- assert(!"Invalid interpolation parameter slot");
+ llvm_unreachable("Invalid interpolation parameter slot");
}
}
@@ -214,6 +376,23 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
printIfSet(MI, OpNo, O, "_SAT");
}
+void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " clamp";
+}
+
+void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == SIOutMods::MUL2)
+ O << " mul:2";
+ else if (Imm == SIOutMods::MUL4)
+ O << " mul:4";
+ else if (Imm == SIOutMods::DIV2)
+ O << " div:2";
+}
+
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int32_t Imm = MI->getOperand(OpNo).getImm();
@@ -281,7 +460,7 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
sel -= 512;
int cb = sel >> 12;
sel &= 4095;
- O << cb << "[" << sel << "]";
+ O << cb << '[' << sel << ']';
} else if (sel >= 448) {
sel -= 448;
O << sel;
@@ -290,7 +469,7 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
}
if (sel >= 0)
- O << "." << chans[chan];
+ O << '.' << chans[chan];
}
void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
@@ -323,25 +502,25 @@ void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
unsigned Sel = MI->getOperand(OpNo).getImm();
switch (Sel) {
case 0:
- O << "X";
+ O << 'X';
break;
case 1:
- O << "Y";
+ O << 'Y';
break;
case 2:
- O << "Z";
+ O << 'Z';
break;
case 3:
- O << "W";
+ O << 'W';
break;
case 4:
- O << "0";
+ O << '0';
break;
case 5:
- O << "1";
+ O << '1';
break;
case 7:
- O << "_";
+ O << '_';
break;
default:
break;
@@ -353,10 +532,10 @@ void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
unsigned CT = MI->getOperand(OpNo).getImm();
switch (CT) {
case 0:
- O << "U";
+ O << 'U';
break;
case 1:
- O << "N";
+ O << 'N';
break;
default:
break;
@@ -368,10 +547,10 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
int KCacheMode = MI->getOperand(OpNo).getImm();
if (KCacheMode > 0) {
int KCacheBank = MI->getOperand(OpNo - 2).getImm();
- O << "CB" << KCacheBank <<":";
+ O << "CB" << KCacheBank << ':';
int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
- int LineSize = (KCacheMode == 1)?16:32;
- O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize;
+ int LineSize = (KCacheMode == 1) ? 16 : 32;
+ O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
}
}
@@ -415,12 +594,26 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
unsigned Vmcnt = SImm16 & 0xF;
unsigned Expcnt = (SImm16 >> 4) & 0xF;
unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
- if (Vmcnt != 0xF)
- O << "vmcnt(" << Vmcnt << ") ";
- if (Expcnt != 0x7)
- O << "expcnt(" << Expcnt << ") ";
- if (Lgkmcnt != 0x7)
- O << "lgkmcnt(" << Lgkmcnt << ")";
+
+ bool NeedSpace = false;
+
+ if (Vmcnt != 0xF) {
+ O << "vmcnt(" << Vmcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Expcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "expcnt(" << Expcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Lgkmcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "lgkmcnt(" << Lgkmcnt << ')';
+ }
}
#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 6ca717076cde..1d43c7acbe74 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUINSTPRINTER_H
-#define AMDGPUINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
+#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
@@ -34,9 +34,22 @@ public:
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
- void printImmediate(uint32_t Imm, raw_ostream &O);
+ void printImmediate32(uint32_t I, raw_ostream &O);
+ void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -45,6 +58,8 @@ private:
StringRef Asm, StringRef Default = "");
static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -65,4 +80,4 @@ private:
} // End namespace llvm
-#endif // AMDGPUINSTRPRINTER_H
+#endif
diff --git a/lib/Target/R600/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt
index 908872b55cd2..f3f254fdcbad 100644
--- a/lib/Target/R600/LLVMBuild.txt
+++ b/lib/Target/R600/LLVMBuild.txt
@@ -16,17 +16,18 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = R600
parent = Target
+has_asmparser = 1
has_asmprinter = 1
[component_1]
type = Library
name = R600CodeGen
parent = R600
-required_libraries = Analysis AsmPrinter CodeGen Core MC R600AsmPrinter R600Desc R600Info Scalar SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC R600AsmParser R600AsmPrinter R600Desc R600Info Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index d55f27b04554..d0c634fb7e42 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -29,7 +29,7 @@ public:
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
@@ -57,9 +57,7 @@ public:
assert(!"Not implemented");
}
bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
- return true;
- }
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
};
@@ -116,6 +114,13 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
return Infos[Kind - FirstTargetFixupKind];
}
+bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ for (unsigned i = 0; i < Count; ++i)
+ OW->Write8(0);
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// ELFAMDGPUAsmBackend class
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
index 4b12e548a56f..01021d67ffd9 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AMDGPUFIXUPKINDS_H
-#define LLVM_AMDGPUFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
@@ -31,4 +31,4 @@ enum Fixups {
}
}
-#endif // LLVM_AMDGPUFIXUPKINDS_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 78bbe0a163c9..19d89fb27caa 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -11,21 +11,15 @@
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() {
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
- HasSubsectionsViaSymbols = true;
- HasMachoZeroFillDirective = false;
- HasMachoTBSSDirective = false;
- HasStaticCtorDtorReferenceInStaticMode = false;
- LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
SeparatorString = "\n";
CommentString = ";";
- LabelSuffix = ":";
+ PrivateLabelPrefix = "";
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
- AssemblerDialect = 0;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
@@ -35,28 +29,15 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
- GPRel32Directive = nullptr;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- //===--- Alignment Information ----------------------------------------===//
- AlignmentIsInBytes = true;
- TextAlignFillValue = 0;
-
//===--- Global Variable Emission Directives --------------------------===//
- GlobalDirective = ".global";
- HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
- HasLEB128 = true;
SupportsDebugInformation = true;
}
-
-const MCSection*
-AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
- return nullptr;
-}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
index 59aebece540a..8f75c76c4257 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,18 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUMCASMINFO_H
-#define AMDGPUMCASMINFO_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
-class AMDGPUMCAsmInfo : public MCAsmInfo {
+// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
+// you will need to make sure your new class sets PrivateGlobalPrefix to
+// a prefix that won't appear in a function name. The default value
+// for PrivateGlobalPrefix is 'L', so the assembler will treat any function
+// starting with 'L' as a local symbol.
+class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
explicit AMDGPUMCAsmInfo(StringRef &TT);
- const MCSection* getNonexecutableStackSection(MCContext &CTX) const override;
};
} // namespace llvm
-#endif // AMDGPUMCASMINFO_H
+#endif
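A concrete illustration of the prefix collision the comment above warns about
(a standalone sketch with hypothetical symbol names):

  #include <cassert>
  #include <string>

  // With PrivateGlobalPrefix = "L", any symbol whose name starts with the
  // prefix is classified as assembler-local.
  static bool isPrivateGlobal(const std::string &Name,
                              const std::string &Prefix) {
    return !Prefix.empty() && Name.compare(0, Prefix.size(), Prefix) == 0;
  }

  int main() {
    assert(isPrivateGlobal("Lfunc_begin0", "L")); // intended: a local label
    assert(isPrivateGlobal("LoadKernel", "L"));   // accidental: a function
    assert(!isPrivateGlobal("LoadKernel", ""));   // an empty prefix avoids it
  }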
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index d5e432de564c..c95742762233 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,4 +47,4 @@ public:
} // End namespace llvm
-#endif // AMDGPUCODEEMITTER_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 38a295659f96..83403ba04870 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUMCAsmInfo.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -84,31 +85,37 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
+ raw_ostream &_OS, MCCodeEmitter *_Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
}
extern "C" void LLVMInitializeR600TargetMC() {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index f6b3376da32c..bc8cd53d84b4 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
//
-#ifndef AMDGPUMCTARGETDESC_H
-#define AMDGPUMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#include "llvm/ADT/StringRef.h"
@@ -30,6 +30,7 @@ class Target;
class raw_ostream;
extern Target TheAMDGPUTarget;
+extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -55,4 +56,4 @@ MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
-#endif // AMDGPUMCTARGETDESC_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 78776c11d75d..640de3f9fc84 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -14,9 +14,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
@@ -84,12 +85,10 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
unsigned OpNo) const {
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
- unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
- return (AMDGPU::SSrc_32RegClassID == RegClass) ||
- (AMDGPU::SSrc_64RegClassID == RegClass) ||
- (AMDGPU::VSrc_32RegClassID == RegClass) ||
- (AMDGPU::VSrc_64RegClassID == RegClass);
+ return OpType == AMDGPU::OPERAND_REG_IMM32 ||
+ OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile
index 1b3ebbe8c8f3..64a7c8c045c5 100644
--- a/lib/Target/R600/Makefile
+++ b/lib/Target/R600/Makefile
@@ -16,8 +16,8 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
- AMDGPUGenAsmWriter.inc
+ AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index ce17d7cb7f13..cff97cdb3beb 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -83,28 +83,38 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>;
-def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>;
-def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
//===----------------------------------------------------------------------===//
// Sea Islands
//===----------------------------------------------------------------------===//
-def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>;
-def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
-def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
-def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
-def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+
+//===----------------------------------------------------------------------===//
+// Volcanic Islands
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>;
+
+def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+
+def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index 92bf0df96254..f07be0001fb8 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -18,6 +18,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -167,7 +168,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
}
bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index e37767a0719d..edaf27841ca7 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -336,7 +336,7 @@ private:
getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(0) // ADDR
.addImm(AluInstCount - 1); // COUNT
- return ClauseFile(MIb, ClauseContent);
+ return ClauseFile(MIb, std::move(ClauseContent));
}
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
@@ -426,7 +426,7 @@ private:
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, ClauseContent);
+ return ClauseFile(ClauseHead, std::move(ClauseContent));
}
void
@@ -459,11 +459,9 @@ private:
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
- void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
- const {
- for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
- It != E; ++It) {
- MachineInstr *MI = *It;
+ void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
+ unsigned Addr) const {
+ for (MachineInstr *MI : MIs) {
CounterPropagateAddr(MI, Addr);
}
}
@@ -477,8 +475,9 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
- TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI = static_cast<const R600RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MFI->getShaderType());
@@ -542,7 +541,7 @@ public:
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
- LoopStack.push_back(Pair);
+ LoopStack.push_back(std::move(Pair));
MI->eraseFromParent();
CfCount++;
break;
@@ -550,7 +549,7 @@ public:
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *> > Pair =
- LoopStack.back();
+ std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index f2f28fe469b5..51d87eda31d1 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef R600DEFINES_H_
-#define R600DEFINES_H_
+#ifndef LLVM_LIB_TARGET_R600_R600DEFINES_H
+#define LLVM_LIB_TARGET_R600_R600DEFINES_H
#include "llvm/MC/MCRegisterInfo.h"
@@ -168,4 +168,4 @@ namespace OpName {
#define R_0288E8_SQ_LDS_ALLOC 0x0288E8
-#endif // R600DEFINES_H_
+#endif
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index 38afebef400e..fdc20302f4a3 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -19,6 +19,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -297,7 +298,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index 732b06dc15c7..211d392e8fcc 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -19,6 +19,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -65,7 +66,7 @@ void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
const R600RegisterInfo &TRI = TII->getRegisterInfo();
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 52315bf0f338..595f69884544 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -122,12 +122,19 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
+
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
+ }
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
@@ -181,8 +188,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SUBE, VT, Expand);
}
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::Source);
}
@@ -192,7 +197,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
switch (MI->getOpcode()) {
default:
@@ -202,7 +207,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
assert(DstIdx != -1);
MachineInstrBuilder NewMI;
- if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
+ // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
+ // LDS_1A2D support and remove this special case.
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
+ MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
return BB;
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
@@ -645,8 +653,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
MachineSDNode *interp;
if (ijb < 0) {
const MachineFunction &MF = DAG.getMachineFunction();
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
+ MF.getSubtarget().getInstrInfo());
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
return DAG.getTargetExtractSubreg(
@@ -806,6 +814,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
@@ -901,74 +912,7 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::UDIVREM: {
SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
-
- //HiLo split
- SDValue LHS = N->getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
-
- SDValue RHS = N->getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
-
- // Get Speculative values
- SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
-
- SDValue REM_Hi = zero;
- SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
-
- SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
- SDValue DIV_Lo = zero;
-
- const unsigned halfBitWidth = HalfVT.getSizeInBits();
-
- for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
- SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
-
- DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-
- // Update REM
-
- SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
- REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
- }
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
- Results.push_back(DIV);
- Results.push_back(REM);
+ LowerUDIVREM64(Op, DAG, Results);
break;
}
}
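The UDIVREM expansion deleted above is the classic bit-at-a-time restoring
division, now shared through LowerUDIVREM64. Its scalar equivalent as a
standalone sketch (an illustration of the algorithm, not the DAG code; the
DAG version additionally speculates the high half when RHS_Hi is zero):

  #include <cassert>
  #include <cstdint>
  #include <utility>

  static std::pair<uint64_t, uint64_t> udivrem64(uint64_t N, uint64_t D) {
    uint64_t Q = 0, R = 0;
    for (int i = 63; i >= 0; --i) {
      R = (R << 1) | ((N >> i) & 1); // shift the next dividend bit into R
      if (R >= D) {                  // the compare-and-subtract the DAG code
        R -= D;                      // expressed with getSelectCC
        Q |= 1ull << i;
      }
    }
    return std::make_pair(Q, R);
  }

  int main() {
    std::pair<uint64_t, uint64_t> QR = udivrem64(1000, 7);
    assert(QR.first == 142 && QR.second == 6);
  }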
@@ -1176,6 +1120,13 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
SDValue CC = Op.getOperand(4);
SDValue Temp;
+ if (VT == MVT::f32) {
+ DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
+ SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+ if (MinMax)
+ return MinMax;
+ }
+
// LHS and RHS are guaranteed to be the same value type
EVT CompareVT = LHS.getValueType();
@@ -1430,8 +1381,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
- getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1543,7 +1494,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
- SplitVectorLoad(Op, DAG),
+ ScalarizeVectorLoad(Op, DAG),
Chain
};
return DAG.getMergeValues(MergedValues, DL);
@@ -1613,6 +1564,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
LoadNode->getPointerInfo(), MemVT,
LoadNode->isVolatile(),
LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
LoadNode->getAlignment());
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
@@ -1627,8 +1579,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
- getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1691,10 +1643,10 @@ SDValue R600TargetLowering::LowerFormalArguments(
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
MachineFunction &MF = DAG.getMachineFunction();
- unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
SmallVector<ISD::InputArg, 8> LocalIns;
@@ -1704,10 +1656,15 @@ SDValue R600TargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT VT = Ins[i].VT;
- EVT MemVT = LocalIns[i].VT;
+ const ISD::InputArg &In = Ins[i];
+ EVT VT = In.VT;
+ EVT MemVT = VA.getLocVT();
+ if (!VT.isVector() && MemVT.isVector()) {
+ // Get load source type if scalarized.
+ MemVT = MemVT.getVectorElementType();
+ }
- if (ShaderType != ShaderType::COMPUTE) {
+ if (MFI->getShaderType() != ShaderType::COMPUTE) {
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
@@ -1715,7 +1672,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUAS::CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1724,18 +1681,33 @@ SDValue R600TargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
+ ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+ if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vector parameters seems to be broken.
+
+ // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ Ext = ISD::SEXTLOAD;
+ }
+
+ // Compute the offset from the value.
+ // XXX - I think PartOffset should give you this, but it seems to give the
+    // size of the register, which isn't useful.
+
+ unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+ unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Offset = 36 + VA.getLocMemOffset();
- // FIXME: This should really check the extload type, but the handling of
- // extload vecto parameters seems to be broken.
- //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- ISD::LoadExtType Ext = ISD::SEXTLOAD;
- SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
- DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)),
- MemVT, false, false, 4);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+ SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+ DAG.getConstant(Offset, MVT::i32),
+ DAG.getUNDEF(MVT::i32),
+ PtrInfo,
+ MemVT, false, true, true, 4);
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
+ MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
}
return Chain;
}
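A small sketch of the offset bookkeeping above, under the stated assumptions (a 36-byte implicit header in CONSTANT_BUFFER_0, and OrigArgIndex pointing at the first part of a split argument); the struct and helpers are illustrative, not real LLVM API:

struct PartLoc {
  unsigned OrigArgIndex;  // index of the IR argument's first part in ArgLocs
  unsigned LocMemOffset;  // byte offset assigned by the calling convention
};

// Absolute offset of this part within CONSTANT_BUFFER_0 (the load address).
static unsigned bufferOffset(const PartLoc &Part) {
  return 36 + Part.LocMemOffset;
}

// Offset of this part within its own IR-level argument (PartOffset - ValBase),
// which is what the MachinePointerInfo records.
static unsigned offsetWithinArg(const PartLoc &Part, const PartLoc ArgLocs[]) {
  return Part.LocMemOffset - ArgLocs[Part.OrigArgIndex].LocMemOffset;
}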
@@ -2081,7 +2053,7 @@ static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
if (!Src.isMachineOpcode())
return false;
switch (Src.getMachineOpcode()) {
@@ -2206,7 +2178,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
if (!Node->isMachineOpcode())
return Node;
unsigned Opcode = Node->getMachineOpcode();
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index d22c8c98a542..10ebc10ccdba 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600ISELLOWERING_H
-#define R600ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
@@ -74,4 +74,4 @@ private:
} // End namespace llvm;
-#endif // R600ISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 9428babcbefd..0ffd485476ec 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -38,6 +38,9 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
+ // No AsmMatcher support.
+ let isCodeGenOnly = 1;
+
let TSFlags{4} = Trig;
let TSFlags{5} = Op3;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 99920b7761a7..653fd0d52757 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -571,7 +571,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
- TransOps = IGSrcs.back();
+ TransOps = std::move(IGSrcs.back());
IGSrcs.pop_back();
ValidSwizzle.pop_back();
@@ -654,10 +654,10 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
return fitsConstReadLimitations(Consts);
}
-DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+DFAPacketizer *
+R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
}
static bool
@@ -1082,9 +1082,8 @@ bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(
- MF.getTarget().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index 1c3cb637a178..d3dc0e58daa1 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600INSTRUCTIONINFO_H_
-#define R600INSTRUCTIONINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_R600INSTRINFO_H
+#define LLVM_LIB_TARGET_R600_R600INSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "R600Defines.h"
@@ -154,8 +154,8 @@ namespace llvm {
bool isMov(unsigned Opcode) const override;
- DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const override;
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
@@ -206,7 +206,7 @@ namespace llvm {
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override { return 1;}
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
/// \brief Reserve the registers that may be accesed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
@@ -298,4 +298,4 @@ int getLDSNoRetOp(uint16_t Opcode);
} // End llvm namespace
-#endif // R600INSTRINFO_H_
+#endif
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 704507d368ec..b1d3ce276eee 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -475,13 +475,13 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
@@ -513,17 +513,17 @@ multiclass SteamOutputExportPattern<Instruction ExportInst,
// Stream1
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
@@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+// TODO: Do these actually match the regular fmin/fmax behavior?
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
@@ -697,7 +698,7 @@ def SGE : R600_2OP <
def SNE : R600_2OP <
0xB, "SETNE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE_NE))]
>;
def SETE_DX10 : R600_2OP <
@@ -715,9 +716,10 @@ def SETGE_DX10 : R600_2OP <
[(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;
+// FIXME: This should probably be COND_ONE
def SETNE_DX10 : R600_2OP <
0xF, "SETNE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE_NE))]
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -915,6 +917,11 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;
+class FMA_Common <bits<5> inst> : R600_3OP <
+ inst, "FMA",
+ [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
+>;
+
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
@@ -1068,7 +1075,7 @@ class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+ inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))]
> {
let Itinerary = TransALU;
}
@@ -1114,6 +1121,7 @@ def FNEG_R600 : FNEG<R600_Reg32>;
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//
+// FIXME: Should be predicated on unsafe fp math.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div f32:$src0, f32:$src1),
@@ -1124,6 +1132,8 @@ def : Pat<
(fdiv f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))
>;
+
+def : RcpPat<recip_ieee, f32>;
}
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
@@ -1133,9 +1143,12 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
>;
// FROUND pattern
-class FROUNDPat<Instruction CNDGE> : Pat <
+class FROUNDPat<Instruction CNDGE, Instruction CNDGT> : Pat <
(AMDGPUround f32:$x),
- (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+ (CNDGE $x,
+ (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)),
+ (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+ )
>;
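A scalar sketch of what the two-level pattern computes, assuming CNDGE selects its second operand when the condition is >= 0 and CNDGT when it is > 0, with FRACT(x) = x - floor(x); the asymmetric compares make exact .5 boundaries round away from zero on both sides:

#include <cmath>

static float froundSketch(float x) {
  float t = (x - std::floor(x)) - 0.5f;          // (ADD (FNEG HALF), (FRACT x))
  float up = std::ceil(x), down = std::floor(x); // (CEIL $x), (FLOOR $x)
  return x >= 0.0f ? (t >= 0.0f ? up : down)     // outer/inner CNDGE
                   : (t >  0.0f ? up : down);    // CNDGT leg for x < 0
}

For example, froundSketch(2.5f) gives 3 (t == 0 takes the >= branch) and froundSketch(-2.5f) gives -3 (t == 0 fails the > test), which is round-half-away-from-zero in both directions.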
@@ -1180,7 +1193,9 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- def : FROUNDPat <CNDGE_r600>;
+ defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+
+ def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
@@ -1350,7 +1365,7 @@ def CONST_COPY : Instruction {
let Pattern =
[(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
let AsmString = "CONST_COPY";
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let isAsCheapAsAMove = 1;
let Itinerary = NullALU;
}
@@ -1482,6 +1497,7 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 1;
}
multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index b0ae22e806a9..263561edd30d 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef R600MACHINEFUNCTIONINFO_H
-#define R600MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
#include "llvm/ADT/BitVector.h"
@@ -31,4 +31,4 @@ public:
} // End llvm namespace
-#endif //R600MACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 7ea654cb14cd..d782713cab65 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -14,7 +14,6 @@
#include "R600MachineScheduler.h"
#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
@@ -76,21 +75,25 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
float ALUFetchRationEstimate =
(AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
(FetchInstCount + Available[IDFetch].size());
- unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
- DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
- // We assume the local GPR requirements to be "dominated" by the requirement
- // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
- // after TEX are indeed likely to consume or generate values from/for the
- // TEX clause.
- // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
- // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
- // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
- // (TODO : use RegisterPressure)
- // If we are going too use too many GPR, we flush Fetch instruction to lower
- // register pressure on 128 bits regs.
- unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
- if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
+ if (ALUFetchRationEstimate == 0) {
AllowSwitchFromAlu = true;
+ } else {
+ unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
+ DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
+      // We assume the local GPR requirements to be "dominated" by the
+      // requirement of the TEX clause (which consumes 128-bit regs); ALU
+      // instructions before and after TEX are indeed likely to consume or
+      // generate values from/for the TEX clause.
+      // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
+      // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
+      // one GPR) or TmXYZW = TnXYZW (need 2 GPRs).
+      // (TODO : use RegisterPressure)
+      // If we are going to use too many GPRs, we flush Fetch instructions to
+      // lower register pressure on 128-bit regs.
+ unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
+ if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
+ AllowSwitchFromAlu = true;
+ }
}
if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
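The new guard matters because a zero ratio (no ALU work counted yet) would evaluate 62.5f / 0 and then convert the resulting infinity to unsigned, which is undefined behavior. A sketch of the guarded heuristic, with the GPR-limited wavefront count taken as a precomputed input:

// NearRegisterRequirement is 2 * pending fetch count (2 GPRs per fetch), and
// WFLimitedByGPR stands for getWFCountLimitedByGPR(NearRegisterRequirement).
static bool shouldSwitchFromAlu(float ALUFetchRatio, unsigned WFLimitedByGPR) {
  if (ALUFetchRatio == 0)
    return true;                          // would otherwise compute 62.5f / 0
  unsigned NeededWF = 62.5f / ALUFetchRatio;
  return NeededWF > WFLimitedByGPR;       // fetch clause too costly in GPRs
}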
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index fd475af2bf8f..fc5b95c28e71 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600MACHINESCHEDULER_H_
-#define R600MACHINESCHEDULER_H_
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
#include "R600InstrInfo.h"
#include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 2314136f2227..742c0e0451cb 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -279,9 +280,8 @@ bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
continue;
if (PreviousRegSeqByReg[MOp->getReg()].empty())
continue;
- std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()];
- for (unsigned i = 0, e = MIs.size(); i < e; i++) {
- CompatibleRSI = PreviousRegSeq[MIs[i]];
+ for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
+ CompatibleRSI = PreviousRegSeq[MI];
if (RSI == CompatibleRSI)
continue;
if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
@@ -314,7 +314,7 @@ void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
}
bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
MRI = &(Fn.getRegInfo());
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 74cf30974d58..ddf68c91cdf3 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -148,11 +148,11 @@ private:
}
public:
// Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT)
- : VLIWPacketizerList(MF, MLI, MDT, true),
- TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) {
+ R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ : VLIWPacketizerList(MF, MLI, true),
+ TII(static_cast<const R600InstrInfo *>(
+ MF.getSubtarget().getInstrInfo())),
+ TRI(TII->getRegisterInfo()) {
VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
@@ -328,12 +328,11 @@ public:
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
// Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI, MDT);
+ R600PacketizerList Packetizer(Fn, MLI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 247808b6e7b6..f1a8a41b9a5d 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600REGISTERINFO_H_
-#define R600REGISTERINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
#include "AMDGPURegisterInfo.h"
@@ -46,4 +46,4 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
} // End namespace llvm
-#endif // AMDIDSAREGISTERINFO_H_
+#endif
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index b7e7a2d000b3..73a9c73d8e7b 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -8,21 +8,88 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef SIDEFINES_H_
-#define SIDEFINES_H_
+#include "llvm/MC/MCInstrDesc.h"
+
+#ifndef LLVM_LIB_TARGET_R600_SIDEFINES_H
+#define LLVM_LIB_TARGET_R600_SIDEFINES_H
namespace SIInstrFlags {
+// This needs to be kept in sync with the field bits in InstSI.
enum {
- MIMG = 1 << 3,
- SMRD = 1 << 4,
- VOP1 = 1 << 5,
- VOP2 = 1 << 6,
- VOP3 = 1 << 7,
- VOPC = 1 << 8,
- SALU = 1 << 9
+ SALU = 1 << 3,
+ VALU = 1 << 4,
+
+ SOP1 = 1 << 5,
+ SOP2 = 1 << 6,
+ SOPC = 1 << 7,
+ SOPK = 1 << 8,
+ SOPP = 1 << 9,
+
+ VOP1 = 1 << 10,
+ VOP2 = 1 << 11,
+ VOP3 = 1 << 12,
+ VOPC = 1 << 13,
+
+ MUBUF = 1 << 14,
+ MTBUF = 1 << 15,
+ SMRD = 1 << 16,
+ DS = 1 << 17,
+ MIMG = 1 << 18,
+ FLAT = 1 << 19
};
}
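These bits land in MCInstrDesc::TSFlags, so encoding-class checks reduce to mask tests. A sketch of such a query in the usual style (SIInstrInfo exposes real wrappers like isVALU() built the same way):

#include "llvm/CodeGen/MachineInstr.h"

static bool isVALUSketch(const llvm::MachineInstr &MI) {
  return MI.getDesc().TSFlags & SIInstrFlags::VALU;
}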
+namespace llvm {
+namespace AMDGPU {
+ enum OperandType {
+ /// Operand with register or 32-bit immediate
+ OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET,
+ /// Operand with register or inline constant
+ OPERAND_REG_INLINE_C
+ };
+}
+}
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+
+ // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
+ // The result is true if any of these tests are true.
+ enum ClassFlags {
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
+ };
+}
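Since the class test is a 10-bit any-of mask, compound predicates are just OR'd flags; e.g. (a sketch using the ClassFlags above):

// True for either NaN encoding.
const unsigned IsNaNMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;

// Everything except NaNs and infinities, i.e. a "finite" test.
const unsigned IsFiniteMask =
    0x3ff & ~(IsNaNMask | SIInstrFlags::N_INFINITY | SIInstrFlags::P_INFINITY);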
+
+namespace SISrcMods {
+ enum {
+ NEG = 1 << 0,
+ ABS = 1 << 1
+ };
+}
+
+namespace SIOutMods {
+ enum {
+ NONE = 0,
+ MUL2 = 1,
+ MUL4 = 2,
+ DIV2 = 3
+ };
+}
+
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
@@ -32,7 +99,14 @@ enum {
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
-#define S_00B02C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
+#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
+#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
+#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
+#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
+#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
+
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
@@ -89,4 +163,4 @@ enum {
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
-#endif // SIDEFINES_H_
+#endif
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 5f714535abeb..cd1b3acc5c87 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -66,6 +66,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -135,12 +136,12 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
const MachineRegisterInfo &MRI,
unsigned Reg,
unsigned SubReg) const {
- // The Reg parameter to the function must always be defined by either a PHI
- // or a COPY, therefore it cannot be a physical register.
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Reg cannot be a physical register");
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ const TargetRegisterClass *RC
+ = TargetRegisterInfo::isVirtualRegister(Reg) ?
+ MRI.getRegClass(Reg) :
+ TRI->getRegClass(Reg);
+
RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_instr_iterator
I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
@@ -181,7 +182,12 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
unsigned DstReg = Copy.getOperand(0).getReg();
unsigned SrcReg = Copy.getOperand(1).getReg();
unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
- const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+
+ const TargetRegisterClass *DstRC
+ = TargetRegisterInfo::isVirtualRegister(DstReg) ?
+ MRI.getRegClass(DstReg) :
+ TRI->getRegClass(DstReg);
+
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
@@ -195,10 +201,10 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getTarget().getRegisterInfo());
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -216,20 +222,21 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default: continue;
case AMDGPU::PHI: {
- DEBUG(dbgs() << " Fixing PHI:\n");
- DEBUG(MI.print(dbgs()));
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
- for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
- unsigned Reg = MI.getOperand(i).getReg();
- const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
- MI.getOperand(0).getSubReg());
- MRI.constrainRegClass(Reg, RC);
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ const MachineOperand &Op = MI.getOperand(i);
+ unsigned Reg = Op.getReg();
+ const TargetRegisterClass *RC
+ = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
+
+ MRI.constrainRegClass(Op.getReg(), RC);
}
unsigned Reg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
MI.getOperand(0).getSubReg());
- if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
- MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
+ if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
+ MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
}
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
@@ -237,14 +244,66 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
+ //
+ // Also, if a PHI node defines an SGPR and has all SGPR operands
+ // we must move it to the VALU, because the SGPR operands will
+ // all end up being assigned the same register, which means
+ // there is a potential for a conflict if different threads take
+ // different control flow paths.
+ //
+ // For Example:
+ //
+ // sgpr0 = def;
+ // ...
+ // sgpr1 = def;
+ // ...
+ // sgpr2 = PHI sgpr0, sgpr1
+ // use sgpr2;
+ //
+ // Will Become:
+ //
+ // sgpr2 = def;
+ // ...
+ // sgpr2 = def;
+ // ...
+ // use sgpr2
+ //
+ // FIXME: This is OK if the branching decision is made based on an
+ // SGPR value.
+ bool SGPRBranch = false;
+
+ // The one exception to this rule is when one of the operands
+ // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+      // instruction. In this case, we know the program will
+ // never enter the second block (the loop) without entering
+ // the first block (where the condition is computed), so there
+ // is no chance for values to be over-written.
+
+ bool HasBreakDef = false;
for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
unsigned Reg = MI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
TII->moveToVALU(MI);
break;
}
+ MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+ assert(DefInstr);
+ switch(DefInstr->getOpcode()) {
+
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ // If we see a PHI instruction that defines an SGPR, then that PHI
+ // instruction has already been considered and should have
+ // a *_BREAK as an operand.
+ case AMDGPU::PHI:
+ HasBreakDef = true;
+ break;
+ }
}
+ if (!SGPRBranch && !HasBreakDef)
+ TII->moveToVALU(MI);
break;
}
case AMDGPU::REG_SEQUENCE: {
@@ -252,8 +311,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
!hasVGPROperands(MI, TRI))
continue;
- DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
- DEBUG(MI.print(dbgs()));
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
TII->moveToVALU(MI);
break;
@@ -265,8 +323,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
- DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n");
- DEBUG(MI.print(dbgs()));
+ DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
TII->moveToVALU(MI);
}
break;
@@ -274,5 +331,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
}
- return false;
+
+ return true;
}
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index 7d116eef396c..f34c37580432 100644
--- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -9,18 +9,49 @@
//
/// \file
/// SALU instructions ignore control flow, so we need to modify the live ranges
-/// of the registers they define.
+/// of the registers they define in some cases.
///
-/// The strategy is to view the entire program as if it were a single basic
-/// block and calculate the intervals accordingly. We implement this
-/// by walking this list of segments for each LiveRange and setting the
-/// end of each segment equal to the start of the segment that immediately
-/// follows it.
+/// The main case we need to handle is when a def is used in one side of a
+/// branch and not another. For example:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+///
+/// Here we need the register allocator to avoid assigning any of the defs
+/// inside of the IF to the same register as %def. In traditional live
+/// interval analysis %def is not live inside the IF branch; however, since
+/// SALU instructions inside of IF will be executed even if the branch is not
+/// taken, there is the chance that one of the instructions will overwrite the
+/// value of %def, so the use in ELSE will see the wrong value.
+///
+/// The strategy we use for solving this is to add an extra use after the ENDIF:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+/// %use
+///
+/// Adding this use will make the def live throughout the IF branch, which is
+/// what we want.
#include "AMDGPU.h"
+#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -40,16 +71,15 @@ public:
initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const override {
+ const char *getPassName() const override {
return "SI Fix SGPR live ranges";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachinePostDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -60,6 +90,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
@@ -73,36 +104,86 @@ FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+ MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+ std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
+
+  // First pass: collect all live intervals for SGPRs.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.defs()) {
+ if (MO.isImplicit())
+ continue;
+ unsigned Def = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def)) {
+ if (TRI->isSGPRClass(MRI.getRegClass(Def)))
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getInterval(Def)));
+ } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getRegUnit(Def)));
+ }
+ }
+ }
+ }
+  // Second pass: fix the intervals.
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
-
MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC);
- if (ExecUse)
+ if (MBB.succ_size() < 2)
+ continue;
+
+    // We have structured control flow, so the number of successors should be two.
+ assert(MBB.succ_size() == 2);
+ MachineBasicBlock *SuccA = *MBB.succ_begin();
+ MachineBasicBlock *SuccB = *(++MBB.succ_begin());
+ MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
+
+ if (!NCD)
+ continue;
+
+ MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();
+
+ if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
+ assert(NCD->succ_size() == 2);
+ // We want to make sure we insert the Use after the ENDIF, not after
+ // the ELSE.
+ NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
+ *(++NCD->succ_begin()));
+ }
+ assert(SuccA && SuccB);
+ for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
+ unsigned Reg = RegLR.first;
+ LiveRange *LR = RegLR.second;
+
+ // FIXME: We could be smarter here. If the register is Live-In to
+ // one block, but the other doesn't have any SGPR defs, then there
+ // won't be a conflict. Also, if the branch decision is based on
+ // a value in an SGPR, then there will be no conflict.
+ bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
+ bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
+
+ if ((!LiveInToA && !LiveInToB) ||
+ (LiveInToA && LiveInToB))
continue;
- for (const MachineOperand &Def : MI.operands()) {
- if (!Def.isReg() || !Def.isDef() ||!TargetRegisterInfo::isVirtualRegister(Def.getReg()))
- continue;
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg());
-
- if (!TRI->isSGPRClass(RC))
- continue;
- LiveInterval &LI = LIS->getInterval(Def.getReg());
- for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) {
- LiveRange::Segment &Seg = LI.segments[i];
- LiveRange::Segment &Next = LI.segments[i + 1];
- Seg.end = Next.start;
- }
- }
+ // This interval is live in to one successor, but not the other, so
+ // we need to update its range so it is live in to both.
+      DEBUG(dbgs() << "Possible SGPR conflict detected in " << *LR <<
+            " BB#" << SuccA->getNumber() << ", BB#" <<
+            SuccB->getNumber() <<
+            " with NCD = " << NCD->getNumber() << '\n');
+
+ // FIXME: Need to figure out how to update LiveRange here so this pass
+ // will be able to preserve LiveInterval analysis.
+ BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::SGPR_USE))
+ .addReg(Reg, RegState::Implicit);
+ DEBUG(NCD->getFirstNonPHI()->dump());
}
}
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
new file mode 100644
index 000000000000..d8ffa4f75505
--- /dev/null
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -0,0 +1,275 @@
+//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-fold-operands"
+using namespace llvm;
+
+namespace {
+
+class SIFoldOperands : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFoldOperands() : MachineFunctionPass(ID) {
+ initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fold Operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ unsigned UseOpNo;
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
+ UseMI(MI), UseOpNo(OpNo) {
+
+ if (FoldOp->isImm()) {
+ OpToFold = nullptr;
+ ImmToFold = FoldOp->getImm();
+ } else {
+ assert(FoldOp->isReg());
+ OpToFold = FoldOp;
+ }
+ }
+
+ bool isImm() const {
+ return !OpToFold;
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+
+char SIFoldOperands::ID = 0;
+
+char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+
+FunctionPass *llvm::createSIFoldOperandsPass() {
+ return new SIFoldOperands();
+}
+
+static bool isSafeToFold(unsigned Opcode) {
+ switch(Opcode) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::COPY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool updateOperand(FoldCandidate &Fold,
+ const TargetRegisterInfo &TRI) {
+ MachineInstr *MI = Fold.UseMI;
+ MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
+ assert(Old.isReg());
+
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
+
+ MachineOperand *New = Fold.OpToFold;
+ if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(New->getReg())) {
+ Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+ return true;
+ }
+
+ // FIXME: Handle physical registers.
+
+ return false;
+}
+
+static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *OpToFold,
+ const SIInstrInfo *TII) {
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+ // Operand is not legal, so try to commute the instruction to
+ // see if this makes it possible to fold.
+ unsigned CommuteIdx0;
+ unsigned CommuteIdx1;
+ bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
+
+ if (CanCommute) {
+ if (CommuteIdx0 == OpNo)
+ OpNo = CommuteIdx1;
+ else if (CommuteIdx1 == OpNo)
+ OpNo = CommuteIdx0;
+ }
+
+ if (!CanCommute || !TII->commuteInstruction(MI))
+ return false;
+
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold))
+ return false;
+ }
+
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ return true;
+}
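The index juggling around the commute is the subtle part: once the commutable pair is swapped, the fold target moves with it. A sketch of just that retargeting, assuming findCommutedOpIndices reported the pair (Idx0, Idx1):

static unsigned retargetAfterCommute(unsigned OpNo, unsigned Idx0,
                                     unsigned Idx1) {
  if (Idx0 == OpNo)
    return Idx1;   // the operand now lives in the other slot
  if (Idx1 == OpNo)
    return Idx0;
  return OpNo;     // OpNo was not part of the commuted pair
}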
+
+bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (!isSafeToFold(MI.getOpcode()))
+ continue;
+
+ MachineOperand &OpToFold = MI.getOperand(1);
+ bool FoldingImm = OpToFold.isImm();
+
+ // FIXME: We could also be folding things like FrameIndexes and
+ // TargetIndexes.
+ if (!FoldingImm && !OpToFold.isReg())
+ continue;
+
+      // Folding immediates with more than one use will increase program size.
+ // FIXME: This will also reduce register usage, which may be better
+ // in some cases. A better heuristic is needed.
+ if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
+ !MRI.hasOneUse(MI.getOperand(0).getReg()))
+ continue;
+
+ // FIXME: Fold operands with subregs.
+ if (OpToFold.isReg() &&
+ (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
+ OpToFold.getSubReg()))
+ continue;
+
+ std::vector<FoldCandidate> FoldList;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
+ Use != E; ++Use) {
+
+ MachineInstr *UseMI = Use->getParent();
+ const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
+
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
+ continue;
+ }
+
+ APInt Imm;
+
+ if (FoldingImm) {
+ const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
+ Imm = APInt(64, OpToFold.getImm());
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ continue;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned MovOp = TII->getMovOpcode(
+ MRI.getRegClass(UseMI->getOperand(0).getReg()));
+ if (MovOp == AMDGPU::COPY)
+ continue;
+
+ UseMI->setDesc(TII->get(MovOp));
+ }
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
+ continue;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
+ continue;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+      // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ }
+
+ for (FoldCandidate &Fold : FoldList) {
+ if (updateOperand(Fold, TRI)) {
+ // Clear kill flags.
+ if (!Fold.isImm()) {
+ assert(Fold.OpToFold && Fold.OpToFold->isReg());
+ Fold.OpToFold->setIsKill(false);
+ }
+ DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
+ Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
+ }
+ }
+ }
+ }
+ return false;
+}
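The sub0/sub1 immediate split in the loop above is plain 64-bit halving; a scalar sketch of the same APInt arithmetic (sub0 is assumed to be the low dword, matching the 64-bit register pairing):

#include <cstdint>

static void splitImm64(uint64_t Imm, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Imm);        // Imm.getLoBits(32), folded at sub0
  Hi = static_cast<uint32_t>(Imm >> 32);  // Imm.getHiBits(32), folded at sub1
}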
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 5a148a24810a..0a3fa2f930d7 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -25,6 +25,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -43,7 +44,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
@@ -52,29 +53,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
- addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
computeRegisterProperties();
- // Condition Codes
- setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
-
- setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
- setCondCodeAction(ISD::SETULE, MVT::f64, Expand);
- setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
-
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
@@ -89,6 +75,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FSIN, MVT::f32, Custom);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
@@ -102,8 +93,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
- setOperationAction(ISD::SELECT, MVT::f32, Promote);
- AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
@@ -116,6 +105,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -128,8 +119,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
-
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -140,23 +130,33 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ if (VT == MVT::i64)
+ continue;
+
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand);
+ }
+
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
@@ -167,9 +167,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::i1, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
@@ -196,10 +193,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
case ISD::BITCAST:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
- case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
default:
setOperationAction(Op, VT, Expand);
break;
@@ -207,13 +206,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}
}
- for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
- MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
- setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FCEIL, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
- }
-
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -221,18 +213,38 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
- // FIXME: These should be removed and handled the same was as f32 fneg. Source
- // modifiers also work for the double instructions.
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
-
setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
-
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::UINT_TO_FP);
+  // All memory operations. Some folding on the pointer operand is done to
+  // help match the constant offsets in the addressing modes.
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ setTargetDAGCombine(ISD::ATOMIC_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+
setSchedulingPreference(Sched::RegPressure);
}
@@ -240,15 +252,63 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
// TargetLowering queries
//===----------------------------------------------------------------------===//
-bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *IsFast) const {
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+ EVT) const {
+ // SI has some legal vector types, but no legal vector operations. Say no
+ // shuffles are legal in order to prefer scalarizing some vector operations.
+ return false;
+}
+
+// FIXME: This really needs an address space argument. The immediate offset
+// size is different for the different memory instruction sets.
+
+// The single offset DS instructions have a 16-bit unsigned byte offset.
+//
+// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
+// r + i with addr64. 32-bit has more addressing mode options. Depending on the
+// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
+//
+// SMRD instructions have an 8-bit, dword offset.
+//
+bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Allow a 16-bit unsigned immediate field, since this is what DS instructions
+ // use.
+ if (!isUInt<16>(AM.BaseOffs))
+ return false;
+
+  // Only support scales that reduce to r+i or r+r:
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Don't allow n * r
+ return false;
+ }
+
+ return true;
+}
+
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *IsFast) const {
if (IsFast)
*IsFast = false;
-  // XXX: This depends on the address space and also we may want to revisit
- // the alignment values we specify in the DataLayout.
-
// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
// which isn't a simple VT.
if (!VT.isSimple() || VT == MVT::Other)
@@ -261,8 +321,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
  // XXX - The only mention I see of this in the ISA manual is for LDS direct
  // reads: the byte address "must be dword aligned". Is it also true for the
  // normal loads and stores?
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
- return false;
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+    // ds_read/write_b64 require 8-byte alignment, but we can do a 4-byte
+    // aligned, 8-byte access in a single operation using ds_read2/write2_b32
+ // with adjacent offsets.
+ return Align % 4 == 0;
+ }
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
@@ -272,6 +336,26 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
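
A reduced model of the misalignment policy above: LDS allows any 4-byte-aligned access because an 8-byte op can be split into ds_read2/write2_b32 on adjacent dwords, while other spaces allow misalignment only for types wider than 32 bits. A sketch with plain enums; the real hook dispatches on EVT and AMDGPUAS.

#include <cassert>

enum AddrSpaceModel { Global, Local, Private };

static bool allowsMisaligned(unsigned BitWidth, AddrSpaceModel AS,
                             unsigned Align) {
  if (AS == Local)
    return Align % 4 == 0;  // ds_read2/write2_b32 cover 8B at 4B alignment
  return BitWidth > 32;     // dword-and-larger ops ignore the two LSBs
}

int main() {
  assert(allowsMisaligned(64, Local, 4));   // split into two b32 halves
  assert(!allowsMisaligned(64, Local, 2));  // sub-dword alignment: no
  assert(allowsMisaligned(64, Global, 4));  // VT wider than i32
  assert(!allowsMisaligned(32, Global, 2));
}
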
+EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: Should account for address space here.
+
+ // The default fallback uses the private pointer size as a guess for a type to
+ // use. Make sure we switch these to 64-bit accesses.
+
+  if (Size >= 16 && DstAlign >= 4) // XXX: Should only do this for global
+ return MVT::v4i32;
+
+ if (Size >= 8 && DstAlign >= 4)
+ return MVT::v2i32;
+
+ // Use the default.
+ return MVT::Other;
+}
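
The type choice above reduces to a small size/alignment table: prefer 128-bit (v4i32) or 64-bit (v2i32) accesses once the destination is at least 4-byte aligned, otherwise defer to the generic heuristic. A plain-enum sketch:

#include <cassert>
#include <cstdint>

enum MemOpTypeModel { V4I32, V2I32, UseDefault };

static MemOpTypeModel optimalMemOpType(uint64_t Size, unsigned DstAlign) {
  if (Size >= 16 && DstAlign >= 4)
    return V4I32;        // 128-bit copies
  if (Size >= 8 && DstAlign >= 4)
    return V2I32;        // 64-bit copies
  return UseDefault;     // fall back to the target-independent guess
}

int main() {
  assert(optimalMemOpType(64, 16) == V4I32);
  assert(optimalMemOpType(12, 4) == V2I32);
  assert(optimalMemOpType(12, 2) == UseDefault);
}
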
+
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -282,25 +366,37 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
return TII->isInlineConstant(Imm);
}
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- SDLoc DL, SDValue Chain,
+ SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
+ const DataLayout *DL = getDataLayout();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
- SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
- MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
+ MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
- MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
- false, false, MemVT.getSizeInBits() >> 3);
-
+ SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+ return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD,
+ VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ DL->getABITypeAlignment(Ty)); // Alignment
}
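
LowerParameter reads kernel inputs at fixed offsets from the input pointer. The byte layout below is inferred from the hardcoded offsets this patch replaces with SI::KernelInputOffsets; the numeric values are an assumption based on that older code. Explicit kernel arguments start at byte 36.

#include <cstdint>

namespace KernelInputOffsetsModel {
enum : uint64_t {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32,
  EXPLICIT_ARGS_START = 36  // first explicit argument byte offset
};
}

static_assert(KernelInputOffsetsModel::LOCAL_SIZE_Z + 4 ==
                  KernelInputOffsetsModel::EXPLICIT_ARGS_START,
              "explicit args start right after the 36 implicit bytes");
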
SDValue SITargetLowering::LowerFormalArguments(
@@ -311,7 +407,9 @@ SDValue SITargetLowering::LowerFormalArguments(
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetMachine &TM = getTargetMachine();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
@@ -320,7 +418,7 @@ SDValue SITargetLowering::LowerFormalArguments(
assert(CallConv == CallingConv::C);
SmallVector<ISD::InputArg, 16> Splits;
- uint32_t Skipped = 0;
+ BitVector Skipped(Ins.size());
for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
@@ -333,7 +431,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (!Arg.Used) {
      // We can safely skip PS inputs
- Skipped |= 1 << i;
+ Skipped.set(i);
++PSInputNum;
continue;
}
@@ -364,8 +462,8 @@ SDValue SITargetLowering::LowerFormalArguments(
}
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// At least one interpolation mode must be enabled or else the GPU will hang.
if (Info->getShaderType() == ShaderType::PIXEL &&
@@ -378,13 +476,31 @@ SDValue SITargetLowering::LowerFormalArguments(
// The pointer to the list of arguments is stored in SGPR0, SGPR1
// The pointer to the scratch buffer is stored in SGPR2, SGPR3
if (Info->getShaderType() == ShaderType::COMPUTE) {
- Info->NumUserSGPRs = 4;
- CCInfo.AllocateReg(AMDGPU::SGPR0);
- CCInfo.AllocateReg(AMDGPU::SGPR1);
- CCInfo.AllocateReg(AMDGPU::SGPR2);
- CCInfo.AllocateReg(AMDGPU::SGPR3);
- MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
- MF.addLiveIn(AMDGPU::SGPR2_SGPR3, &AMDGPU::SReg_64RegClass);
+ if (Subtarget->isAmdHsaOS())
+ Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
+ else
+ Info->NumUserSGPRs = 4;
+
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+ unsigned InputPtrRegLo =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned InputPtrRegHi =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ unsigned ScratchPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchPtrRegLo =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned ScratchPtrRegHi =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ CCInfo.AllocateReg(InputPtrRegLo);
+ CCInfo.AllocateReg(InputPtrRegHi);
+ CCInfo.AllocateReg(ScratchPtrRegLo);
+ CCInfo.AllocateReg(ScratchPtrRegHi);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
}
if (Info->getShaderType() == ShaderType::COMPUTE) {
@@ -397,23 +513,36 @@ SDValue SITargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
- if (Skipped & (1 << i)) {
+ if (Skipped[i]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
CCValAssign &VA = ArgLocs[ArgIdx++];
- EVT VT = VA.getLocVT();
+ MVT VT = VA.getLocVT();
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = Splits[i].VT;
+ const unsigned Offset = 36 + VA.getLocMemOffset();
      // The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
- 36 + VA.getLocMemOffset(),
- Ins[i].Flags.isSExt());
+ Offset, Ins[i].Flags.isSExt());
+
+ const PointerType *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+        // less than 16 bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
InVals.push_back(Arg);
+ Info->ABIArgOffset = Offset + MemVT.getStoreSize();
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -466,69 +595,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::SI_ADDR64_RSRC: {
- unsigned SuperReg = MI->getOperand(0).getReg();
- unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
- unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
- unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
- .addOperand(MI->getOperand(1));
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
- .addImm(0);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
- .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
- .addReg(SubRegHiLo)
- .addImm(AMDGPU::sub0)
- .addReg(SubRegHiHi)
- .addImm(AMDGPU::sub1);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
- .addReg(SubRegLo)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SubRegHi)
- .addImm(AMDGPU::sub2_sub3);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::SI_BUFFER_RSRC: {
- unsigned SuperReg = MI->getOperand(0).getReg();
- unsigned Args[4];
- for (unsigned i = 0, e = 4; i < e; ++i) {
- MachineOperand &Arg = MI->getOperand(i + 1);
-
- if (Arg.isReg()) {
- Args[i] = Arg.getReg();
- continue;
- }
-
- assert(Arg.isImm());
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), Reg)
- .addImm(Arg.getImm());
- Args[i] = Reg;
- }
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE),
- SuperReg)
- .addReg(Args[0])
- .addImm(AMDGPU::sub0)
- .addReg(Args[1])
- .addImm(AMDGPU::sub1)
- .addReg(Args[2])
- .addImm(AMDGPU::sub2)
- .addReg(Args[3])
- .addImm(AMDGPU::sub3);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::V_SUB_F64: {
unsigned DestReg = MI->getOperand(0).getReg();
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
@@ -536,8 +609,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
.addReg(MI->getOperand(1).getReg())
.addImm(1) // SRC1 modifiers
.addReg(MI->getOperand(2).getReg())
- .addImm(0) // SRC2 modifiers
- .addImm(0) // src2
.addImm(0) // CLAMP
.addImm(0); // OMOD
MI->eraseFromParent();
@@ -555,58 +626,15 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::FABS_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
- Reg)
- .addImm(0x7fffffff);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FNEG_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
- Reg)
- .addImm(0x80000000);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FCLAMP_SI: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
- MI->getOperand(0).getReg())
- .addImm(0) // SRC0 modifiers
- .addOperand(MI->getOperand(1))
- .addImm(0) // SRC1 modifiers
- .addImm(0) // SRC1
- .addImm(1) // CLAMP
- .addImm(0); // OMOD
- MI->eraseFromParent();
- }
}
return BB;
}
-EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
}
- return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
}
MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
@@ -636,8 +664,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
@@ -656,114 +682,13 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- switch (IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
- case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
- case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
- case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
- case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
- case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
- case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
- case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
- case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
- case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0), VT);
- case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1), VT);
- case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2), VT);
- case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR0, VT);
- case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR1, VT);
- case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR2, VT);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops [] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
- VT.getSizeInBits() / 8, 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- case AMDGPUIntrinsic::SI_sample:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
- case AMDGPUIntrinsic::SI_sampleb:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
- case AMDGPUIntrinsic::SI_sampled:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
- case AMDGPUIntrinsic::SI_samplel:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
- case AMDGPUIntrinsic::SI_vs_load_input:
- return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
- }
+ case ISD::GlobalAddress: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return LowerGlobalAddress(MFI, Op, DAG);
}
-
- case ISD::INTRINSIC_VOID:
- SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDLoc DL(Op);
- SDValue Ops [] = {
- Chain,
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10),
- Op.getOperand(11),
- Op.getOperand(12),
- Op.getOperand(13),
- Op.getOperand(14)
- };
- EVT VT = Op.getOperand(3).getValueType();
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getSizeInBits() / 8, 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- default:
- break;
- }
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
}
return SDValue();
}
@@ -786,16 +711,9 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();
- CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI.getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET), MVT::i32);
-
return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
}
@@ -849,7 +767,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
BR->getOperand(0),
BRCOND.getOperand(2)
};
- DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
+ SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
+ DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
+ BR = NewBR.getNode();
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -905,6 +825,139 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
+SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case Intrinsic::r600_read_ngroups_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
+ case Intrinsic::r600_read_ngroups_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
+ case Intrinsic::r600_read_ngroups_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
+ case Intrinsic::r600_read_global_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ case Intrinsic::r600_read_global_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ case Intrinsic::r600_read_global_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ case Intrinsic::r600_read_local_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_X, false);
+ case Intrinsic::r600_read_local_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
+ case Intrinsic::r600_read_local_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
+
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+ false);
+
+ case Intrinsic::r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
+ case Intrinsic::r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
+ case Intrinsic::r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
+ case Intrinsic::r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
+ case Intrinsic::r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
+ case Intrinsic::r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
+ case AMDGPUIntrinsic::SI_load_const: {
+ SDValue Ops[] = {
+ Op.getOperand(1),
+ Op.getOperand(2)
+ };
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+ case AMDGPUIntrinsic::SI_sample:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampleb:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampled:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
+ case AMDGPUIntrinsic::SI_samplel:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
+ case AMDGPUIntrinsic::SI_vs_load_input:
+ return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+ default:
+ return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ }
+}
+
+SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+ SDLoc DL(Op);
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10),
+ Op.getOperand(11),
+ Op.getOperand(12),
+ Op.getOperand(13),
+ Op.getOperand(14)
+ };
+
+ EVT VT = Op.getOperand(3).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+ default:
+ return SDValue();
+ }
+}
+
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -923,7 +976,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
break;
// fall-through
case AMDGPUAS::LOCAL_ADDRESS:
- return SplitVectorLoad(Op, DAG);
+ return ScalarizeVectorLoad(Op, DAG);
}
}
@@ -1027,7 +1080,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, MVT::f32);
- const SDValue One = DAG.getTargetConstantFP(1.0, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f32);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
@@ -1073,7 +1126,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
- return SplitVectorStore(Op, DAG);
+ return ScalarizeVectorStore(Op, DAG);
return SDValue();
}
@@ -1082,7 +1135,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Ret;
if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return SplitVectorStore(Op, DAG);
+ return ScalarizeVectorStore(Op, DAG);
if (VT == MVT::i1)
return DAG.getTruncStore(Store->getChain(), DL,
@@ -1114,7 +1167,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI) {
+ DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
if (ScalarVT != MVT::f32)
@@ -1162,8 +1215,21 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
-
LoadSDNode *Load = cast<LoadSDNode>(Src);
+
+ unsigned AS = Load->getAddressSpace();
+ unsigned Align = Load->getAlignment();
+ Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't try to replace the load if we have to expand it due to alignment
+ // problems. Otherwise we will end up scalarizing the load, and trying to
+ // repack into the vector for no real reason.
+ if (Align < ABIAlignment &&
+ !allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
+ return SDValue();
+ }
+
SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
Load->getChain(),
Load->getBasePtr(),
@@ -1203,33 +1269,259 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+
+// This is a variant of
+// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
+//
+// The normal DAG combiner will do this, but only if the add has one use,
+// since otherwise it would increase the number of instructions.
+//
+// This prevents us from seeing a constant offset that can be folded into a
+// memory instruction's addressing mode. If we know the resulting add offset of
+// a pointer can be folded into an addressing offset, we can replace the pointer
+// operand with the add of the new constant offset. This eliminates one of the uses,
+// and may allow the remaining use to also be simplified.
+//
+SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
+ unsigned AddrSpace,
+ DAGCombinerInfo &DCI) const {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
+ if (!CN1)
+ return SDValue();
+
+ const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!CAdd)
+ return SDValue();
+
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+ // If the resulting offset is too large, we can't fold it into the addressing
+ // mode offset.
+ APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
+ if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
+ SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+
+ return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
+}
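
A worked instance of the combine above: (shl (add x, c1), c2) becomes (add (shl x, c2), c1 << c2), moving the constant where it can land in a memory instruction's offset field. The plain-integer check below models the algebra plus the 16-bit DS bound mentioned earlier; canFoldOffset itself is the target hook, stood in for by a simple range test.

#include <cassert>
#include <cstdint>

static bool fitsDSOffset(uint64_t Off) { return Off <= 0xffff; }

int main() {
  uint64_t X = 0x1000;  // stand-in for the pointer value
  uint64_t C1 = 16, C2 = 2;

  uint64_t Before = (X + C1) << C2;          // shl (add x, c1), c2
  uint64_t Folded = (X << C2) + (C1 << C2);  // add (shl x, c2), c1 << c2
  assert(Before == Folded);

  // The combine only fires when the shifted constant still fits the
  // addressing-mode immediate (here: the DS 16-bit unsigned offset).
  assert(fitsDSOffset(C1 << C2));
}
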
+
+SDValue SITargetLowering::performAndCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+ // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() == ISD::SETCC &&
+ RHS.getOpcode() == ISD::SETCC) {
+ ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+ SDValue X = LHS.getOperand(0);
+ SDValue Y = RHS.getOperand(0);
+ if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+ return SDValue();
+
+ if (LCC == ISD::SETO) {
+ if (X != LHS.getOperand(1))
+ return SDValue();
+
+ if (RCC == ISD::SETUNE) {
+ const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
+ if (!C1 || !C1->isInfinity() || C1->isNegative())
+ return SDValue();
+
+ const uint32_t Mask = SIInstrFlags::N_NORMAL |
+ SIInstrFlags::N_SUBNORMAL |
+ SIInstrFlags::N_ZERO |
+ SIInstrFlags::P_ZERO |
+ SIInstrFlags::P_SUBNORMAL |
+ SIInstrFlags::P_NORMAL;
+
+ static_assert(((~(SIInstrFlags::S_NAN |
+ SIInstrFlags::Q_NAN |
+ SIInstrFlags::N_INFINITY |
+ SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
+ "mask not equal");
+
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
+ X, DAG.getConstant(Mask, MVT::i32));
+ }
+ }
+ }
+
+ return SDValue();
+}
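
The mask arithmetic behind this combine: "ordered and not infinite" is every V_CMP_CLASS bit except the four NaN/infinity bits. The bit positions below are assumptions matching the 10-bit fp_class mask layout used here; the check mirrors the static_assert in the combine.

#include <cassert>
#include <cstdint>

enum ClassBitsModel : uint32_t {
  S_NAN = 1u << 0, Q_NAN = 1u << 1,
  N_INFINITY = 1u << 2, N_NORMAL = 1u << 3,
  N_SUBNORMAL = 1u << 4, N_ZERO = 1u << 5,
  P_ZERO = 1u << 6, P_SUBNORMAL = 1u << 7,
  P_NORMAL = 1u << 8, P_INFINITY = 1u << 9
};

int main() {
  const uint32_t Finite = N_NORMAL | N_SUBNORMAL | N_ZERO |
                          P_ZERO | P_SUBNORMAL | P_NORMAL;
  // "Everything but NaNs and infinities" within the 10 used bits:
  assert(((~(S_NAN | Q_NAN | N_INFINITY | P_INFINITY)) & 0x3ff) == Finite);
}
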
+
+SDValue SITargetLowering::performOrCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
+ if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
+ SDValue Src = LHS.getOperand(0);
+ if (Src != RHS.getOperand(0))
+ return SDValue();
+
+ const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!CLHS || !CRHS)
+ return SDValue();
+
+ // Only 10 bits are used.
+ static const uint32_t MaxMask = 0x3ff;
+
+ uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
+ Src, DAG.getConstant(NewMask, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performClassCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Mask = N->getOperand(1);
+
+ // fp_class x, 0 -> false
+ if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
+ if (CMask->isNullValue())
+ return DAG.getConstant(0, MVT::i1);
+ }
+
+ return SDValue();
+}
+
+static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return AMDGPUISD::FMAX3;
+ case AMDGPUISD::SMAX:
+ return AMDGPUISD::SMAX3;
+ case AMDGPUISD::UMAX:
+ return AMDGPUISD::UMAX3;
+ case ISD::FMINNUM:
+ return AMDGPUISD::FMIN3;
+ case AMDGPUISD::SMIN:
+ return AMDGPUISD::SMIN3;
+ case AMDGPUISD::UMIN:
+ return AMDGPUISD::UMIN3;
+ default:
+ llvm_unreachable("Not a min/max opcode");
+ }
+}
+
+SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Opc = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+  // Only do this if the inner op has one use, since otherwise it just
+  // increases register pressure for no benefit.
+
+ // max(max(a, b), c)
+ if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0.getOperand(0),
+ Op0.getOperand(1),
+ Op1);
+ }
+
+ // max(a, max(b, c))
+ if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0,
+ Op1.getOperand(0),
+ Op1.getOperand(1));
+ }
+
+ return SDValue();
+}
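
The shape of the reassociation above, checked numerically: a two-level min/max tree collapses into one three-operand instruction when the inner node has a single use. An integer model; FMIN3/FMAX3 and friends are the SI machine ops being formed.

#include <algorithm>
#include <cassert>

static int max3(int A, int B, int C) { return std::max(A, std::max(B, C)); }

int main() {
  int A = 3, B = 7, C = 5;
  // max(max(a, b), c) and max(a, max(b, c)) both become max3(a, b, c).
  assert(std::max(std::max(A, B), C) == max3(A, B, C));
  assert(std::max(A, std::max(B, C)) == max3(A, B, C));
}
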
+
+SDValue SITargetLowering::performSetCCCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+
+ // Match isinf pattern
+ // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
+ const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+ if (!CRHS)
+ return SDValue();
+
+ const APFloat &APF = CRHS->getValueAPF();
+ if (APF.isInfinity() && !APF.isNegative()) {
+ unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1,
+ LHS.getOperand(0), DAG.getConstant(Mask, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
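
The isinf pattern above, checked numerically: |x| == +inf holds exactly when x is positive or negative infinity, which V_CMP_CLASS tests with the two infinity bits. A sketch using <cmath>; the DAG combine performs the same fold symbolically.

#include <cassert>
#include <cmath>
#include <limits>

static bool isInfClass(double X) {
  return std::isinf(X);  // stands in for fp_class x, (p_inf | n_inf)
}

int main() {
  double Inf = std::numeric_limits<double>::infinity();
  assert((std::fabs(-Inf) == Inf) == isInfClass(-Inf));
  assert((std::fabs(3.5) == Inf) == isInfClass(3.5));
}
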
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
- EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
- default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
- case ISD::SETCC: {
- SDValue Arg0 = N->getOperand(0);
- SDValue Arg1 = N->getOperand(1);
- SDValue CC = N->getOperand(2);
- ConstantSDNode * C = nullptr;
- ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
-
- // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
- if (VT == MVT::i1
- && Arg0.getOpcode() == ISD::SIGN_EXTEND
- && Arg0.getOperand(0).getValueType() == MVT::i1
- && (C = dyn_cast<ConstantSDNode>(Arg1))
- && C->isNullValue()
- && CCOp == ISD::SETNE) {
- return SimplifySetCC(VT, Arg0.getOperand(0),
- DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
- }
- break;
- }
+ default:
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::SETCC:
+ return performSetCCCombine(N, DCI);
+ case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::FMINNUM:
+ case AMDGPUISD::SMAX:
+ case AMDGPUISD::SMIN:
+ case AMDGPUISD::UMAX:
+ case AMDGPUISD::UMIN: {
+ if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ getTargetMachine().getOptLevel() > CodeGenOpt::None)
+ return performMin3Max3Combine(N, DCI);
+ break;
+ }
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
@@ -1254,22 +1546,155 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);
+
+ case ISD::FADD: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f32)
+ break;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // These should really be instruction patterns, but writing patterns with
+    // source modifiers is a pain.
+
+ // fadd (fadd (a, a), b) -> mad 2.0, a, b
+ if (LHS.getOpcode() == ISD::FADD) {
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, RHS);
+ }
+ }
+
+ // fadd (b, fadd (a, a)) -> mad 2.0, a, b
+ if (RHS.getOpcode() == ISD::FADD) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, LHS);
+ }
+ }
+
+ break;
+ }
+ case ISD::FSUB: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+
+ // Try to get the fneg to fold into the source modifier. This undoes generic
+ // DAG combines and folds them into the mad.
+ if (VT == MVT::f32) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() == ISD::FMUL) {
+ // (fsub (fmul a, b), c) -> mad a, b, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ SDValue B = LHS.getOperand(1);
+ SDValue C = DAG.getNode(ISD::FNEG, DL, VT, RHS);
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
+ }
+
+ if (RHS.getOpcode() == ISD::FMUL) {
+ // (fsub c, (fmul a, b)) -> mad (fneg a), b, c
+
+ SDValue A = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
+ SDValue B = RHS.getOperand(1);
+ SDValue C = LHS;
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
+ }
+
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, NegRHS);
+ }
+ }
+
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, NegTwo, A, LHS);
+ }
+ }
+ }
+
+ break;
}
}
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
+ if (DCI.isBeforeLegalize())
+ break;
+
+ MemSDNode *MemNode = cast<MemSDNode>(N);
+ SDValue Ptr = MemNode->getBasePtr();
+ // TODO: We could also do this for multiplies.
+ unsigned AS = MemNode->getAddressSpace();
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+ if (NewPtr) {
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = MemNode->getNumOperands(); I != E; ++I)
+ NewOps.push_back(MemNode->getOperand(I));
+
+ NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
+ }
+ }
+ break;
+ }
+ case ISD::AND:
+ return performAndCombine(N, DCI);
+ case ISD::OR:
+ return performOrCombine(N, DCI);
+ case AMDGPUISD::FP_CLASS:
+ return performClassCombine(N, DCI);
+ }
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
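
A numeric check of the FADD and FSUB rewrites in the combine above: each pattern reduces to one fused multiply-add (AMDGPUISD::MAD, v_mad_f32 on SI), with fneg absorbed as a source modifier. A plain-float model of the identities only; all values chosen so the comparisons are exact in binary floating point.

#include <cassert>

static float mad(float A, float B, float C) { return A * B + C; }

int main() {
  float A = 1.5f, B = -4.0f, C = 0.25f;
  assert((A + A) + B == mad(2.0f, A, B));   // fadd (fadd a, a), b
  assert(A * B - C == mad(A, B, -C));       // fsub (fmul a, b), c
  assert(C - A * B == mad(-A, B, C));       // fsub c, (fmul a, b)
  assert(C - (A + A) == mad(-2.0f, A, C));  // fsub c, (fadd a, a)
}
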
/// \brief Test if RegClass is one of the VSrc classes
static bool isVSrc(unsigned RegClass) {
- return AMDGPU::VSrc_32RegClassID == RegClass ||
- AMDGPU::VSrc_64RegClassID == RegClass;
-}
-
-/// \brief Test if RegClass is one of the SSrc classes
-static bool isSSrc(unsigned RegClass) {
- return AMDGPU::SSrc_32RegClassID == RegClass ||
- AMDGPU::SSrc_64RegClassID == RegClass;
+ switch(RegClass) {
+ default: return false;
+ case AMDGPU::VS_32RegClassID:
+ case AMDGPU::VS_64RegClassID:
+ return true;
+ }
}
/// \brief Analyze the possible immediate value Op
@@ -1278,75 +1703,36 @@ static bool isSSrc(unsigned RegClass) {
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
- union {
- int32_t I;
- float F;
- } Imm;
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
- if (Node->getZExtValue() >> 32) {
- return -1;
- }
- Imm.I = Node->getSExtValue();
- } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
- if (N->getValueType(0) != MVT::f32)
+ if (Node->getZExtValue() >> 32)
return -1;
- Imm.F = Node->getValueAPF().convertToFloat();
- } else
- return -1; // It isn't an immediate
-
- if ((Imm.I >= -16 && Imm.I <= 64) ||
- Imm.F == 0.5f || Imm.F == -0.5f ||
- Imm.F == 1.0f || Imm.F == -1.0f ||
- Imm.F == 2.0f || Imm.F == -2.0f ||
- Imm.F == 4.0f || Imm.F == -4.0f)
- return 0; // It's an inline immediate
-
- return Imm.I; // It's a literal immediate
-}
-
-/// \brief Try to fold an immediate directly into an instruction
-bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
- bool &ScalarSlotUsed) const {
-
- MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
- return false;
- const SDValue &Op = Mov->getOperand(0);
- int32_t Value = analyzeImmediate(Op.getNode());
- if (Value == -1) {
- // Not an immediate at all
- return false;
+ if (TII->isInlineConstant(Node->getAPIntValue()))
+ return 0;
- } else if (Value == 0) {
- // Inline immediates can always be fold
- Operand = Op;
- return true;
+ return Node->getZExtValue();
+ }
- } else if (Value == Immediate) {
- // Already fold literal immediate
- Operand = Op;
- return true;
+ if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
+ if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt()))
+ return 0;
- } else if (!ScalarSlotUsed && !Immediate) {
- // Fold this literal immediate
- ScalarSlotUsed = true;
- Immediate = Value;
- Operand = Op;
- return true;
+ if (Node->getValueType(0) == MVT::f32)
+ return FloatToBits(Node->getValueAPF().convertToFloat());
+ return -1;
}
- return false;
+ return -1;
}
const TargetRegisterClass *SITargetLowering::getRegClassForNode(
SelectionDAG &DAG, const SDValue &Op) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
const SIRegisterInfo &TRI = TII->getRegisterInfo();
if (!Op->isMachineOpcode()) {
@@ -1375,10 +1761,9 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
// If the COPY_TO_REGCLASS instruction is copying to a VSrc register
// class, then the register class for the value could be either a
    // VReg or an SReg. In order to get a more accurate
- if (OpClassID == AMDGPU::VSrc_32RegClassID ||
- OpClassID == AMDGPU::VSrc_64RegClassID) {
+ if (isVSrc(OpClassID))
return getRegClassForNode(DAG, Op.getOperand(0));
- }
+
return TRI.getRegClass(OpClassID);
case AMDGPU::EXTRACT_SUBREG: {
int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -1398,7 +1783,8 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
/// \brief Does "Op" fit into register class "RegClass" ?
bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
if (!RC) {
return false;
@@ -1406,242 +1792,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}
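
The rewritten analyzeImmediate above has a three-way contract: 0 means "inline constant" (free to encode), -1 means "not an immediate or not encodable", anything else is the 32-bit literal to place in the instruction stream. The integer half can be modeled standalone; the inline range used below (-16..64) comes from the values the deleted code tested and is an assumption about SIInstrInfo::isInlineConstant.

#include <cassert>
#include <cstdint>

static int32_t analyzeImmModel(uint64_t ZExtVal) {
  if (ZExtVal >> 32)               // wider than 32 bits: not encodable
    return -1;
  int32_t V = int32_t(ZExtVal);
  if (V >= -16 && V <= 64)         // assumed inline-constant range
    return 0;
  return V;                        // literal dword immediate
}

int main() {
  assert(analyzeImmModel(7) == 0);            // inline
  assert(analyzeImmModel(0xfffffff0u) == 0);  // -16 as i32, still inline
  assert(analyzeImmModel(1234) == 1234);      // literal
  assert(analyzeImmModel(1ull << 40) == -1);  // too wide
}
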
-/// \brief Make sure that we don't exceed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass,
- bool &ScalarSlotUsed) const {
-
- // First map the operands register class to a destination class
- if (RegClass == AMDGPU::VSrc_32RegClassID)
- RegClass = AMDGPU::VReg_32RegClassID;
- else if (RegClass == AMDGPU::VSrc_64RegClassID)
- RegClass = AMDGPU::VReg_64RegClassID;
- else
- return;
-
- // Nothing to do if they fit naturally
- if (fitsRegClass(DAG, Operand, RegClass))
- return;
-
- // If the scalar slot isn't used yet use it now
- if (!ScalarSlotUsed) {
- ScalarSlotUsed = true;
- return;
- }
-
-  // This is a conservative approach. It is possible that we can't determine the
- // correct register class and copy too often, but better safe than sorry.
-
- SDNode *Node;
- // We can't use COPY_TO_REGCLASS with FrameIndex arguments.
- if (isa<FrameIndexSDNode>(Operand)) {
- unsigned Opcode = Operand.getValueType() == MVT::i32 ?
- AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
- Operand);
- } else {
- SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
- Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
- Operand.getValueType(), Operand, RC);
- }
- Operand = SDValue(Node, 0);
-}
-
-/// \returns true if \p Node's operands are different from the SDValue list
-/// \p Ops
-static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
- for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
- if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
- return true;
- }
- }
- return false;
-}
-
-/// \brief Try to fold the Nodes operands into the Node
-SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
- SelectionDAG &DAG) const {
-
- // Original encoding (either e32 or e64)
- int Opcode = Node->getMachineOpcode();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- const MCInstrDesc *Desc = &TII->get(Opcode);
-
- unsigned NumDefs = Desc->getNumDefs();
- unsigned NumOps = Desc->getNumOperands();
-
- // Commuted opcode if available
- int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
- const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
-
- assert(!DescRev || DescRev->getNumDefs() == NumDefs);
- assert(!DescRev || DescRev->getNumOperands() == NumOps);
-
- // e64 version if available, -1 otherwise
- int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
- const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
- int InputModifiers[3] = {0};
-
- assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
-
- int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
- bool HaveVSrc = false, HaveSSrc = false;
-
- // First figure out what we already have in this instruction.
- for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
- i != e && Op < NumOps; ++i, ++Op) {
-
- unsigned RegClass = Desc->OpInfo[Op].RegClass;
- if (isVSrc(RegClass))
- HaveVSrc = true;
- else if (isSSrc(RegClass))
- HaveSSrc = true;
- else
- continue;
-
- int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
- if (Imm != -1 && Imm != 0) {
- // Literal immediate
- Immediate = Imm;
- }
- }
-
- // If we neither have VSrc nor SSrc, it makes no sense to continue.
- if (!HaveVSrc && !HaveSSrc)
- return Node;
-
- // No scalar allowed when we have both VSrc and SSrc
- bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
-
- // Second go over the operands and try to fold them
- std::vector<SDValue> Ops;
- bool Promote2e64 = false;
- for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
- i != e && Op < NumOps; ++i, ++Op) {
-
- const SDValue &Operand = Node->getOperand(i);
- Ops.push_back(Operand);
-
- // Already folded immediate?
- if (isa<ConstantSDNode>(Operand.getNode()) ||
- isa<ConstantFPSDNode>(Operand.getNode()))
- continue;
-
- // Is this a VSrc or SSrc operand?
- unsigned RegClass = Desc->OpInfo[Op].RegClass;
- if (isVSrc(RegClass) || isSSrc(RegClass)) {
- // Try to fold the immediates
- if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't work, make sure we don't hit the SReg limit.
- ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
- }
- continue;
- } else {
- // If it's not a VSrc or SSrc operand check if we have a GlobalAddress.
- // These will be lowered to immediates, so we will need to insert a MOV.
- if (isa<GlobalAddressSDNode>(Ops[i])) {
- SDNode *Node = DAG.getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(),
- Operand.getValueType(), Operand);
- Ops[i] = SDValue(Node, 0);
- }
- }
-
- if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
-
- unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
- assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
-
- // Test if it makes sense to swap operands
- if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
- (!fitsRegClass(DAG, Ops[1], RegClass) &&
- fitsRegClass(DAG, Ops[1], OtherRegClass))) {
-
- // Swap commutable operands
- std::swap(Ops[0], Ops[1]);
-
- Desc = DescRev;
- DescRev = nullptr;
- continue;
- }
- }
-
- if (Immediate)
- continue;
-
- if (DescE64) {
- // Test if it makes sense to switch to e64 encoding
- unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
- if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
- continue;
-
- int32_t TmpImm = -1;
- if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
- (!fitsRegClass(DAG, Ops[i], RegClass) &&
- fitsRegClass(DAG, Ops[1], OtherRegClass))) {
-
- // Switch to e64 encoding
- Immediate = -1;
- Promote2e64 = true;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- }
-
- if (!DescE64 && !Promote2e64)
- continue;
- if (!Operand.isMachineOpcode())
- continue;
- if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
- Ops.pop_back();
- Ops.push_back(Operand.getOperand(0));
- InputModifiers[i] = 1;
- Promote2e64 = true;
- if (!DescE64)
- continue;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
- Ops.pop_back();
- Ops.push_back(Operand.getOperand(0));
- InputModifiers[i] = 2;
- Promote2e64 = true;
- if (!DescE64)
- continue;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- }
-
- if (Promote2e64) {
- std::vector<SDValue> OldOps(Ops);
- Ops.clear();
- for (unsigned i = 0; i < OldOps.size(); ++i) {
- // src_modifier
- Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
- Ops.push_back(OldOps[i]);
- }
- // Add the modifier flags while promoting
- for (unsigned i = 0; i < 2; ++i)
- Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
- }
-
- // Add optional chain and glue
- for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
- Ops.push_back(Node->getOperand(i));
-
- // Nodes that have a glue result are not CSE'd by getMachineNode(), so in
-  // this case a brand new node will always be created, even if the operands
- // are the same as before. So, manually check if anything has been changed.
- if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
- return Node;
- }
-
- // Create a complete new instruction
- return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
-}
-
/// \brief Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
@@ -1706,7 +1856,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)
if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, MVT::i32);
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
SDLoc(), Users[Lane]->getValueType(0),
SDValue(Node, 0), RC);
@@ -1733,46 +1883,185 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}
+/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
+/// with frame index operands.
+/// LLVM assumes that inputs to these instructions are registers.
+void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
+ if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+ Ops.push_back(Node->getOperand(i));
+ continue;
+ }
+
+ SDLoc DL(Node);
+ Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
+ Node->getOperand(i).getValueType(),
+ Node->getOperand(i)), 0));
+ }
+
+ DAG.UpdateNodeOperands(Node, Ops);
+}
+
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
Node = AdjustRegClass(Node, DAG);
if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
- return foldOperands(Node, DAG);
+ if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
+ Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
+ legalizeTargetIndependentNode(Node, DAG);
+ return Node;
+ }
+ return Node;
}
/// \brief Assign the register class depending on the number of
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (!TII->isMIMG(MI->getOpcode()))
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ TII->legalizeOperands(MI);
+
+ if (TII->isMIMG(MI->getOpcode())) {
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VGPR_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
+ MI->setDesc(TII->get(NewOpcode));
+ MRI.setRegClass(VReg, RC);
return;
+ }
- unsigned VReg = MI->getOperand(0).getReg();
- unsigned Writemask = MI->getOperand(1).getImm();
- unsigned BitsSet = 0;
- for (unsigned i = 0; i < 4; ++i)
- BitsSet += Writemask & (1 << i) ? 1 : 0;
+ // Replace unused atomics with the no return version.
+ int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
+ if (NoRetAtomicOp != -1) {
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->setDesc(TII->get(NoRetAtomicOp));
+ MI->RemoveOperand(0);
+ }
- const TargetRegisterClass *RC;
- switch (BitsSet) {
- default: return;
- case 1: RC = &AMDGPU::VReg_32RegClass; break;
- case 2: RC = &AMDGPU::VReg_64RegClass; break;
- case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ return;
}
+}
- unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
- MI->setDesc(TII->get(NewOpcode));
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MRI.setRegClass(VReg, RC);
+static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
+ SDValue K = DAG.getTargetConstant(Val, MVT::i32);
+ return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
+}
+
+MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+#if 1
+ // XXX - Workaround for moveToVALU not handling different register class
+ // inserts for REG_SEQUENCE.
+
+ // Build the half of the subregister with the constants.
+ const SDValue Ops0[] = {
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, Ops0), 0);
+
+ // Combine the constants and the pointer.
+ const SDValue Ops1[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ SubRegHi,
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
+#else
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+    buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+
+#endif
+}
+
+/// \brief Return a resource descriptor with the 'Add TID' bit enabled.
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to the
+/// resource pointer.
+MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const {
+ SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
+ SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
+ if (RsrcDword1) {
+ PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
+ DAG.getConstant(RsrcDword1, MVT::i32)), 0);
+ }
+
+ SDValue DataLo = buildSMovImm32(DAG, DL,
+ RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
+ SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
+
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ PtrLo,
+ DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ PtrHi,
+ DAG.getTargetConstant(AMDGPU::sub1, MVT::i32),
+ DataLo,
+ DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ DataHi,
+ DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+}
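For orientation, a hedged sketch of the four dwords buildRSRC assembles above; the type and field names are illustrative only, following the surrounding comments:

#include <cstdint>

struct RsrcDescSketch {
  uint32_t Dword0; // sub0: resource pointer [31:0]
  uint32_t Dword1; // sub1: pointer [63:32] | RsrcDword1 (stride, bits [61:48])
  uint32_t Dword2; // sub2: RsrcDword2And3[31:0], e.g. the buffer size
  uint32_t Dword3; // sub3: RsrcDword2And3[63:32], data format / ADD_TID bits
};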
+
+MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
@@ -1800,12 +2089,26 @@ MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
return N;
}
ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
- SDValue Ops[] = {
- SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128,
- DAG.getConstant(0, MVT::i64)), 0),
- N->getOperand(0),
- DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)
- };
+
+ const SDValue Zero64 = DAG.getTargetConstant(0, MVT::i64);
+ SDValue Ptr(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Zero64), 0);
+ MachineSDNode *RSrc = wrapAddr64Rsrc(DAG, DL, Ptr);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(SDValue(RSrc, 0));
+ Ops.push_back(N->getOperand(0));
+
+ // The immediate offset is in dwords on SI and in bytes on VI.
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue(), MVT::i32));
+ else
+ Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue() << 2, MVT::i32));
+
+ // Copy remaining operands so we keep any chain and glue nodes that follow
+ // the normal operands.
+ for (unsigned I = 2, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
}
}
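A hypothetical helper mirroring the branch above: SI encodes the SMRD immediate in dwords while VI uses bytes, so only SI needs the << 2 conversion when rewriting to a byte-based MUBUF offset.

#include <cstdint>

// E.g. an offset of 3 means byte 12 on SI but byte 3 on VI.
static int64_t mubufByteOffset(int64_t ImmOffset, bool IsVIOrLater) {
  return IsVIOrLater ? ImmOffset : ImmOffset << 2;
}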
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index d106d4abb187..876fd8c9f369 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SIISELLOWERING_H
-#define SIISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_SIISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_SIISELLOWERING_H
#include "AMDGPUISelLowering.h"
#include "SIInstrInfo.h"
@@ -27,6 +27,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
+
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -34,30 +37,49 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- bool foldImm(SDValue &Operand, int32_t &Immediate,
- bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
const SDValue &Op) const;
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const;
- void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass, bool &ScalarSlotUsed) const;
- SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
- static SDValue performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI);
+ SDValue performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const;
+ SDValue performSHLPtrCombine(SDNode *N,
+ unsigned AS,
+ DAGCombinerInfo &DCI) const;
+ SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
public:
SITargetLowering(TargetMachine &tm);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
- bool *IsFast) const override;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ EVT /*VT*/) const override;
+
+ bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const override;
+
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *IsFast) const override;
+
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -85,8 +107,19 @@ public:
int32_t analyzeImmediate(const SDNode *N) const;
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const override;
+ void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
+
+ MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const;
+ MachineSDNode *buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const;
+ MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const;
};
} // End namespace llvm
-#endif //SIISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 7dfc31bdfa01..181b11643bf3 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -17,6 +17,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -39,6 +41,12 @@ typedef union {
} Counters;
+typedef enum {
+ OTHER,
+ SMEM,
+ VMEM
+} InstType;
+
typedef Counters RegCounters[512];
typedef std::pair<unsigned, unsigned> RegInterval;
@@ -71,6 +79,9 @@ private:
/// \brief Different export instruction types seen since last wait.
unsigned ExpInstrTypesSeen;
+ /// \brief Type of the last opcode.
+ InstType LastOpcodeType;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -81,7 +92,8 @@ private:
RegInterval getRegInterval(MachineOperand &Op);
/// \brief Handle instructions async components
- void pushInstruction(MachineInstr &MI);
+ void pushInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
/// \brief Insert the actual wait instruction
bool insertWait(MachineBasicBlock &MBB,
@@ -174,6 +186,29 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
if (!MI.getDesc().mayStore())
return false;
+ // Check if this operand is the value being stored.
+ // Special case for DS instructions, since the address
+ // operand comes before the value operand and it may have
+ // multiple data operands.
+
+ if (TII->isDS(MI.getOpcode())) {
+ MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
+ if (Data && Op.isIdenticalTo(*Data))
+ return true;
+
+ MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+ if (Data0 && Op.isIdenticalTo(*Data0))
+ return true;
+
+ MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
+ if (Data1 && Op.isIdenticalTo(*Data1))
+ return true;
+
+ return false;
+ }
+
+ // NOTE: This assumes that the value operand is before the
+ // address operand, and that there is only one value operand.
for (MachineInstr::mop_iterator I = MI.operands_begin(),
E = MI.operands_end(); I != E; ++I) {
@@ -201,10 +236,11 @@ RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
return Result;
}
-void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
// Get the hardware counter increments and sum them up
- Counters Increment = getHwCounts(MI);
+ Counters Increment = getHwCounts(*I);
unsigned Sum = 0;
for (unsigned i = 0; i < 3; ++i) {
@@ -213,17 +249,42 @@ void SIInsertWaits::pushInstruction(MachineInstr &MI) {
}
// If we don't increase anything then that's it
- if (Sum == 0)
+ if (Sum == 0) {
+ LastOpcodeType = OTHER;
return;
+ }
+
+ if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
+ // or SMEM clause, respectively.
+ //
+ // The temporary workaround is to break the clauses with S_NOP.
+ //
+ // The proper solution would be to allocate registers such that all source
+ // and destination registers don't overlap, e.g. this is illegal:
+ // r0 = load r2
+ // r2 = load r0
+ if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
+ (LastOpcodeType == VMEM && Increment.Named.VM)) {
+ // Insert a NOP to break the clause.
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ }
+
+ if (TII->isSMRD(I->getOpcode()))
+ LastOpcodeType = SMEM;
+ else if (Increment.Named.VM)
+ LastOpcodeType = VMEM;
+ }
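An assumed before/after of the clause-breaking workaround, as a comment sketch:

// On VI, two back-to-back SMEM loads would otherwise form one clause:
//   s_load_dword s0, ...
//   s_load_dword s1, ...   ; same clause as the previous load
// After this pass:
//   s_load_dword s0, ...
//   s_nop 0                ; breaks the clause
//   s_load_dword s1, ...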
// Remember which export instructions we have seen
if (Increment.Named.EXP) {
- ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+ ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
}
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &Op = MI.getOperand(i);
+ MachineOperand &Op = I->getOperand(i);
if (!isOpRelevant(Op))
continue;
@@ -300,6 +361,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
((Counts.Named.EXP & 0x7) << 4) |
((Counts.Named.LGKM & 0x7) << 8));
+ LastOpcodeType = OTHER;
return true;
}
@@ -346,13 +408,15 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
- TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
+ LastOpcodeType = OTHER;
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -364,8 +428,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- Changes |= insertWait(MBB, I, handleOperands(*I));
- pushInstruction(*I);
+ // Wait for everything before a barrier.
+ if (I->getOpcode() == AMDGPU::S_BARRIER)
+ Changes |= insertWait(MBB, I, LastIssued);
+ else
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+ pushInstruction(MBB, I);
}
// Wait for everything at the end of the MBB
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 00e69ddbeea4..99a1df36c1f4 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -17,24 +17,58 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VM_CNT = 0;
field bits<1> EXP_CNT = 0;
field bits<1> LGKM_CNT = 0;
- field bits<1> MIMG = 0;
- field bits<1> SMRD = 0;
+
+ field bits<1> SALU = 0;
+ field bits<1> VALU = 0;
+
+ field bits<1> SOP1 = 0;
+ field bits<1> SOP2 = 0;
+ field bits<1> SOPC = 0;
+ field bits<1> SOPK = 0;
+ field bits<1> SOPP = 0;
+
field bits<1> VOP1 = 0;
field bits<1> VOP2 = 0;
field bits<1> VOP3 = 0;
field bits<1> VOPC = 0;
- field bits<1> SALU = 0;
+ field bits<1> MUBUF = 0;
+ field bits<1> MTBUF = 0;
+ field bits<1> SMRD = 0;
+ field bits<1> DS = 0;
+ field bits<1> MIMG = 0;
+ field bits<1> FLAT = 0;
+
+ // These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
let TSFlags{2} = LGKM_CNT;
- let TSFlags{3} = MIMG;
- let TSFlags{4} = SMRD;
- let TSFlags{5} = VOP1;
- let TSFlags{6} = VOP2;
- let TSFlags{7} = VOP3;
- let TSFlags{8} = VOPC;
- let TSFlags{9} = SALU;
+
+ let TSFlags{3} = SALU;
+ let TSFlags{4} = VALU;
+
+ let TSFlags{5} = SOP1;
+ let TSFlags{6} = SOP2;
+ let TSFlags{7} = SOPC;
+ let TSFlags{8} = SOPK;
+ let TSFlags{9} = SOPP;
+
+ let TSFlags{10} = VOP1;
+ let TSFlags{11} = VOP2;
+ let TSFlags{12} = VOP3;
+ let TSFlags{13} = VOPC;
+
+ let TSFlags{14} = MUBUF;
+ let TSFlags{15} = MTBUF;
+ let TSFlags{16} = SMRD;
+ let TSFlags{17} = DS;
+ let TSFlags{18} = MIMG;
+ let TSFlags{19} = FLAT;
+
+ // Most instructions require adjustments after selection to satisfy
+ // operand requirements.
+ let hasPostISelHook = 1;
+ let SchedRW = [Write32Bit];
}
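A sketch of the matching C++ side; the authoritative enum lives in SIDefines.h under SIInstrFlags and must mirror the TSFlags bit assignments above (exact spelling here is assumed):

namespace SIInstrFlags {
enum : uint32_t {
  VM_CNT   = 1 << 0,
  EXP_CNT  = 1 << 1,
  LGKM_CNT = 1 << 2,
  SALU     = 1 << 3,
  VALU     = 1 << 4,
  SOP1     = 1 << 5,
  SOP2     = 1 << 6,
  SOPC     = 1 << 7,
  SOPK     = 1 << 8,
  SOPP     = 1 << 9,
  VOP1     = 1 << 10,
  VOP2     = 1 << 11,
  VOP3     = 1 << 12,
  VOPC     = 1 << 13,
  MUBUF    = 1 << 14,
  MTBUF    = 1 << 15,
  SMRD     = 1 << 16,
  DS       = 1 << 17,
  MIMG     = 1 << 18,
  FLAT     = 1 << 19
};
}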
class Enc32 {
@@ -49,6 +83,44 @@ class Enc64 {
int Size = 8;
}
+let Uses = [EXEC] in {
+
+class VOPCCommon <dag ins, string asm, list<dag> pattern> :
+ InstSI <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOPC = 1;
+ let VALU = 1;
+ let Size = 4;
+}
+
+class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP1 = 1;
+ let VALU = 1;
+ let Size = 4;
+}
+
+class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP2 = 1;
+ let VALU = 1;
+ let Size = 4;
+}
+
class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
@@ -56,11 +128,20 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+ // but standalone patterns are almost always preferred, so we need to adjust
+ // the priority lower. The goal is to subtract a number large enough to bring
+ // the net complexity down to zero (or below).
+ let AddedComplexity = -1000;
+
let VOP3 = 1;
+ let VALU = 1;
int Size = 8;
}
+} // End Uses = [EXEC]
+
//===----------------------------------------------------------------------===//
// Scalar operations
//===----------------------------------------------------------------------===//
@@ -134,22 +215,26 @@ class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
let Inst{31-27} = 0x18; //encoding
}
-class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, SOP1e <op> {
-
+let SchedRW = [WriteSALU] in {
+class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
+ let SOP1 = 1;
}
-class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern>, SOP2e<op> {
+class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
+ let SOP2 = 1;
+
+ let UseNamedOperandTable = 1;
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -160,31 +245,48 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
+ let SOPC = 1;
+
+ let UseNamedOperandTable = 1;
}
-class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins , asm, pattern>, SOPKe<op> {
+class SOPK <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins , asm, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
+ let SOPK = 1;
+
+ let UseNamedOperandTable = 1;
}
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> :
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
+ let SOPP = 1;
+
+ let UseNamedOperandTable = 1;
}
-class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
- list<dag> pattern> : InstSI<outs, ins, asm, pattern>, SMRDe<op, imm> {
+} // let SchedRW = [WriteSALU]
+
+class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
let LGKM_CNT = 1;
let SMRD = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteSMEM];
}
//===----------------------------------------------------------------------===//
@@ -410,8 +512,27 @@ class MIMGe <bits<7> op> : Enc64 {
let Inst{57-53} = SSAMP{6-2};
}
-class EXPe : Enc64 {
+class FLATe<bits<7> op> : Enc64 {
+ bits<8> addr;
+ bits<8> data;
+ bits<8> vdst;
+ bits<1> slc;
+ bits<1> glc;
+ bits<1> tfe;
+ // 15-0 is reserved.
+ let Inst{16} = glc;
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x37; // Encoding.
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data;
+ // 54-48 is reserved.
+ let Inst{55} = tfe;
+ let Inst{63-56} = vdst;
+}
+
+class EXPe : Enc64 {
bits<4> EN;
bits<6> TGT;
bits<1> COMPR;
@@ -437,48 +558,23 @@ class EXPe : Enc64 {
let Uses = [EXEC] in {
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern>, VOP1e<op> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP1 = 1;
-}
+ VOP1Common <outs, ins, asm, pattern>,
+ VOP1e<op>;
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern>, VOP2e<op> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP2 = 1;
-}
-
-class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP3Common <outs, ins, asm, pattern>, VOP3e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- InstSI <(outs VCCReg:$dst), ins, asm, pattern>, VOPCe <op> {
+ VOPCCommon <ins, asm, pattern>, VOPCe <op>;
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOPC = 1;
-}
-
-class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern>, VINTRPe<op> {
-
- let neverHasSideEffects = 1;
+class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
let mayLoad = 1;
let mayStore = 0;
+ let hasSideEffects = 0;
}
} // End Uses = [EXEC]
@@ -489,29 +585,56 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Uses = [EXEC] in {
-class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> , DSe<op> {
+class DS <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
let LGKM_CNT = 1;
+ let DS = 1;
+ let UseNamedOperandTable = 1;
+ let DisableEncoding = "$m0";
+ let SchedRW = [WriteLDS];
}
-class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, MUBUFe <op> {
+class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ DS <outs, ins, asm, pattern>, DSe<op>;
+
+class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
let VM_CNT = 1;
let EXP_CNT = 1;
+ let MUBUF = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ let SchedRW = [WriteVMEM];
}
-class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, MTBUFe <op> {
+class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
let VM_CNT = 1;
let EXP_CNT = 1;
+ let MTBUF = 1;
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteVMEM];
+}
+
+class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, FLATe <op> {
+ let FLAT = 1;
+ // Internally, FLAT instructions are executed as both an LDS and a
+ // buffer instruction, so they increment both VM_CNT and LGKM_CNT
+ // and are not considered done until both have been decremented.
+ let VM_CNT = 1;
+ let LGKM_CNT = 1;
+
+ let Uses = [EXEC, FLAT_SCR]; // M0
+
+ let UseNamedOperandTable = 1;
+ let hasSideEffects = 0;
}
class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -520,16 +643,9 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MIMG = 1;
-}
-def EXP : InstSI<
- (outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] >, EXPe {
-
- let EXP_CNT = 1;
+ let hasSideEffects = 0; // XXX ????
}
+
} // End Uses = [EXEC]
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 51f453292da8..1a4c0d4e57b0 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -17,10 +17,13 @@
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -32,6 +35,259 @@ SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//
+static unsigned getNumOperandsNoGlue(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ return N;
+}
+
+static SDValue findChainOperand(SDNode *Load) {
+ SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
+ assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
+ return LastOp;
+}
+
+/// \brief Returns true if both nodes have the same value for the given
+/// operand \p Op, or if both nodes do not have this operand.
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
+ unsigned Opc0 = N0->getMachineOpcode();
+ unsigned Opc1 = N1->getMachineOpcode();
+
+ int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
+ int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
+
+ if (Op0Idx == -1 && Op1Idx == -1)
+ return true;
+
+ if ((Op0Idx == -1 && Op1Idx != -1) ||
+ (Op1Idx == -1 && Op0Idx != -1))
+ return false;
+
+ // getNamedOperandIdx returns the index for the MachineInstr's operands,
+ // which includes the result as the first operand. We are indexing into the
+ // MachineSDNode's operands, so we need to skip the result operand to get
+ // the real index.
+ --Op0Idx;
+ --Op1Idx;
+
+ return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
+}
+
+bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
+ int64_t &Offset0,
+ int64_t &Offset1) const {
+ if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
+ return false;
+
+ unsigned Opc0 = Load0->getMachineOpcode();
+ unsigned Opc1 = Load1->getMachineOpcode();
+
+ // Make sure both are actually loads.
+ if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1)) {
+
+ // FIXME: Handle this case:
+ if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
+ return false;
+
+ // Check base reg.
+ if (Load0->getOperand(1) != Load1->getOperand(1))
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ // Skip read2 / write2 variants for simplicity.
+ // TODO: We should report true if the used offsets are adjacent (excluding
+ // st64 versions).
+ if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
+ AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
+ return true;
+ }
+
+ if (isSMRD(Opc0) && isSMRD(Opc1)) {
+ assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+
+ // Check base reg.
+ if (Load0->getOperand(0) != Load1->getOperand(0))
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ return true;
+ }
+
+ // MUBUF and MTBUF can access the same addresses.
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
+
+ // MUBUF and MTBUF have vaddr at different indices.
+ if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
+ findChainOperand(Load0) != findChainOperand(Load1) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
+ return false;
+
+ int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+ int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+
+ if (OffIdx0 == -1 || OffIdx1 == -1)
+ return false;
+
+ // getNamedOperandIdx returns the index for MachineInstrs. Since they
+ // include the output in the operand list, but SDNodes don't, we need to
+ // decrement the index by one.
+ --OffIdx0;
+ --OffIdx1;
+
+ SDValue Off0 = Load0->getOperand(OffIdx0);
+ SDValue Off1 = Load1->getOperand(OffIdx1);
+
+ // The offset might be a FrameIndexSDNode.
+ if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+static bool isStride64(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::DS_READ2ST64_B32:
+ case AMDGPU::DS_READ2ST64_B64:
+ case AMDGPU::DS_WRITE2ST64_B32:
+ case AMDGPU::DS_WRITE2ST64_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Opc = LdSt->getOpcode();
+ if (isDS(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (OffsetImm) {
+ // Normal, single offset LDS instruction.
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ // The two-offset instructions use offset0 and offset1 instead. We can
+ // treat these as a load with a single offset if the two offsets are
+ // consecutive. We will use this for some partially aligned loads.
+ const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset0);
+ const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset1);
+
+ uint8_t Offset0 = Offset0Imm->getImm();
+ uint8_t Offset1 = Offset1Imm->getImm();
+ assert(Offset1 > Offset0);
+
+ if (Offset1 - Offset0 == 1) {
+ // Each of these offsets is in element-sized units, so we need to convert
+ // them to the byte offsets of the individual reads.
+
+ unsigned EltSize;
+ if (LdSt->mayLoad())
+ EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
+ else {
+ assert(LdSt->mayStore());
+ int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
+ EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
+ }
+
+ if (isStride64(Opc))
+ EltSize *= 64;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+ BaseReg = AddrReg->getReg();
+ Offset = EltSize * Offset0;
+ return true;
+ }
+
+ return false;
+ }
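A worked example of the two-offset conversion, with assumed operand values:

// A ds_read2_b32 with offset0 = 2 and offset1 = 3 loads a VReg_64 pair,
// so EltSize = 8 / 2 = 4 bytes and the access is reported as
// (BaseReg = addr, Offset = 4 * 2 = 8); the st64 variants scale EltSize
// by 64 first.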
+
+ if (isMUBUF(Opc) || isMTBUF(Opc)) {
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
+ return false;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::vaddr);
+ if (!AddrReg)
+ return false;
+
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ if (isSMRD(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (!OffsetImm)
+ return false;
+
+ const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::sbase);
+ BaseReg = SBaseReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ unsigned Opc0 = FirstLdSt->getOpcode();
+ unsigned Opc1 = SecondLdSt->getOpcode();
+
+ // TODO: This needs finer tuning
+ if (NumLoads > 4)
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1))
+ return true;
+
+ if (isSMRD(Opc0) && isSMRD(Opc1))
+ return true;
+
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
+ return true;
+
+ return false;
+}
+
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -70,26 +326,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opcode;
const int16_t *SubIndices;
- if (AMDGPU::M0 == DestReg) {
- // Check if M0 isn't already set to this value
- for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
- I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
-
- if (!I->definesRegister(AMDGPU::M0))
- continue;
-
- unsigned Opc = I->getOpcode();
- if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
- break;
-
- if (!I->readsRegister(SrcReg))
- break;
-
- // The copy isn't necessary
- return;
- }
- }
-
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
@@ -117,8 +353,8 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = AMDGPU::S_MOV_B32;
SubIndices = Sub0_15;
- } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+ } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -182,6 +418,27 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
return Opcode;
}
+unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
+
+ if (DstRC->getSize() == 4) {
+ return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
+ return AMDGPU::S_MOV_B64;
+ } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
+ return AMDGPU::V_MOV_B64_PSEUDO;
+ }
+ return AMDGPU::COPY;
+}
+
+static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
+
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ // FIXME: Implement spilling for other shader types.
+ return MFI->getShaderType() == ShaderType::COMPUTE;
+
+}
+
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@@ -190,49 +447,49 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned KillFlag = isKill ? RegState::Kill : 0;
+ int Opcode = -1;
- if (RI.hasVGPRs(RC)) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
- .addReg(SrcReg);
- } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
- unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
- .addReg(SrcReg, KillFlag)
- .addImm(Lane);
- MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
- } else if (RI.isSGPRClass(RC)) {
+ if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
- // registers, so we need to use pseudo instruction for vector
- // registers.
- //
- // Reserve a spot in the spill tracker for each sub-register of
- // the vector register.
- unsigned NumSubRegs = RC->getSize() / 4;
- unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
- MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
- FirstLane);
-
- unsigned Opcode;
+ // registers, so we need to use a pseudo instruction for spilling
+ // SGPRs.
switch (RC->getSize() * 8) {
- case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
- default: llvm_unreachable("Cannot spill register class");
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
}
+ } else if (shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ MFI->setHasSpilledVGPRs();
- BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+ switch (RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
+ }
+ }
+
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
+ BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg)
- .addImm(FrameIndex);
+ .addFrameIndex(FrameIndex)
+ // Place-holder registers, these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
- llvm_unreachable("VGPR spilling not supported");
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
+ " spill register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
+ .addReg(SrcReg);
}
}
@@ -242,55 +499,142 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
+ int Opcode = -1;
- if (RI.hasVGPRs(RC)) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addImm(0);
- } else if (RI.isSGPRClass(RC)){
- unsigned Opcode;
+ if (RI.isSGPRClass(RC)) {
switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
- default: llvm_unreachable("Cannot spill register class");
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
}
+ } else if (shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ switch (RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
+ }
+ }
- SIMachineFunctionInfo::SpilledReg Spill =
- MFI->SpillTracker.getSpilledReg(FrameIndex);
-
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addReg(Spill.VGPR)
- .addImm(FrameIndex);
+ .addFrameIndex(FrameIndex)
+ // Place-holder registers, these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+
} else {
- llvm_unreachable("VGPR spilling not supported");
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
+ " restore register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
}
}
-static unsigned getNumSubRegsForSpillOp(unsigned Op) {
-
- switch (Op) {
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S512_RESTORE:
- return 16;
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- return 8;
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- return 4;
- case AMDGPU::SI_SPILL_S64_SAVE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- return 2;
- case AMDGPU::SI_SPILL_S32_RESTORE:
- return 1;
- default: llvm_unreachable("Invalid spill opcode");
+/// \param FrameOffset Offset in bytes of the FrameIndex being spilled.
+unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS, unsigned TmpReg,
+ unsigned FrameOffset,
+ unsigned Size) const {
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
+ unsigned WavefrontSize = ST.getWavefrontSize();
+
+ unsigned TIDReg = MFI->getTIDReg();
+ if (!MFI->hasCalculatedTID()) {
+ MachineBasicBlock &Entry = MBB.getParent()->front();
+ MachineBasicBlock::iterator Insert = Entry.front();
+ DebugLoc DL = Insert->getDebugLoc();
+
+ TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
+ if (TIDReg == AMDGPU::NoRegister)
+ return TIDReg;
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE &&
+ WorkGroupSize > WavefrontSize) {
+
+ unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
+ unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
+ unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ static const unsigned TIDIGRegs[3] = {
+ TIDIGXReg, TIDIGYReg, TIDIGZReg
+ };
+ for (unsigned Reg : TIDIGRegs) {
+ if (!Entry.isLiveIn(Reg))
+ Entry.addLiveIn(Reg);
+ }
+
+ RS->enterBasicBlock(&Entry);
+ unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Z);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Y);
+
+ // NGROUPS.X * NGROUPS.Y
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
+ .addReg(STmp1)
+ .addReg(STmp0);
+ // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
+ .addReg(STmp1)
+ .addReg(TIDIGXReg);
+ // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
+ .addReg(STmp0)
+ .addReg(TIDIGYReg)
+ .addReg(TIDReg);
+ // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
+ .addReg(TIDReg)
+ .addReg(TIDIGZReg);
+ } else {
+ // Get the wave id
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
+ TIDReg)
+ .addImm(-1)
+ .addImm(0);
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
+ TIDReg)
+ .addImm(-1)
+ .addReg(TIDReg);
+ }
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
+ TIDReg)
+ .addImm(2)
+ .addReg(TIDReg);
+ MFI->setTIDReg(TIDReg);
}
+
+ // Add FrameIndex to LDS offset
+ unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
+ .addImm(LDSOffset)
+ .addReg(TIDReg);
+
+ return TmpReg;
}
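A worked example of the address computation above, with assumed numbers:

// With MFI->LDSSize = 256, a frame offset of 4 bytes and a 64-lane work
// group, the spill base is 256 + 4 * 64 = 512 bytes, and each lane then
// adds its own (TID << 2) byte offset computed earlier into TIDReg.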
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
@@ -308,59 +652,11 @@ void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
}
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- SIMachineFunctionInfo *MFI =
- MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- // SGPR register spill
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S64_SAVE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned FrameIndex = MI->getOperand(2).getImm();
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- SIMachineFunctionInfo::SpilledReg Spill;
- unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
- MI->getOperand(0).getReg())
- .addReg(SubReg)
- .addImm(Spill.Lane + i);
- }
- MI->eraseFromParent();
- break;
- }
-
- // SGPR register restore
- case AMDGPU::SI_SPILL_S512_RESTORE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- case AMDGPU::SI_SPILL_S32_RESTORE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- SIMachineFunctionInfo::SpilledReg Spill;
- unsigned FrameIndex = MI->getOperand(2).getImm();
- unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(MI->getOperand(1).getReg())
- .addImm(Spill.Lane + i);
- }
- insertNOPs(MI, 3);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::SI_CONSTDATA_PTR: {
unsigned Reg = MI->getOperand(0).getReg();
unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
@@ -369,7 +665,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
// Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo)
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
.addReg(RegLo)
.addTargetIndex(AMDGPU::TI_CONSTDATA_START)
.addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
@@ -381,6 +677,39 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+ case AMDGPU::SGPR_USE:
+ // This is just a placeholder for register allocation.
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::V_MOV_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ // FIXME: Will this work for 64-bit floating point immediates?
+ assert(!SrcOp.isFPImm());
+ if (SrcOp.isImm()) {
+ APInt Imm(64, SrcOp.getImm());
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+ .addImm(Imm.getLoBits(32).getZExtValue())
+ .addReg(Dst, RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+ .addImm(Imm.getHiBits(32).getZExtValue())
+ .addReg(Dst, RegState::Implicit);
+ } else {
+ assert(SrcOp.isReg());
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
+ .addReg(Dst, RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
+ .addReg(Dst, RegState::Implicit);
+ }
+ MI->eraseFromParent();
+ break;
+ }
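An assumed example of the pseudo expansion above:

// Expanding V_MOV_B64_PSEUDO %vreg, 0x100000002 yields
//   v_mov_b32 %vreg.sub0, 2   ; low 32 bits
//   v_mov_b32 %vreg.sub1, 1   ; high 32 bits
// with the full 64-bit register added as an implicit operand on each
// half so liveness of the pair stays exact.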
}
return true;
}
@@ -388,35 +717,66 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
+ if (MI->getNumOperands() < 3)
return nullptr;
- // Cannot commute VOP2 if src0 is SGPR.
- if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
- RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
- return nullptr;
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ assert(Src0Idx != -1 && "Should always have src0 operand");
- if (!MI->getOperand(2).isReg()) {
- // XXX: Commute instructions with FPImm operands
- if (NewMI || MI->getOperand(2).isFPImm() ||
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ if (!Src0.isReg())
+ return nullptr;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
+ return nullptr;
+
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // Make sure it's legal to commute operands for VOP2.
+ if (isVOP2(MI->getOpcode()) &&
+ (!isOperandLegal(MI, Src0Idx, &Src1) ||
+ !isOperandLegal(MI, Src1Idx, &Src0))) {
+ return nullptr;
+ }
+
+ if (!Src1.isReg()) {
+ // Allow commuting instructions with Imm operands.
+ if (NewMI || !Src1.isImm() ||
(!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
return nullptr;
}
- // XXX: Commute VOP3 instructions with abs and neg set.
- if (isVOP3(MI->getOpcode()) &&
- (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::abs)).getImm() ||
- MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::neg)).getImm()))
- return nullptr;
+ // Be sure to copy the source modifiers to the right place.
+ if (MachineOperand *Src0Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+ MachineOperand *Src1Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
+
+ int Src0ModsVal = Src0Mods->getImm();
+ if (!Src1Mods && Src0ModsVal != 0)
+ return nullptr;
+
+ // XXX - This assert might be a lie. It might be useful to have a neg
+ // modifier with 0.0.
+ int Src1ModsVal = Src1Mods->getImm();
+ assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
+
+ Src1Mods->setImm(Src0ModsVal);
+ Src0Mods->setImm(Src1ModsVal);
+ }
+
+ unsigned Reg = Src0.getReg();
+ unsigned SubReg = Src0.getSubReg();
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else
+ llvm_unreachable("Should only have immediates");
- unsigned Reg = MI->getOperand(1).getReg();
- unsigned SubReg = MI->getOperand(1).getSubReg();
- MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
- MI->getOperand(2).ChangeToRegister(Reg, false);
- MI->getOperand(2).setSubReg(SubReg);
+ Src1.ChangeToRegister(Reg, false);
+ Src1.setSubReg(SubReg);
} else {
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
}
@@ -427,6 +787,44 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
+// This needs to be implemented because the source modifiers may be inserted
+// between the true commutable operands, and the base
+// TargetInstrInfo::commuteInstruction uses it.
+bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ if (Src0Idx == -1)
+ return false;
+
+ // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
+ // immediate.
+ if (!MI->getOperand(Src0Idx).isReg())
+ return false;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
+ return false;
+
+ if (!MI->getOperand(Src1Idx).isReg())
+ return false;
+
+ // If any source modifiers are set, the generic instruction commuting won't
+ // understand how to copy the source modifiers.
+ if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ return false;
+
+ SrcOpIdx1 = Src0Idx;
+ SrcOpIdx2 = Src1Idx;
+ return true;
+}
+
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg,
@@ -463,51 +861,106 @@ SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
}
}
-namespace llvm {
-namespace AMDGPU {
-// Helper function generated by tablegen. We are wrapping this with
-// an SIInstrInfo function that reutrns bool rather than int.
-int isDS(uint16_t Opcode);
-}
+static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
+ int WidthB, int OffsetB) {
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ return LowOffset + LowWidth <= HighOffset;
}
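A quick numeric check of the predicate, with assumed values:

// (WidthA = 4, OffsetA = 0) and (WidthB = 4, OffsetB = 4) do not overlap,
// since 0 + 4 <= 4; with OffsetB = 2 the check 0 + 4 <= 2 fails, so the
// accesses are treated as overlapping.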
-bool SIInstrInfo::isDS(uint16_t Opcode) const {
- return ::AMDGPU::isDS(Opcode) != -1;
-}
+bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const {
+ unsigned BaseReg0, Offset0;
+ unsigned BaseReg1, Offset1;
+
+ if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
+ getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
+ "read2 / write2 not expected here yet");
+ unsigned Width0 = (*MIa->memoperands_begin())->getSize();
+ unsigned Width1 = (*MIb->memoperands_begin())->getSize();
+ if (BaseReg0 == BaseReg1 &&
+ offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ return true;
+ }
+ }
-int SIInstrInfo::isMIMG(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+ return false;
}
-int SIInstrInfo::isSMRD(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SMRD;
-}
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb,
+ AliasAnalysis *AA) const {
+ unsigned Opc0 = MIa->getOpcode();
+ unsigned Opc1 = MIb->getOpcode();
-bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP1;
-}
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must load from or modify a memory location");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must load from or modify a memory location");
-bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP2;
-}
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
+ return false;
-bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP3;
-}
+ // XXX - Can we relax this between address spaces?
+ if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
-bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOPC;
-}
+ // TODO: Should we check the address space from the MachineMemOperand? That
+ // would allow us to distinguish objects we know don't alias based on the
+ // underlying address space, even if it was lowered to a different one,
+ // e.g. private accesses lowered to use MUBUF instructions on a scratch
+ // buffer.
+ if (isDS(Opc0)) {
+ if (isDS(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1);
+ }
+
+ if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
+ if (isMUBUF(Opc1) || isMTBUF(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1) && !isSMRD(Opc1);
+ }
+
+ if (isSMRD(Opc0)) {
+ if (isSMRD(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
-bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+ return !isFLAT(Opc1) && !isMUBUF(Opc1) && !isMTBUF(Opc1);
+ }
+
+ if (isFLAT(Opc0)) {
+ if (isFLAT(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return false;
+ }
+
+ return false;
}
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
- int32_t Val = Imm.getSExtValue();
- if (Val >= -16 && Val <= 64)
+ int64_t SVal = Imm.getSExtValue();
+ if (SVal >= -16 && SVal <= 64)
return true;
+ if (Imm.getBitWidth() == 64) {
+ uint64_t Val = Imm.getZExtValue();
+ return (DoubleToBits(0.0) == Val) ||
+ (DoubleToBits(1.0) == Val) ||
+ (DoubleToBits(-1.0) == Val) ||
+ (DoubleToBits(0.5) == Val) ||
+ (DoubleToBits(-0.5) == Val) ||
+ (DoubleToBits(2.0) == Val) ||
+ (DoubleToBits(-2.0) == Val) ||
+ (DoubleToBits(4.0) == Val) ||
+ (DoubleToBits(-4.0) == Val);
+ }
+
// The actual type of the operand does not seem to matter as long
// as the bits match one of the inline immediate values. For example:
//
@@ -516,32 +969,28 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
//
// 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
// floating-point, so it is a legal inline immediate.
-
- return (APInt::floatToBits(0.0f) == Imm) ||
- (APInt::floatToBits(1.0f) == Imm) ||
- (APInt::floatToBits(-1.0f) == Imm) ||
- (APInt::floatToBits(0.5f) == Imm) ||
- (APInt::floatToBits(-0.5f) == Imm) ||
- (APInt::floatToBits(2.0f) == Imm) ||
- (APInt::floatToBits(-2.0f) == Imm) ||
- (APInt::floatToBits(4.0f) == Imm) ||
- (APInt::floatToBits(-4.0f) == Imm);
+ uint32_t Val = Imm.getZExtValue();
+
+ return (FloatToBits(0.0f) == Val) ||
+ (FloatToBits(1.0f) == Val) ||
+ (FloatToBits(-1.0f) == Val) ||
+ (FloatToBits(0.5f) == Val) ||
+ (FloatToBits(-0.5f) == Val) ||
+ (FloatToBits(2.0f) == Val) ||
+ (FloatToBits(-2.0f) == Val) ||
+ (FloatToBits(4.0f) == Val) ||
+ (FloatToBits(-4.0f) == Val);
}
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
if (MO.isImm())
return isInlineConstant(APInt(32, MO.getImm(), true));
- if (MO.isFPImm()) {
- APFloat FpImm = MO.getFPImm()->getValueAPF();
- return isInlineConstant(FpImm.bitcastToAPInt());
- }
-
return false;
}
bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
- return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
+ return MO.isImm() && !isInlineConstant(MO);
}
static bool compareMachineOp(const MachineOperand &Op0,
@@ -554,8 +1003,6 @@ static bool compareMachineOp(const MachineOperand &Op0,
return Op0.getReg() == Op1.getReg();
case MachineOperand::MO_Immediate:
return Op0.getImm() == Op1.getImm();
- case MachineOperand::MO_FPImmediate:
- return Op0.getFPImm() == Op1.getFPImm();
default:
llvm_unreachable("Didn't expect to be comparing these operand types");
}
@@ -565,7 +1012,7 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const {
const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
- assert(MO.isImm() || MO.isFPImm());
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
return true;
@@ -573,16 +1020,91 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- return RI.regClassCanUseImmediate(OpInfo.RegClass);
+ if (isLiteralConstant(MO))
+ return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+
+ return RI.opCanUseInlineConstant(OpInfo.OperandType);
+}
+
+bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+    // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+    return false;
+ }
}
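To make the SI SMRD case concrete, a small sketch (hypothetical helper, SI only): a byte offset is foldable when it is dword-aligned and its dword count fits in 8 bits.

static bool fitsSMRDImmOffsetSI(unsigned OffsetBytes) {
  return (OffsetBytes % 4 == 0) && (OffsetBytes / 4) < (1u << 8);
}

// fitsSMRDImmOffsetSI(1020) == true  (dword 255, the maximum)
// fitsSMRDImmOffsetSI(1024) == false (dword 256 needs 9 bits)
// fitsSMRDImmOffsetSI(6)    == false (not dword-aligned)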
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
return AMDGPU::getVOPe32(Opcode) != -1;
}
+bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
+  // The src0_modifiers operand is present on all instructions
+ // that have modifiers.
+
+ return AMDGPU::getNamedOperandIdx(Opcode,
+ AMDGPU::OpName::src0_modifiers) != -1;
+}
+
+bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const {
+ const MachineOperand *Mods = getNamedOperand(MI, OpName);
+ return Mods && Mods->getImm();
+}
+
+bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const {
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ return true;
+
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
+
+ // FLAT_SCR is just an SGPR pair.
+ if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
+ return true;
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ return true;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ return true;
+ }
+
+ return false;
+}
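The verifier below applies this rule by counting constant-bus reads per instruction. A toy model (assumption: sources are plain strings, "s*" for SGPRs, "v*" for VGPRs, anything else a literal): identical SGPRs share one slot, every literal takes its own.

#include <string>
#include <vector>

static bool violatesConstantBusLimit(const std::vector<std::string> &Srcs) {
  std::string SGPRUsed;
  unsigned Count = 0;
  for (const std::string &S : Srcs) {
    if (S.empty() || S[0] == 'v')
      continue;                 // VGPRs do not use the constant bus.
    if (S[0] == 's') {          // An SGPR source.
      if (S != SGPRUsed)
        ++Count;
      SGPRUsed = S;
    } else {
      ++Count;                  // A literal constant.
    }
  }
  return Count > 1;
}

// violatesConstantBusLimit({"s0", "v1"})       -> false
// violatesConstantBusLimit({"s0", "s0", "v2"}) -> false (same SGPR reused)
// violatesConstantBusLimit({"s0", "s1"})       -> true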
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
@@ -596,23 +1118,27 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
// Make sure the register classes are correct
- for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isFPImm()) {
+ ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
+ "all fp values to integers.";
+ return false;
+ }
+
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER: {
- int RegClass = Desc.OpInfo[i].RegClass;
- if (!RI.regClassCanUseImmediate(RegClass) &&
- (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
- ErrInfo = "Expected register, but got immediate";
- return false;
+ if (MI->getOperand(i).isImm() &&
+ !isImmOperandLegal(MI, i, MI->getOperand(i))) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
+ }
}
- }
break;
case MCOI::OPERAND_IMMEDIATE:
// Check if this operand is an immediate.
// FrameIndex operands will be replaced by immediates, so they are
// allowed.
- if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() &&
- !MI->getOperand(i).isFI()) {
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
ErrInfo = "Expected immediate, but got non-immediate";
return false;
}
@@ -641,31 +1167,27 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify VOP*
if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ // Only look at the true operands. Only a real operand can use the constant
+ // bus, and we don't want to check pseudo-operands like the source modifier
+ // flags.
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
unsigned ConstantBusCount = 0;
unsigned SGPRUsed = AMDGPU::NoRegister;
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-
- // EXEC register uses the constant bus.
- if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
- ++ConstantBusCount;
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1)
+ break;
- // SGPRs use the constant bus
- if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
- (!MO.isImplicit() &&
- (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
- AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
- if (SGPRUsed != MO.getReg()) {
+ const MachineOperand &MO = MI->getOperand(OpIdx);
+ if (usesConstantBus(MRI, MO)) {
+ if (MO.isReg()) {
+ if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
- SGPRUsed = MO.getReg();
- }
+ SGPRUsed = MO.getReg();
+ } else {
+ ++ConstantBusCount;
}
}
- // Literal constants use the constant bus.
- if (isLiteralConstant(MO))
- ++ConstantBusCount;
}
if (ConstantBusCount > 1) {
ErrInfo = "VOP* instruction uses the constant bus more than once";
@@ -676,7 +1198,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify SRC1 for VOP2 and VOPC
if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
const MachineOperand &Src1 = MI->getOperand(Src1Idx);
- if (Src1.isImm() || Src1.isFPImm()) {
+ if (Src1.isImm()) {
ErrInfo = "VOP[2C] src1 cannot be an immediate.";
return false;
}
@@ -701,11 +1223,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
- MI->dump();
-
- const MachineOperand &Src0 = MI->getOperand(2);
- const MachineOperand &Src1 = MI->getOperand(3);
- const MachineOperand &Src2 = MI->getOperand(4);
+ const MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ const MachineOperand &Src2 = MI->getOperand(Src2Idx);
if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
if (!compareMachineOp(Src0, Src1) &&
!compareMachineOp(Src0, Src2)) {
@@ -728,10 +1248,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_MOV_B32:
return MI.getOperand(1).isReg() ?
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
- case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
- case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUB_I32:
+ case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
@@ -779,8 +1302,13 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
const MCInstrDesc &Desc = get(MI.getOpcode());
if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
- Desc.OpInfo[OpNo].RegClass == -1)
- return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+ Desc.OpInfo[OpNo].RegClass == -1) {
+ unsigned Reg = MI.getOperand(OpNo).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI.getRegClass(Reg);
+ return RI.getRegClass(Reg);
+ }
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
return RI.getRegClass(RCID);
@@ -800,21 +1328,28 @@ bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *MBB = MI->getParent();
MachineOperand &MO = MI->getOperand(OpIdx);
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (MO.isReg()) {
+ if (MO.isReg())
Opcode = AMDGPU::COPY;
- } else if (RI.isSGPRClass(RC)) {
+ else if (RI.isSGPRClass(RC))
Opcode = AMDGPU::S_MOV_B32;
- }
+
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
+ VRC = &AMDGPU::VReg_64RegClass;
+ else
+ VRC = &AMDGPU::VGPR_32RegClass;
+
unsigned Reg = MRI.createVirtualRegister(VRC);
- BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
- Reg).addOperand(MO);
+ DebugLoc DL = MBB->findDebugLoc(I);
+ BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
+ .addOperand(MO);
MO.ChangeToRegister(Reg, false);
}
@@ -834,13 +1369,15 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
- NewSuperReg)
- .addOperand(SuperReg);
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
+ .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
- SubReg)
- .addReg(NewSuperReg, 0, SubIdx);
return SubReg;
}
@@ -896,8 +1433,68 @@ unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
return Dst;
}
+// Change the order of operands from (0, 1, 2) to (0, 2, 1)
+void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
+ assert(Inst->getNumExplicitOperands() == 3);
+ MachineOperand Op1 = Inst->getOperand(1);
+ Inst->RemoveOperand(1);
+ Inst->addOperand(Op1);
+}
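This helper exists because the VI replacements chosen later in moveToVALU are the *REV shift forms, which take the shift amount in src0. A minimal model of the convention (GCN semantics, sketched here):

#include <cstdint>

// S_LSHL_B32 dst, x, s computes x << s.
uint32_t s_lshl_b32(uint32_t X, uint32_t Shift) { return X << (Shift & 31); }

// V_LSHLREV_B32 computes src1 << src0, so lowering S_LSHL_B32 to it requires
// exchanging the two sources, which is what swapOperands() does.
uint32_t v_lshlrev_b32(uint32_t Shift, uint32_t X) { return X << (Shift & 31); }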
+
+bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO) const {
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &InstDesc = get(MI->getOpcode());
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+ const TargetRegisterClass *DefinedRC =
+ OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
+ if (!MO)
+ MO = &MI->getOperand(OpIdx);
+
+ if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) {
+ unsigned SGPRUsed =
+ MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (i == OpIdx)
+ continue;
+ if (usesConstantBus(MRI, MI->getOperand(i)) &&
+ MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
+ return false;
+ }
+ }
+ }
+
+ if (MO->isReg()) {
+ assert(DefinedRC);
+ const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ }
+
+
+ // Handle non-register types that are treated like immediates.
+ assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+
+ if (!DefinedRC) {
+ // This operand expects an immediate.
+ return true;
+ }
+
+ return isImmOperandLegal(MI, OpIdx, *MO);
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
@@ -907,45 +1504,40 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// Legalize VOP2
if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
- MachineOperand &Src0 = MI->getOperand(Src0Idx);
- MachineOperand &Src1 = MI->getOperand(Src1Idx);
-
- // If the instruction implicitly reads VCC, we can't have any SGPR operands,
- // so move any.
- bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
- if (ReadsVCC && Src0.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+ // Legalize src0
+ if (!isOperandLegal(MI, Src0Idx))
legalizeOpWithMove(MI, Src0Idx);
- return;
- }
- if (ReadsVCC && Src1.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
- legalizeOpWithMove(MI, Src1Idx);
+ // Legalize src1
+ if (isOperandLegal(MI, Src1Idx))
return;
- }
- // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
- // be the first operand, and there can only be one.
- if (Src1.isImm() || Src1.isFPImm() ||
- (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- return;
- }
- legalizeOpWithMove(MI, Src1Idx);
+    // Usually src0 of VOP2 instructions allows more types of inputs
+ // than src1, so try to commute the instruction to decrease our
+ // chances of having to insert a MOV instruction to legalize src1.
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ // If we are successful in commuting, then we know MI is legal, so
+ // we are done.
+ return;
}
+
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
}
// XXX - Do any VOP3 instructions read VCC?
// Legalize VOP3
if (isVOP3(MI->getOpcode())) {
- int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
- unsigned SGPRReg = AMDGPU::NoRegister;
+ int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
if (Idx == -1)
- continue;
+ break;
MachineOperand &MO = MI->getOperand(Idx);
if (MO.isReg()) {
@@ -1045,106 +1637,216 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// Legalize MUBUF* instructions
// FIXME: If we start using the non-addr64 instructions for compute, we
// may need to legalize them here.
+ int SRsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+ if (SRsrcIdx != -1) {
+ // We have an MUBUF instruction
+ MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
+ unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
+ RI.getRegClass(SRsrcRC))) {
+ // The operands are legal.
+      // FIXME: We may need to legalize operands besides srsrc.
+ return;
+ }
- int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::srsrc);
- int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::vaddr);
- if (SRsrcIdx != -1 && VAddrIdx != -1) {
- const TargetRegisterClass *VAddrRC =
- RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
-
- if(VAddrRC->getSize() == 8 &&
- MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
- // We have a MUBUF instruction that uses a 64-bit vaddr register and
- // srsrc has the incorrect register class. In order to fix this, we
- // need to extract the pointer from the resource descriptor (srsrc),
- // add it to the value of vadd, then store the result in the vaddr
- // operand. Then, we need to set the pointer field of the resource
- // descriptor to zero.
+ MachineBasicBlock &MBB = *MI->getParent();
+    // Extract the pointer from the resource descriptor.
- MachineBasicBlock &MBB = *MI->getParent();
- MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
- MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
- unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
- unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
-
- // SRsrcPtrLo = srsrc:sub0
- SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
- // SRsrcPtrHi = srsrc:sub1
- SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
- // VAddrLo = vaddr:sub0
- VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
- &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
- // VAddrHi = vaddr:sub1
- VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
- &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
- // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ // SRsrcPtrLo = srsrc:sub0
+ unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(RsrcDataFormat >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned NewVAddrLo;
+ unsigned NewVAddrHi;
+ if (VAddr) {
+ // This is already an ADDR64 instruction so we need to add the pointer
+ // extracted from the resource descriptor to the current value of VAddr.
+ NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
NewVAddrLo)
.addReg(SRsrcPtrLo)
- .addReg(VAddrLo)
- .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
- // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
NewVAddrHi)
.addReg(SRsrcPtrHi)
- .addReg(VAddrHi)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
.addReg(AMDGPU::VCC, RegState::ImplicitDefine)
.addReg(AMDGPU::VCC, RegState::Implicit);
- // NewVaddr = {NewVaddrHi, NewVaddrLo}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
+ } else {
+    // This instruction is the _OFFSET variant, so we need to convert it to
+ // ADDR64.
+ MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
+ MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
+ assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
+ "with non-zero soffset is not implemented");
+ (void)SOffset;
+
+ // Create the new instruction.
+ unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
+ MachineInstr *Addr64 =
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addOperand(*SRsrc)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*Offset);
+
+ MI->removeFromParent();
+ MI = Addr64;
+
+ NewVAddrLo = SRsrcPtrLo;
+ NewVAddrHi = SRsrcPtrHi;
+ VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
+ }
- // Zero64 = 0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
- Zero64)
- .addImm(0);
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatLo)
- .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
-
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatHi)
- .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
-
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
- // Update the instruction to use NewVaddr
- MI->getOperand(VAddrIdx).setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ // Update the instruction to use NewVaddr
+ VAddr->setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ SRsrc->setReg(NewSRsrc);
+ }
+}
+
+void SIInstrInfo::splitSMRD(MachineInstr *MI,
+ const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const {
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned RegLo = MRI.createVirtualRegister(HalfRC);
+ unsigned RegHi = MRI.createVirtualRegister(HalfRC);
+ unsigned HalfSize = HalfRC->getSize();
+ const MachineOperand *OffOp =
+ getNamedOperand(*MI, AMDGPU::OpName::offset);
+ const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
+
+ // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
+ // on VI.
+ if (OffOp) {
+ bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ unsigned OffScale = isVI ? 1 : 4;
+ // Handle the _IMM variant
+ unsigned LoOffset = OffOp->getImm() * OffScale;
+ unsigned HiOffset = LoOffset + HalfSize;
+ Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
+ .addOperand(*SBase)
+ .addImm(LoOffset / OffScale);
+
+ if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
+ unsigned OffsetSGPR =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
+ .addImm(HiOffset); // The offset in register is in bytes.
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
+ .addOperand(*SBase)
+ .addReg(OffsetSGPR);
+ } else {
+ Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
+ .addOperand(*SBase)
+ .addImm(HiOffset / OffScale);
}
+ } else {
+ // Handle the _SGPR variant
+ MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
+ Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
+ .addOperand(*SBase)
+ .addOperand(*SOff);
+ unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
+ .addOperand(*SOff)
+ .addImm(HalfSize);
+    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
+ .addOperand(*SBase)
+ .addReg(OffsetSGPR);
+ }
+
+ unsigned SubLo, SubHi;
+ switch (HalfSize) {
+ case 4:
+ SubLo = AMDGPU::sub0;
+ SubHi = AMDGPU::sub1;
+ break;
+ case 8:
+ SubLo = AMDGPU::sub0_sub1;
+ SubHi = AMDGPU::sub2_sub3;
+ break;
+ case 16:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3;
+ SubHi = AMDGPU::sub4_sub5_sub6_sub7;
+ break;
+ case 32:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
+ SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
+ break;
+ default:
+ llvm_unreachable("Unhandled HalfSize");
}
+
+ BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
+ .addOperand(MI->getOperand(0))
+ .addReg(RegLo)
+ .addImm(SubLo)
+ .addReg(RegHi)
+ .addImm(SubHi);
}
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
@@ -1155,7 +1857,7 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
case AMDGPU::S_LOAD_DWORDX2_IMM:
case AMDGPU::S_LOAD_DWORDX2_SGPR:
case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: {
unsigned NewOpcode = getVALUOp(*MI);
unsigned RegOffset;
unsigned ImmOffset;
@@ -1165,10 +1867,13 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
ImmOffset = 0;
} else {
assert(MI->getOperand(2).isImm());
- // SMRD instructions take a dword offsets and MUBUF instructions
- // take a byte offset.
- ImmOffset = MI->getOperand(2).getImm() << 2;
+      // SMRD instructions take a dword offset on SI and a byte offset on VI,
+ // and MUBUF instructions always take a byte offset.
+ ImmOffset = MI->getOperand(2).getImm();
+ if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
if (isUInt<12>(ImmOffset)) {
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
RegOffset)
@@ -1186,13 +1891,14 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
.addImm(0);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
- .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
- .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+ .addImm(RsrcDataFormat >> 32);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
.addReg(DWord0)
.addImm(AMDGPU::sub0)
@@ -1202,14 +1908,44 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub2)
.addReg(DWord3)
.addImm(AMDGPU::sub3);
- MI->setDesc(get(NewOpcode));
- if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
- } else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
- }
- MI->getOperand(1).setReg(SRsrc);
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->setDesc(get(NewOpcode));
+ if (MI->getOperand(2).isReg()) {
+ MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ } else {
+ MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ }
+ MI->getOperand(1).setReg(SRsrc);
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+
+ const TargetRegisterClass *NewDstRC =
+ RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+ break;
+ }
+ case AMDGPU::S_LOAD_DWORDX8_IMM:
+ case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
+ AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
+
+ case AMDGPU::S_LOAD_DWORDX16_IMM:
+ case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
+ AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
}
}
@@ -1281,8 +2017,32 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst->eraseFromParent();
continue;
+ case AMDGPU::S_BFE_I64: {
+ splitScalar64BitBFE(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::S_LSHL_B32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_ASHR_I32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHR_B32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
+ swapOperands(Inst);
+ }
+ break;
+
case AMDGPU::S_BFE_U64:
- case AMDGPU::S_BFE_I64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
}
@@ -1311,17 +2071,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// We are converting these to a BFE, so we need to add the missing
// operands for the size and offset.
unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- Inst->addOperand(Inst->getOperand(1));
- Inst->getOperand(1).ChangeToImmediate(0);
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(Size));
- // XXX - Other pointless operands. There are 4, but it seems you only need
- // 3 to not hit an assertion later in MCInstLower.
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
} else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
// The VALU version adds the second operand to the result, so insert an
// extra 0 operand.
@@ -1340,16 +2092,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
-
Inst->RemoveOperand(2); // Remove old immediate.
- Inst->addOperand(Inst->getOperand(1));
- Inst->getOperand(1).ChangeToImmediate(0);
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(Offset));
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(BitWidth));
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
}
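The masks above decode the packed S_BFE control operand. As a worked example (hypothetical encoder): offset 8 and width 16 pack into 0x100008.

#include <cstdint>

static uint32_t encodeSBFEControl(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16); // Bits [5:0] and [22:16].
}

// encodeSBFEControl(8, 16) == 0x100008; applying the masks above to that
// value recovers Offset == 8 and Width == 16.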
// Update the destination register class.
@@ -1403,7 +2148,7 @@ unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
}
const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
- return &AMDGPU::VReg_32RegClass;
+ return &AMDGPU::VGPR_32RegClass;
}
void SIInstrInfo::splitScalar64BitUnaryOp(
@@ -1562,6 +2307,67 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
Worklist.push_back(Second);
}
+void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ uint32_t Imm = Inst->getOperand(2).getImm();
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+ (void) Offset;
+
+ // Only sext_inreg cases handled.
+ assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
+ BitWidth <= 32 &&
+ Offset == 0 &&
+ "Not implemented");
+
+ if (BitWidth < 32) {
+ unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
+ .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
+ .addImm(0)
+ .addImm(BitWidth);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
+ .addImm(31)
+ .addReg(MidRegLo);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(MidRegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(MidRegHi)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ return;
+ }
+
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
+ .addImm(31)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(TmpReg)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+}
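A scalar model of this lowering (illustrative only; assumes two's complement and arithmetic right shift of signed values, valid for 0 < BitWidth < 32): sign-extend the low BitWidth bits into the lo dword as the BFE does, then replicate the sign bit into the hi dword with the arithmetic shift.

#include <cstdint>

static int64_t sextInReg64(uint64_t Src, unsigned BitWidth) {
  // V_BFE_I32: sign-extend bits [BitWidth-1:0] of the low dword.
  int32_t Lo = (int32_t)((uint32_t)Src << (32 - BitWidth)) >> (32 - BitWidth);
  // V_ASHRREV_I32 with shift 31: broadcast the sign bit into the high dword.
  int32_t Hi = Lo >> 31;
  return (int64_t)(((uint64_t)(uint32_t)Hi << 32) | (uint32_t)Lo);
}

// sextInReg64(0x80, 8) == -128; sextInReg64(0x7f, 8) == 127.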
+
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
MachineInstr *Inst) const {
  // Add the implicit and explicit register definitions.
@@ -1580,13 +2386,81 @@ void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
}
}
+unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
+ int OpIndices[3]) const {
+ const MCInstrDesc &Desc = get(MI->getOpcode());
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = AMDGPU::NoRegister;
+
+ // First we need to consider the instruction's operand requirements before
+ // legalizing. Some operands are required to be SGPRs, such as implicit uses
+ // of VCC, but we are still bound by the constant bus requirement to only use
+ // one.
+ //
+ // If the operand's class is an SGPR, we can never move it.
+
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ if (MO.getReg() == AMDGPU::VCC)
+ return AMDGPU::VCC;
+
+ if (MO.getReg() == AMDGPU::FLAT_SCR)
+ return AMDGPU::FLAT_SCR;
+ }
+
+ unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = OpIndices[i];
+ if (Idx == -1)
+ break;
+
+ const MachineOperand &MO = MI->getOperand(Idx);
+ if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
+ SGPRReg = MO.getReg();
+
+ if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ UsedSGPRs[i] = MO.getReg();
+ }
+
+ if (SGPRReg != AMDGPU::NoRegister)
+ return SGPRReg;
+
+ // We don't have a required SGPR operand, so we have a bit more freedom in
+ // selecting operands to move.
+
+ // Try to select the most used SGPR. If an SGPR is equal to one of the
+ // others, we choose that.
+ //
+ // e.g.
+ // V_FMA_F32 v0, s0, s0, s0 -> No moves
+ // V_FMA_F32 v0, s0, s1, s0 -> Move s1
+
+ if (UsedSGPRs[0] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[0];
+ }
+
+ if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[1] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[1];
+ }
+
+ return SGPRReg;
+}
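Condensed, the tie-breaking at the end behaves like this sketch (0 standing in for AMDGPU::NoRegister):

// Prefer an SGPR that appears more than once; moving the remaining ones then
// costs the fewest copies.
static unsigned pickReusedSGPR(const unsigned Used[3]) {
  if (Used[0] && (Used[0] == Used[1] || Used[0] == Used[2]))
    return Used[0];
  if (Used[1] && Used[1] == Used[2])
    return Used[1];
  return 0; // No repeats: any single SGPR may stay.
}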
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
const DebugLoc &DL = MBB->findDebugLoc(I);
- unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
getIndirectIndexBegin(*MBB->getParent()));
return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
@@ -1604,7 +2478,7 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
const DebugLoc &DL = MBB->findDebugLoc(I);
- unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
getIndirectIndexBegin(*MBB->getParent()));
return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
@@ -1626,7 +2500,7 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
for (int Index = Begin; Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
+ Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
@@ -1644,11 +2518,19 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
-const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr& MI,
- unsigned OperandName) const {
+MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
+ unsigned OperandName) const {
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
if (Idx == -1)
return nullptr;
return &MI.getOperand(Idx);
}
+
+uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
+ uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
+ if (ST.isAmdHsaOS())
+ RsrcDataFormat |= (1ULL << 56);
+
+ return RsrcDataFormat;
+}
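Numerically, the default word is AMDGPU::RSRC_DATA_FORMAT (0xf00000000000) and the amdhsa path additionally sets bit 56 (the patch does not say what that bit controls). A standalone sketch:

#include <cstdint>

static uint64_t defaultRsrcDataFormat(bool IsAmdHsa) {
  uint64_t Rsrc = 0xf00000000000ULL; // AMDGPU::RSRC_DATA_FORMAT
  if (IsAmdHsa)
    Rsrc |= 1ULL << 56;              // Extra bit set only on the HSA OS path.
  return Rsrc;
}

// defaultRsrcDataFormat(false) == 0x0000f00000000000
// defaultRsrcDataFormat(true)  == 0x0100f00000000000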
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 4687539fdf58..f766dc85e86a 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
-#ifndef SIINSTRINFO_H
-#define SIINSTRINFO_H
+#ifndef LLVM_LIB_TARGET_R600_SIINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_SIINSTRINFO_H
#include "AMDGPUInstrInfo.h"
+#include "SIDefines.h"
#include "SIRegisterInfo.h"
namespace llvm {
@@ -44,6 +45,8 @@ private:
const TargetRegisterClass *RC,
const MachineOperand &Op) const;
+ void swapOperands(MachineBasicBlock::iterator Inst) const;
+
void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst, unsigned Opcode) const;
@@ -52,9 +55,16 @@ private:
void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst) const;
+ void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
+ bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const;
+
+ unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
+
public:
explicit SIInstrInfo(const AMDGPUSubtarget &st);
@@ -62,11 +72,30 @@ public:
return RI;
}
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const override;
+
+ bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const final;
+
+ bool shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const final;
+
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
+ unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS,
+ unsigned TmpReg,
+ unsigned Offset,
+ unsigned Size) const;
+
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -79,29 +108,102 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+  /// \brief Returns an opcode that can be used to move a value to a \p DstRC
+  /// register.  If there is no hardware instruction that can store to \p
+  /// DstRC, then AMDGPU::COPY is returned.
+ unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
unsigned commuteOpcode(unsigned Opcode) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI=false) const override;
+ bool NewMI = false) const override;
+ bool findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
bool isTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA = nullptr) const;
+ bool areMemAccessesTriviallyDisjoint(
+ MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const override;
bool isMov(unsigned Opcode) const override;
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
- bool isDS(uint16_t Opcode) const;
- int isMIMG(uint16_t Opcode) const;
- int isSMRD(uint16_t Opcode) const;
- bool isVOP1(uint16_t Opcode) const;
- bool isVOP2(uint16_t Opcode) const;
- bool isVOP3(uint16_t Opcode) const;
- bool isVOPC(uint16_t Opcode) const;
+
+ bool isSALU(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SALU;
+ }
+
+ bool isVALU(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VALU;
+ }
+
+ bool isSOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOP1;
+ }
+
+ bool isSOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOP2;
+ }
+
+ bool isSOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPC;
+ }
+
+ bool isSOPK(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPK;
+ }
+
+ bool isSOPP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPP;
+ }
+
+ bool isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+ }
+
+ bool isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
+ }
+
+ bool isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
+ }
+
+ bool isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
+ }
+
+ bool isMUBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
+ }
+
+ bool isMTBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
+ }
+
+ bool isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
+ }
+
+ bool isDS(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::DS;
+ }
+
+ bool isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+ }
+
+ bool isFLAT(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FLAT;
+ }
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
@@ -109,14 +211,28 @@ public:
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
+ /// \brief Return true if the given offset Size in bytes can be folded into
+ /// the immediate offsets of a memory instruction for the given address space.
+ bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
+
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
+ /// \brief Returns true if this operand uses the constant bus.
+ bool usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const;
+
+ /// \brief Return true if this instruction has any modifiers.
+ /// e.g. src[012]_mod, omod, clamp.
+ bool hasModifiers(unsigned Opcode) const;
+
+ bool hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const;
+
bool verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const override;
- bool isSALUInstr(const MachineInstr &MI) const;
static unsigned getVALUOp(const MachineInstr &MI);
bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
@@ -144,10 +260,21 @@ public:
/// instead of MOV.
void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+ /// \brief Check if \p MO is a legal operand if it was the \p OpIdx Operand
+ /// for \p MI.
+ bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO = nullptr) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
+ /// \brief Split an SMRD instruction into two smaller loads of half the
+  /// size, storing the results in \p Lo and \p Hi.
+ void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const;
+
void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
/// \brief Replace this instruction's opcode with the equivalent VALU
@@ -181,8 +308,15 @@ public:
/// \brief Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
- const MachineOperand *getNamedOperand(const MachineInstr& MI,
- unsigned OperandName) const;
+ MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
+
+ const MachineOperand *getNamedOperand(const MachineInstr &MI,
+ unsigned OpName) const {
+ return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
+ }
+
+ uint64_t getDefaultRsrcDataFormat() const;
+
};
namespace AMDGPU {
@@ -192,21 +326,34 @@ namespace AMDGPU {
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
int getMCOpcode(uint16_t Opcode, unsigned Gen);
+ int getAddr64Inst(uint16_t Opcode);
+ int getAtomicRetOp(uint16_t Opcode);
+ int getAtomicNoRetOp(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_TID_ENABLE = 1LL << 55;
} // End namespace AMDGPU
-} // End namespace llvm
+namespace SI {
+namespace KernelInputOffsets {
+
+/// Offsets in bytes from the start of the input buffer
+enum Offsets {
+ NGROUPS_X = 0,
+ NGROUPS_Y = 4,
+ NGROUPS_Z = 8,
+ GLOBAL_SIZE_X = 12,
+ GLOBAL_SIZE_Y = 16,
+ GLOBAL_SIZE_Z = 20,
+ LOCAL_SIZE_X = 24,
+ LOCAL_SIZE_Y = 28,
+ LOCAL_SIZE_Z = 32
+};
+
+} // End namespace KernelInputOffsets
+} // End namespace SI
-namespace SIInstrFlags {
- enum Flags {
- // First 4 bits are the instruction encoding
- VM_CNT = 1 << 0,
- EXP_CNT = 1 << 1,
- LGKM_CNT = 1 << 2
- };
-}
+} // End namespace llvm
-#endif //SIINSTRINFO_H
+#endif
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index b0ac20f558d0..7cc9588c8e4b 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -7,11 +7,61 @@
//
//===----------------------------------------------------------------------===//
+class vop {
+ field bits<9> SI3;
+ field bits<10> VI3;
+}
+
+class vopc <bits<8> si, bits<8> vi = !add(0x40, si)> : vop {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+
+ field bits<9> SI3 = {0, si{7-0}};
+ field bits<10> VI3 = {0, 0, vi{7-0}};
+}
+
+class vop1 <bits<8> si, bits<8> vi = si> : vop {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+
+ field bits<9> SI3 = {1, 1, si{6-0}};
+ field bits<10> VI3 = !add(0x140, vi);
+}
+
+class vop2 <bits<6> si, bits<6> vi = si> : vop {
+ field bits<6> SI = si;
+ field bits<6> VI = vi;
+
+ field bits<9> SI3 = {1, 0, 0, si{5-0}};
+ field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
+}
+
+class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
+ let SI3 = si;
+ let VI3 = vi;
+}
+
+class sop1 <bits<8> si, bits<8> vi = si> {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+}
+
+class sop2 <bits<7> si, bits<7> vi = si> {
+ field bits<7> SI = si;
+ field bits<7> VI = vi;
+}
+
+class sopk <bits<5> si, bits<5> vi = si> {
+ field bits<5> SI = si;
+ field bits<5> VI = vi;
+}
+
// Except for the NONE field, this must be kept in sync with the SISubtarget enum
// in AMDGPUMCInstLower.h
def SISubtarget {
int NONE = -1;
int SI = 0;
+ int VI = 1;
}
//===----------------------------------------------------------------------===//
@@ -105,6 +155,22 @@ def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
}]>;
+def as_i64imm: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
+// Copied from the AArch64 backend:
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+}]>;
+
+// Copied from the AArch64 backend:
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+}]>;
+
def IMM8bit : PatLeaf <(imm),
[{return isUInt<8>(N->getZExtValue());}]
>;
@@ -117,6 +183,10 @@ def IMM16bit : PatLeaf <(imm),
[{return isUInt<16>(N->getZExtValue());}]
>;
+def IMM20bit : PatLeaf <(imm),
+ [{return isUInt<20>(N->getZExtValue());}]
+>;
+
def IMM32bit : PatLeaf <(imm),
[{return isUInt<32>(N->getZExtValue());}]
>;
@@ -130,13 +200,17 @@ class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
+class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
+ return isInlineImmediate(N);
+}]>;
+
class SGPRImm <dag frag> : PatLeaf<frag, [{
if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
AMDGPUSubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
@@ -159,13 +233,70 @@ def sopp_brtarget : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
}
+include "SIInstrFormats.td"
+include "VIInstrFormats.td"
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+
+def offen : Operand<i1> {
+ let PrintMethod = "printOffen";
+}
+def idxen : Operand<i1> {
+ let PrintMethod = "printIdxen";
+}
+def addr64 : Operand<i1> {
+ let PrintMethod = "printAddr64";
+}
+def mbuf_offset : Operand<i16> {
+ let PrintMethod = "printMBUFOffset";
+}
+def ds_offset : Operand<i16> {
+ let PrintMethod = "printDSOffset";
+}
+def ds_offset0 : Operand<i8> {
+ let PrintMethod = "printDSOffset0";
+}
+def ds_offset1 : Operand<i8> {
+ let PrintMethod = "printDSOffset1";
+}
+def glc : Operand <i1> {
+ let PrintMethod = "printGLC";
+}
+def slc : Operand <i1> {
+ let PrintMethod = "printSLC";
+}
+def tfe : Operand <i1> {
+ let PrintMethod = "printTFE";
+}
+
+def omod : Operand <i32> {
+ let PrintMethod = "printOModSI";
+}
+
+def ClampMod : Operand <i1> {
+ let PrintMethod = "printClampSI";
+}
+
+} // End OperandType = "OPERAND_IMMEDIATE"
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
+def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
+def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
+
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+
+def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
+def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
+def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -174,9 +305,20 @@ def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def SIOperand {
int ZERO = 0x80;
int VCC = 0x6A;
+ int FLAT_SCR = 0x68;
}
-include "SIInstrFormats.td"
+def SRCMODS {
+ int NONE = 0;
+}
+
+def DSTCLAMP {
+ int NONE = 0;
+}
+
+def DSTOMOD {
+ int NONE = 0;
+}
//===----------------------------------------------------------------------===//
//
@@ -194,43 +336,175 @@ include "SIInstrFormats.td"
//
//===----------------------------------------------------------------------===//
+class SIMCInstr <string pseudo, int subtarget> {
+ string PseudoInstr = pseudo;
+ int Subtarget = subtarget;
+}
+
+//===----------------------------------------------------------------------===//
+// EXP classes
+//===----------------------------------------------------------------------===//
+
+class EXPCommon : InstSI<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3),
+ "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ let EXP_CNT = 1;
+ let Uses = [EXEC];
+}
+
+multiclass EXP_m {
+
+ let isPseudo = 1 in {
+ def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
+ }
+
+ def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
+
+ def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi;
+}
+
//===----------------------------------------------------------------------===//
// Scalar classes
//===----------------------------------------------------------------------===//
-class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
- op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0", pattern
->;
+class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SOP1 <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
-class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
- op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern
->;
+class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOP1 <outs, ins, asm, pattern>,
+ SOP1e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOP1 <outs, ins, asm, pattern>,
+ SOP1e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ pattern>;
+
+ def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern>;
+
+ def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern>;
+}
+
+multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ pattern>;
+
+ def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern>;
+
+ def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern>;
+}
+
+// no input, 64-bit output.
+multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
+
+ def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
+ opName#" $dst", pattern> {
+ let SSRC0 = 0;
+ }
+
+ def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
+ opName#" $dst", pattern> {
+ let SSRC0 = 0;
+ }
+}
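// Each of these multiclasses expands to three defs: the
// subtarget-independent pseudo plus the _si and _vi real encodings.
// A minimal sketch (the sop1 opcode pair is illustrative, not
// authoritative):
//
//   defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;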
// 64-bit input, 32-bit output.
-class SOP1_32_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
- op, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern
->;
+multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ pattern>;
-class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
- op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
+ def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern>;
-class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
- op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
+ def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern>;
+}
-class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
- op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
+class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
+ SOP2<outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let Size = 4;
+}
+
+class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOP2<outs, ins, asm, pattern>,
+ SOP2e<op.SI>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOP2<outs, ins, asm, pattern>,
+ SOP2e<op.VI>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
+ def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), pattern>;
+
+ def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
+ opName#" $dst, $src0, $src1 [$scc]", pattern>;
+
+ def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
+ opName#" $dst, $src0, $src1 [$scc]", pattern>;
+}
+
+multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> {
+ def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1), pattern>;
+ def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
-class SOPC_Helper <bits<7> op, RegisterClass rc, ValueType vt,
+ def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+}
+
+multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> {
+ def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_64:$src1), pattern>;
+
+ def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>;
+
+ def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>;
+}
+
+multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> {
+ def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_32:$src1), pattern>;
+
+ def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+
+ def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+}
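// Usage mirrors SOP1: one defm yields pseudo + _si + _vi. For example
// (opcodes illustrative; the pattern reflects the 64-bit/32-bit operand
// split this multiclass encodes):
//
//   defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64",
//     [(set i64:$dst, (shl i64:$src0, i32:$src1))]
//   >;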
+
+
+class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
opName#" $dst, $src0, $src1", []>;
@@ -241,28 +515,90 @@ class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
class SOPC_64<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_64, i64, opName, cond>;
-class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
- op, (outs SReg_32:$dst), (ins i16imm:$src0),
- opName#" $dst, $src0", pattern
->;
+class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SOPK <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
-class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
- op, (outs SReg_64:$dst), (ins i16imm:$src0),
- opName#" $dst, $src0", pattern
->;
+class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOPK <outs, ins, asm, pattern>,
+ SOPKe <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ SOPK <outs, ins, asm, pattern>,
+ SOPKe <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
+ def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ pattern>;
+
+ def _si : SOPK_Real_si <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ opName#" $dst, $src0", pattern>;
+
+ def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ opName#" $dst, $src0", pattern>;
+}
+
+multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
+ def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
+ (ins SReg_32:$src0, u16imm:$src1), pattern>;
+
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>;
+
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>;
+}
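// As with the other scalar forms, each SOPK multiclass expands to a
// pseudo and two real encodings. A sketch (opcode illustrative):
//
//   defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;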
+
+//===----------------------------------------------------------------------===//
+// SMRD classes
+//===----------------------------------------------------------------------===//
+
+class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SMRD <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm> :
+ SMRD <outs, ins, asm, []>,
+ SMRDe <op, imm>,
+ SIMCInstr<opName, SISubtarget.SI>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
+ string asm> :
+ SMRD <outs, ins, asm, []>,
+ SMEMe_vi <op, imm>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm, list<dag> pattern> {
+
+ def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
+
+ def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+}
+
+multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
RegisterClass dstClass> {
- def _IMM : SMRD <
- op, 1, (outs dstClass:$dst),
+ defm _IMM : SMRD_m <
+ op, opName#"_IMM", 1, (outs dstClass:$dst),
(ins baseClass:$sbase, u32imm:$offset),
- asm#" $dst, $sbase, $offset", []
+ opName#" $dst, $sbase, $offset", []
>;
- def _SGPR : SMRD <
- op, 0, (outs dstClass:$dst),
+ defm _SGPR : SMRD_m <
+ op, opName#"_SGPR", 0, (outs dstClass:$dst),
(ins baseClass:$sbase, SReg_32:$soff),
- asm#" $dst, $sbase, $soff", []
+ opName#" $dst, $sbase, $soff", []
>;
}
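// SMRD_Helper therefore expands into six defs per load: the _IMM and
// _SGPR addressing forms, each with a pseudo plus _si and _vi encodings.
// A typical instantiation:
//
//   defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SReg_32>;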
@@ -270,6 +606,210 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
// Vector ALU classes
//===----------------------------------------------------------------------===//
+// This must always appear immediately before the operand whose input it
+// modifies.
+def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printOperandAndMods";
+}
+def InputModsNoDefault : Operand <i32> {
+ let PrintMethod = "printOperandAndMods";
+}
+
+class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+ int ret =
+ !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
+ !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
+ 3)); // VOP3
+}
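// Worked examples, following directly from the definition above
// (untyped acts as the "no such source" sentinel):
//
//   getNumSrcArgs<untyped, untyped>.ret  -->  1   (VOP1)
//   getNumSrcArgs<f32,     untyped>.ret  -->  2   (VOP2)
//   getNumSrcArgs<f32,     f32>.ret      -->  3   (VOP3)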
+
+// Returns the register class to use for the destination of VOP[123C]
+// instructions for the given VT.
+class getVALUDstForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
+ !if(!eq(VT.Size, 64), VReg_64,
+ SReg_64)); // else VT == i1
+}
+
+// Returns the register class to use for source 0 of VOP[12C]
+// instructions for the given VT.
+class getVOPSrc0ForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+}
+
+// Returns the register class to use for source 1 of VOP[12C] for the
+// given VT.
+class getVOPSrc1ForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
+}
+
+// Returns the register classes for the source arguments of a VOP[12C]
+// instruction for the given SrcVTs.
+class getInRC32 <list<ValueType> SrcVT> {
+ list<DAGOperand> ret = [
+ getVOPSrc0ForVT<SrcVT[0]>.ret,
+ getVOPSrc1ForVT<SrcVT[1]>.ret
+ ];
+}
+
+// Returns the register class to use for sources of VOP3 instructions for the
+// given VT.
+class getVOP3SrcForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
+}
+
+// Returns the register classes for the source arguments of a VOP3
+// instruction for the given SrcVTs.
+class getInRC64 <list<ValueType> SrcVT> {
+ list<DAGOperand> ret = [
+ getVOP3SrcForVT<SrcVT[0]>.ret,
+ getVOP3SrcForVT<SrcVT[1]>.ret,
+ getVOP3SrcForVT<SrcVT[2]>.ret
+ ];
+}
+
+// Returns 1 if the source arguments have modifiers, 0 if they do not.
+class hasModifiers<ValueType SrcVT> {
+ bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1, 0));
+}
+
+// Returns the input arguments for VOP[12C] instructions for the given
+// source register classes and argument count.
+class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+ dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
+ !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
+ (ins)));
+}
+
+// Returns the input arguments for VOP3 instructions for the given
+// source register classes and argument count.
+class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasModifiers> {
+
+ dag ret =
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // VOP1 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP1 without modifiers
+ (ins Src0RC:$src0)
+ /* endif */ ),
+ !if (!eq(NumSrcArgs, 2),
+ !if (!eq(HasModifiers, 1),
+ // VOP 2 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP2 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1)
+ /* endif */ )
+ /* NumSrcArgs == 3 */,
+ !if (!eq(HasModifiers, 1),
+ // VOP3 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP3 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
+ /* endif */ )));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP[12C]
+// instruction. This does not add the _e32 suffix, so it can be reused
+// by getAsm64.
+class getAsm32 <int NumSrcArgs> {
+ string src1 = ", $src1";
+ string src2 = ", $src2";
+ string ret = " $dst, $src0"#
+ !if(!eq(NumSrcArgs, 1), "", src1)#
+ !if(!eq(NumSrcArgs, 3), src2, "");
+}
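// The three possible results, derived from the definition above:
//
//   NumSrcArgs = 1  -->  " $dst, $src0"
//   NumSrcArgs = 2  -->  " $dst, $src0, $src1"
//   NumSrcArgs = 3  -->  " $dst, $src0, $src1, $src2"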
+
+// Returns the assembly string for the inputs and outputs of a VOP3
+// instruction.
+class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+ string src0 = "$src0_modifiers,";
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+ string ret =
+ !if(!eq(HasModifiers, 0),
+ getAsm32<NumSrcArgs>.ret,
+ " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+}
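// For example, HasModifiers = 1 with NumSrcArgs = 2 produces
//
//   " $dst, $src0_modifiers, $src1_modifiers$clamp$omod"
//
// while HasModifiers = 0 falls back to the plain getAsm32 string.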
+
+
+class VOPProfile <list<ValueType> _ArgVT> {
+
+ field list<ValueType> ArgVT = _ArgVT;
+
+ field ValueType DstVT = ArgVT[0];
+ field ValueType Src0VT = ArgVT[1];
+ field ValueType Src1VT = ArgVT[2];
+ field ValueType Src2VT = ArgVT[3];
+ field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
+ field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+ field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
+ field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
+
+ field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+ field bit HasModifiers = hasModifiers<Src0VT>.ret;
+
+ field dag Outs = (outs DstRC:$dst);
+
+ field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
+ field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasModifiers>.ret;
+
+ field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+}
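// Worked example for VOP_F32_F32_F32 below: Src2VT is untyped, so
// NumSrcArgs = 2; Src0VT is f32, so HasModifiers = 1; and the derived
// assembly strings come out as
//
//   Asm32 = "_e32 $dst, $src0, $src1"
//   Asm64 = " $dst, $src0_modifiers, $src1_modifiers$clamp$omod"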
+
+def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
+def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
+def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
+def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
+def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
+def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+
+def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
+def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
+def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+ let Src0RC32 = VCSrc_32;
+}
+
+def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
+ let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ let Asm64 = " $dst, $src0_modifiers, $src1";
+}
+
+def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
+ let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ let Asm64 = " $dst, $src0_modifiers, $src1";
+}
+
+def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+
+def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
+def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+
+
class VOP <string opName> {
string OpName = opName;
}
@@ -279,399 +819,943 @@ class VOP2_REV <string revOp, bit isOrig> {
bit IsOrig = isOrig;
}
-class SIMCInstr <string pseudo, int subtarget> {
- string PseudoInstr = pseudo;
- int Subtarget = subtarget;
+class AtomicNoRet <string noRetOp, bit isRet> {
+ string NoRetOp = noRetOp;
+ bit IsRet = isRet;
+}
+
+class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP1Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_e32", SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName> {
+ def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _vi : VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+}
+
+class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP2Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, string revOpSI, string revOpVI> {
+ def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
+
+ def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
+ VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
+ VOP2_REV<revOpVI#"_e32_vi", !eq(revOpVI, opName)>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+}
+
+class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
+
+ bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
+ bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
+  bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
+ bits<2> omod = !if(HasModifiers, ?, 0);
+ bits<1> clamp = !if(HasModifiers, ?, 0);
+ bits<9> src1 = !if(HasSrc1, ?, 0);
+ bits<9> src2 = !if(HasSrc2, ?, 0);
}
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP3Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName#"_e64", SISubtarget.NONE> {
let isPseudo = 1;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
- VOP3 <op, outs, ins, asm, []>,
- SIMCInstr<opName, SISubtarget.SI>;
+ VOP3Common <outs, ins, asm, []>,
+ VOP3e <op>,
+ SIMCInstr<opName#"_e64", SISubtarget.SI>;
+
+class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3e_vi <op>,
+ SIMCInstr <opName#"_e64", SISubtarget.VI>;
+
+// VI-only instruction class
+class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern, int NumSrcArgs, bit HasMods = 1> :
+ VOP3Common <outs, ins, asm, pattern>,
+ VOP <opName>,
+ VOP3e_vi <op>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
-multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
+multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, int NumSrcArgs, bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP3_Real_si <op, outs, ins, asm, opName>;
-
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
}
-multiclass VOP3_1_m <bits<8> op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName> {
+// VOP3_m without source modifiers
+multiclass VOP3_m_nosrcmod <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, int NumSrcArgs, bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
- let src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0 in {
-
- def _si : VOP3_Real_si <
- {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- outs, ins, asm, opName
- >;
-
- } // src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0
+ let src0_modifiers = 0,
+ src1_modifiers = 0,
+ src2_modifiers = 0 in {
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
+ }
}
-multiclass VOP3_2_m <bits<6> op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp> {
+multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
- let src2 = 0, src2_modifiers = 0 in {
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
- def _si : VOP3_Real_si <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- outs, ins, asm, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- } // src2 = 0, src2_modifiers = 0
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
}
-// This must always be right before the operand being input modified.
-def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
- let PrintMethod = "printOperandAndMods";
+multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOpSI, string revOpVI,
+ bit HasMods = 1, bit UseFullOp = 0> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOpSI#"_e64", !eq(revOpSI, opName)>;
+
+ def _si : VOP3_Real_si <op.SI3,
+ outs, ins, asm, opName>,
+ VOP2_REV<revOpSI#"_e64_si", !eq(revOpSI, opName)>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ def _vi : VOP3_Real_vi <op.VI3,
+ outs, ins, asm, opName>,
+ VOP2_REV<revOpVI#"_e64_vi", !eq(revOpVI, opName)>,
+ VOP3DisableFields<1, 0, HasMods>;
}
-multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
- string opName, list<dag> pattern> {
-
- def _e32 : VOP1 <
- op, (outs drc:$dst), (ins src:$src0),
- opName#"_e32 $dst, $src0", pattern
- >, VOP <opName>;
-
- defm _e64 : VOP3_1_m <
- op,
- (outs drc:$dst),
- (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [], opName>;
-}
-
-multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
-
-multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-
-multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
-
-multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
-
-multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern, string revOp> {
- def _e32 : VOP2 <
- op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
- opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- defm _e64 : VOP3_2_m <
- op,
- (outs vrc:$dst),
- (ins InputMods:$src0_modifiers, arc:$src0,
- InputMods:$src1_modifiers, arc:$src1,
- i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [],
- opName, revOp>;
-}
-
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName>
- : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
-
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName>
- : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
-
-multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
- RegisterClass src0_rc, string revOp = opName> {
-
- def _e32 : VOP2 <
- op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
- opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _e64 : VOP3b <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs VReg_32:$dst),
- (ins InputMods: $src0_modifiers, VSrc_32:$src0,
- InputMods:$src1_modifiers, VSrc_32:$src1,
- i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
- >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
- let src2 = 0;
- let src2_modifiers = 0;
- /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
- can write it into any SGPR. We currently don't use the carry out,
- so for now hardcode it to VCC as well */
- let sdst = SIOperand.VCC;
- }
+multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+  // The VOP2 variant puts the carry out into VCC; the VOP3 variant
+ // can write it into any SGPR. We currently don't use the carry out,
+ // so for now hardcode it to VCC as well.
+ let sdst = SIOperand.VCC, Defs = [VCC] in {
+ def _si : VOP3b <op.SI3, outs, ins, asm, pattern>,
+ VOP3DisableFields<1, 0, HasMods>,
+ SIMCInstr<opName#"_e64", SISubtarget.SI>,
+ VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
+
+ // TODO: Do we need this VI variant here?
+ /*def _vi : VOP3b_vi <op.VI3, outs, ins, asm, pattern>,
+ VOP3DisableFields<1, 0, HasMods>,
+ SIMCInstr<opName#"_e64", SISubtarget.VI>,
+ VOP2_REV<revOp#"_e64_vi", !eq(revOp, opName)>;*/
+ } // End sdst = SIOperand.VCC, Defs = [VCC]
}
-multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
- def _e32 : VOPC <
- op, (ins arc:$src0, vrc:$src1),
- opName#"_e32 $dst, $src0, $src1", []
- >, VOP <opName> {
+multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName,
+ bit HasMods, bit defExec> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
}
- def _e64 : VOP3 <
- {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs SReg_64:$dst),
- (ins InputMods:$src0_modifiers, arc:$src0,
- InputMods:$src1_modifiers, arc:$src1,
- InstFlag:$clamp, InstFlag:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
- !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
- [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
- )
- >, VOP <opName> {
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
- let src2 = 0;
- let src2_modifiers = 0;
}
}
-multiclass VOPC_32 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+multiclass VOP1_Helper <vop1 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ bit HasMods> {
+
+ defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
+
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+}
+
+multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP1_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+ P.HasModifiers
+>;
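// A minimal instantiation (the vop1 opcode is illustrative):
//
//   defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
//
// This expands to six defs: the _e32 pseudo with its _si/_vi encodings,
// plus the _e64 pseudo and its encodings, all linked by SIMCInstr keys.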
+
+multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> {
+
+ def _e32 : VOP1 <op.SI, P.Outs, P.Ins32, opName#P.Asm32, []>,
+ VOP <opName>;
+
+ def _e64 : VOP3Common <P.Outs, P.Ins64, opName#P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))])>,
+ VOP <opName>,
+ VOP3e <op.SI3>,
+ VOP3DisableFields<0, 0, P.HasModifiers>;
+}
+
+multiclass VOP2_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOpSI, string revOpVI, bit HasMods> {
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOpSI, revOpVI>;
-multiclass VOPC_64 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+ defm _e64 : VOP3_2_m <op,
+ outs, ins64, opName#"_e64"#asm64, pat64, opName, revOpSI, revOpVI, HasMods
+ >;
+}
+
+multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOpSI = opName, string revOpVI = revOpSI> : VOP2_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOpSI, revOpVI, P.HasModifiers
+>;
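// For example (opcode pair illustrative):
//
//   defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
//     VOP_F32_F32_F32, fadd
//   >;
//
// revOpSI/revOpVI default to opName, so an instruction is marked as its
// own original form unless a commuted counterpart is named.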
-multiclass VOPCX_32 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
+multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
-multiclass VOPCX_64 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp, revOp>;
+
+ defm _e64 : VOP3b_2_m <op,
+ outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ >;
+}
-multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
- op, (outs VReg_32:$dst),
- (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
- VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
- InstFlag:$clamp, InstFlag:$omod),
- opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
+multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> : VOP2b_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
>;
-class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
- op, (outs VReg_64:$dst),
- (ins VSrc_64:$src0, VSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->, VOP <opName> {
+class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOPCCommon <ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, bit DefExec> {
+ def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOPC<op.SI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let Defs = !if(DefExec, [EXEC], []);
+ }
+
+ def _vi : VOPC<op.VI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let Defs = !if(DefExec, [EXEC], []);
+ }
+}
- let src2 = 0;
- let src2_modifiers = 0;
- let src0_modifiers = 0;
- let clamp = 0;
- let omod = 0;
+multiclass VOPC_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec> {
+ defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
+ opName, HasMods, DefExec>;
}
-class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
- op, (outs VReg_64:$dst),
- (ins InputMods:$src0_modifiers, VSrc_64:$src0,
- InputMods:$src1_modifiers, VSrc_64:$src1,
- InputMods:$src2_modifiers, VSrc_64:$src2,
- InstFlag:$clamp, InstFlag:$omod),
- opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
->, VOP <opName>;
+multiclass VOPCInst <vopc op, string opName,
+ VOPProfile P, PatLeaf cond = COND_NULL,
+ bit DefExec = 0> : VOPC_Helper <
+ op, opName,
+ P.Ins32, P.Asm32, [],
+ (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set i1:$dst,
+ (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ cond))],
+ [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
+ P.HasModifiers, DefExec
+>;
+
+multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
+ bit DefExec = 0> : VOPC_Helper <
+ op, opName,
+ P.Ins32, P.Asm32, [],
+ (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set i1:$dst,
+ (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
+ [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
+ P.HasModifiers, DefExec
+>;
+
+
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+
+
+multiclass VOPCX <vopc op, string opName, VOPProfile P,
+ PatLeaf cond = COND_NULL>
+ : VOPCInst <op, opName, P, cond, 1>;
+
+multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_F64_F64_F64, cond>;
-class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> : VOP3 <
- op, (outs vrc:$dst0, SReg_64:$dst1),
- (ins arc:$src0, arc:$src1, arc:$src2,
- InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
->, VOP <opName>;
+multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_I64_I64_I64, cond>;
-class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
+ op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+>;
+
+multiclass VOPC_CLASS_F32 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F32_I32, 0>;
+
+multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F32_I32, 1>;
+
+multiclass VOPC_CLASS_F64 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F64_I32, 0>;
+
+multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F64_I32, 1>;
+
+multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName, P.Outs, P.Ins64, P.Asm64,
+ !if(!eq(P.NumSrcArgs, 3),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
+ P.Src2VT:$src2))]),
+ !if(!eq(P.NumSrcArgs, 2),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
+ /* P.NumSrcArgs == 1 */,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
+ P.NumSrcArgs, P.HasModifiers
+>;
+
+class VOP3InstVI <bits<10> op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_vi <
+ op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64,
+ !if(!eq(P.NumSrcArgs, 3),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
+ P.Src2VT:$src2))]),
+ !if(!eq(P.NumSrcArgs, 2),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
+ /* P.NumSrcArgs == 1 */,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
+ P.NumSrcArgs, P.HasModifiers
+>;
+
+multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
+ string opName, list<dag> pattern> :
+ VOP3b_2_m <
+ op, (outs vrc:$vdst, SReg_64:$sdst),
+ (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
+ InputModsNoDefault:$src1_modifiers, arc:$src1,
+ InputModsNoDefault:$src2_modifiers, arc:$src2,
+ ClampMod:$clamp, omod:$omod),
+ opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
+ opName, opName, 1, 1
+>;
+
+multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VGPR_32, VSrc_32, opName, pattern>;
+
+
+class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
+ (Inst i32:$src0_modifiers, P.Src0VT:$src0,
+ i32:$src1_modifiers, P.Src1VT:$src1,
+ i32:$src2_modifiers, P.Src2VT:$src2,
+ i1:$clamp,
+ i32:$omod)>;
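// Vop3ModPat lets a fully-modified source expression select directly to a
// three-source VOP3 instruction. A hypothetical use (both the instruction
// and the node name here are placeholders, not definitions from this tree):
//
//   def : Vop3ModPat <V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;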
+
+//===----------------------------------------------------------------------===//
+// Interpolation opcodes
+//===----------------------------------------------------------------------===//
+
+class VINTRP_Pseudo <string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> :
+ VINTRPCommon <outs, ins, asm, pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern> :
+ VINTRPCommon <outs, ins, asm, pattern>,
+ VINTRPe <op>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern> :
+ VINTRPCommon <outs, ins, asm, pattern>,
+ VINTRPe_vi <op>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm,
+ string disableEncoding = "", string constraints = "",
+ list<dag> pattern = []> {
+ let DisableEncoding = disableEncoding,
+ Constraints = constraints in {
+ def "" : VINTRP_Pseudo <opName, outs, ins, asm, pattern>;
+
+ def _si : VINTRP_Real_si <op, opName, outs, ins, asm, pattern>;
+
+ def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm, pattern>;
+ }
+}
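// A sketch of an instantiation (the operand list and asm string are
// illustrative; the real v_interp definitions carry their own constraints
// and DisableEncoding strings):
//
//   defm V_INTERP_P1_F32 : VINTRP_m <
//     0x00, "v_interp_p1_f32",
//     (outs VGPR_32:$dst),
//     (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
//     "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]"
//   >;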
//===----------------------------------------------------------------------===//
// Vector I/O classes
//===----------------------------------------------------------------------===//
-class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
- DS <op, outs, ins, asm, pat> {
+class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ DS <outs, ins, "", pattern>,
+ SIMCInstr <opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe <op>,
+ SIMCInstr <opName, SISubtarget.SI>;
+
+class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe_vi <op>,
+ SIMCInstr <opName, SISubtarget.VI>;
+
+class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe <op>,
+ SIMCInstr <opName, SISubtarget.SI> {
+
+  // Single loads interpret the two i8imm operands as a single i16 offset.
bits<16> offset;
+ let offset0 = offset{7-0};
+ let offset1 = offset{15-8};
+}
+
+class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe_vi <op>,
+ SIMCInstr <opName, SISubtarget.VI> {
  // Single loads interpret the two i8imm operands as a single i16 offset.
+ bits<16> offset;
let offset0 = offset{7-0};
let offset1 = offset{15-8};
}
-class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
+multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pat> {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
+}
+
+multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
+ : DS_1A_Load_m <
op,
+ asm,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
- asm#" $vdst, $addr, $offset, [M0]",
- []> {
- let data0 = 0;
- let data1 = 0;
- let mayLoad = 1;
- let mayStore = 0;
+ (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ asm#" $vdst, $addr"#"$offset"#" [M0]",
+ []>;
+
+multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pat> {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
}
-class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
+multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
+ : DS_Load2_m <
op,
+ asm,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1),
- asm#" $gds, $vdst, $addr, $offset0, $offset1, [M0]",
- []> {
- let data0 = 0;
- let data1 = 0;
- let mayLoad = 1;
- let mayStore = 0;
+ (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ M0Reg:$m0),
+ asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
+ []>;
+
+multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
}
-class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
+multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
+ : DS_1A_Store_m <
op,
+ asm,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset),
- asm#" $addr, $data0, $offset [M0]",
- []> {
- let data1 = 0;
- let mayStore = 1;
- let mayLoad = 0;
- let vdst = 0;
+ (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
+ asm#" $addr, $data0"#"$offset"#" [M0]",
+ []>;
+
+multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>;
+
+ let vdst = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
}
-class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
+multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
+ : DS_Store_m <
op,
+ asm,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u8imm:$offset0, u8imm:$offset1),
- asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]",
- []> {
- let mayStore = 1;
- let mayLoad = 0;
- let vdst = 0;
+ (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
+ asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
+ []>;
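// These helpers are instantiated once per DS opcode, e.g. (opcode
// illustrative):
//
//   defm DS_WRITE_B32 : DS_Store_Helper <0x0d, "ds_write_b32", VGPR_32>;
//
// giving the pseudo plus _si/_vi encodings with vdst and data1 tied off.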
+
+class DS_1A_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
+ DS_si <op, outs, ins, asm, pat> {
+ bits<16> offset;
+
+  // Single loads interpret the two i8imm operands as a single i16 offset.
+ let offset0 = offset{7-0};
+ let offset1 = offset{15-8};
+
+ let hasSideEffects = 0;
}
// 1 address, 1 data.
-class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
- asm#" $vdst, $addr, $data0, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
+ asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
+ AtomicNoRet<noRetOp, 1> {
let data1 = 0;
let mayStore = 1;
let mayLoad = 1;
+
+  let hasPostISelHook = 1; // Adjusted to the no-return version if the result is unused.
}
// 1 address, 2 data.
-class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
- asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
+ asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 1> {
let mayStore = 1;
let mayLoad = 1;
+  let hasPostISelHook = 1; // Adjusted to the no-return version if the result is unused.
}
// 1 address, 2 data.
-class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
- asm#" $addr, $data0, $data1, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
+ asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 0> {
let mayStore = 1;
let mayLoad = 1;
}
// 1 address, 1 data.
-class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
- asm#" $addr, $data0, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
+ asm#" $addr, $data0"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 0> {
let data1 = 0;
let mayStore = 1;
let mayLoad = 1;
}
-class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs),
+//===----------------------------------------------------------------------===//
+// MTBUF classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ MTBUF <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
+ string asm> :
+ MTBUF <outs, ins, asm, []>,
+ MTBUFe <op>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
+ MTBUF <outs, ins, asm, []>,
+ MTBUFe_vi <op>,
+ SIMCInstr <opName, SISubtarget.VI>;
+
+multiclass MTBUF_m <bits<3> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : MTBUF_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : MTBUF_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : MTBUF_Real_vi <{0, op{2}, op{1}, op{0}}, opName, outs, ins, asm>;
+
+}
+
+let mayStore = 1, mayLoad = 0 in {
+
+multiclass MTBUF_Store_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs),
(ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
- SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
- asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
- []> {
- let mayStore = 1;
- let mayLoad = 0;
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+ opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayStore = 1, mayLoad = 0
+
+let mayLoad = 1, mayStore = 0 in {
+
+multiclass MTBUF_Load_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs regClass:$dst),
+ (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+ opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayLoad = 1, mayStore = 0
+
+//===----------------------------------------------------------------------===//
+// MUBUF classes
+//===----------------------------------------------------------------------===//
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
+
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
+class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF_si <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 1;
+ let tfe = 0;
+ let lds = 0;
+ let soffset = 128;
+}
+
+class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF_si <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 0;
+ let tfe = 0;
+ let lds = 0;
+ let vaddr = 0;
+}
+
+multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+ ValueType vt, SDPatternOperator atomic> {
+
+ let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
+
+  // No-return variants
+ let glc = 0 in {
+
+ def _ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
+
+ def _OFFSET : MUBUFAtomicOffset <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SCSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
+ } // glc = 0
+
+  // Variants that return values
+ let glc = 1, Constraints = "$vdata = $vdata_in",
+ DisableEncoding = "$vdata_in" in {
+
+ def _RTN_ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
+
+ def _RTN_OFFSET : MUBUFAtomicOffset <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
+ SCSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
+
+ } // glc = 1
+
+ } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
- let lds = 0, mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let addr64 = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
- def _OFFSET : MUBUF <op, (outs regClass:$vdata),
+ def _OFFSET : MUBUF_si <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc,
- u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MUBUFAddr64Table<0>;
}
let offen = 1, idxen = 0 in {
- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr,
- SSrc_32:$soffset, u16imm:$offset, i1imm:$glc, i1imm:$slc,
- i1imm:$tfe),
- asm#" $vdata, $srsrc + $vaddr + $soffset + $offset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ def _OFFEN : MUBUF_si <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 0, idxen = 1 in {
- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr,
- u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ def _IDXEN : MUBUF_si <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
- def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
+ def _BOTHEN : MUBUF_si <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr,
- SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
}
}
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset",
+ def _ADDR64 : MUBUF_si <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, u16imm:$offset)))]>;
+ i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>;
+ }
+ }
+}
+
+multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
+
+ let lds = 0, mayLoad = 1 in {
+ let offen = 0, idxen = 0, vaddr = 0 in {
+ def _OFFSET : MUBUF_vi <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc,
+ mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MUBUFAddr64Table<0>;
+ }
+
+ let offen = 1, idxen = 0 in {
+ def _OFFEN : MUBUF_vi <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 1 in {
+ def _IDXEN : MUBUF_vi <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ def _BOTHEN : MUBUF_vi <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr,
+ SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
}
}
}
@@ -679,23 +1763,51 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
ValueType store_vt, SDPatternOperator st> {
- def "" : MUBUF <
- op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset,
- u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$slc,
- i1imm:$tfe),
- name#" $vdata, $srsrc, $vaddr, $soffset, $offset $offen $idxen $glc $slc $tfe",
- []
- > {
- let addr64 = 0;
- }
+ let mayLoad = 0, mayStore = 1 in {
+ let addr64 = 0 in {
+
+ def "" : MUBUF_si <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
+ []
+ >;
- def _ADDR64 : MUBUF <
+ let offen = 0, idxen = 0, vaddr = 0 in {
+ def _OFFSET : MUBUF_si <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe))]
+ >, MUBUFAddr64Table<0>;
+ } // offen = 0, idxen = 0, vaddr = 0
+
+ let offen = 1, idxen = 0 in {
+ def _OFFEN : MUBUF_si <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
+ []
+ >;
+ } // end offen = 1, idxen = 0
+
+ } // End addr64 = 0
+
+ def _ADDR64 : MUBUF_si <
op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
- name#" $vdata, $srsrc + $vaddr + $offset",
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
[(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> {
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1>
+ {
let mayLoad = 0;
let mayStore = 1;
@@ -705,24 +1817,35 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
let idxen = 0;
let glc = 0;
let addr64 = 1;
- let lds = 0;
let slc = 0;
let tfe = 0;
let soffset = 128; // ZERO
}
+ } // End mayLoad = 0, mayStore = 1
}
-class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
- asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
- []> {
+class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
+ FLAT <op, (outs regClass:$data),
+ (ins VReg_64:$addr),
+ asm#" $data, $addr, [M0, FLAT_SCRATCH]", []> {
+ let glc = 0;
+ let slc = 0;
+ let tfe = 0;
let mayLoad = 1;
- let mayStore = 0;
+}
+
+class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+ FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr),
+ name#" $data, $addr, [M0, FLAT_SCRATCH]",
+ []> {
+
+ let mayLoad = 0;
+ let mayStore = 1;
+
+ // Encoding
+ let glc = 0;
+ let slc = 0;
+ let tfe = 0;
}
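// Example (hypothetical opcodes): the FLAT helpers are instantiated as plain
// defs, one per opcode:
//
//   def FLAT_LOAD_DWORD  : FLAT_Load_Helper  <0x08, "flat_load_dword", VGPR_32>;
//   def FLAT_STORE_DWORD : FLAT_Store_Helper <0x18, "flat_store_dword", VGPR_32>;
//
// The "[M0, FLAT_SCRATCH]" suffix in the printed form makes the implicit
// inputs of flat addressing visible in the disassembly.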
class MIMG_Mask <string op, int channels> {
@@ -750,7 +1873,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
- def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_32>,
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
@@ -759,7 +1882,7 @@ multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
}
multiclass MIMG_NoSampler <bits<7> op, string asm> {
- defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VReg_32, 1>;
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
@@ -784,7 +1907,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm,
multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
- def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_32>,
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
@@ -797,7 +1920,7 @@ multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
}
multiclass MIMG_Sampler <bits<7> op, string asm> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1>;
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
@@ -831,7 +1954,7 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
- def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
@@ -844,7 +1967,7 @@ multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
}
multiclass MIMG_Gather <bits<7> op, string asm> {
- defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1>;
defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
@@ -898,20 +2021,40 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
-def isDS : InstrMapping {
- let FilterClass = "DS";
- let RowFields = ["Inst"];
- let ColFields = ["Size"];
- let KeyCol = ["8"];
- let ValueCols = [["8"]];
-}
-
-def getMCOpcode : InstrMapping {
+def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
let ColFields = ["Subtarget"];
let KeyCol = [!cast<string>(SISubtarget.NONE)];
- let ValueCols = [[!cast<string>(SISubtarget.SI)]];
+ let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
+}
+
+def getAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its version with a return value.
+def getAtomicRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its version without a return value.
+def getAtomicNoRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
}
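// Example (sketch; the base class and operands are elided): the two atomic
// tables are driven by the AtomicNoRet mix-in on each instruction def, so a
// pair such as
//
//   def BUFFER_ATOMIC_ADD     : ..., AtomicNoRet <"BUFFER_ATOMIC_ADD", 0>;
//   def BUFFER_ATOMIC_ADD_RTN : ..., AtomicNoRet <"BUFFER_ATOMIC_ADD", 1>;
//
// lets getAtomicRetOp map the first opcode to the second and getAtomicNoRetOp
// perform the inverse lookup.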
include "SIInstructions.td"
+include "CIInstructions.td"
+include "VIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index aecd847a2ba1..e05b6bb7d0f1 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -26,18 +26,37 @@ def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
}
-def isSI : Predicate<"Subtarget.getGeneration() "
+def isGCN : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-
+def isSICI : Predicate<
+ "Subtarget.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>;
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate <
+ "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>;
-def isCFDepth0 : Predicate<"isCFDepth0()">;
+def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
-def WAIT_FLAG : InstFlag<"printWaitFlag">;
+def SWaitMatchClass : AsmOperandClass {
+ let Name = "SWaitCnt";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSWaitCntOps";
+}
-let SubtargetPredicate = isSI in {
-let OtherPredicates = [isCFDepth0] in {
+def WAIT_FLAG : InstFlag<"printWaitFlag"> {
+ let ParserMatchClass = SWaitMatchClass;
+}
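+// Example accepted syntax (assumed, based on the parser method named above):
+//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+// parseSWaitCntOps folds the named counters into the $simm16 immediate.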
+
+let SubtargetPredicate = isGCN in {
+
+//===----------------------------------------------------------------------===//
+// EXP Instructions
+//===----------------------------------------------------------------------===//
+
+defm EXP : EXP_m;
//===----------------------------------------------------------------------===//
// SMRD Instructions
@@ -48,129 +67,135 @@ let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
+ 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+ 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+ 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+ 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+ 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
-//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
-
let isMoveImm = 1 in {
-def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
-def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
-def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
-def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+ let isReMaterializable = 1 in {
+ defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
+ defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
+  } // End isReMaterializable = 1
+
+ let Uses = [SCC] in {
+ defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
+ defm S_CMOV_B64 : SOP1_64 <sop1<0x06, 0x03>, "s_cmov_b64", []>;
+ } // End Uses = [SCC]
} // End isMoveImm = 1
-def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
- [(set i32:$dst, (not i32:$src0))]
->;
+let Defs = [SCC] in {
+ defm S_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_not_b32",
+ [(set i32:$dst, (not i32:$src0))]
+ >;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
- [(set i64:$dst, (not i64:$src0))]
->;
-def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
-def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32",
+ defm S_NOT_B64 : SOP1_64 <sop1<0x08, 0x05>, "s_not_b64",
+ [(set i64:$dst, (not i64:$src0))]
+ >;
+ defm S_WQM_B32 : SOP1_32 <sop1<0x09, 0x06>, "s_wqm_b32", []>;
+ defm S_WQM_B64 : SOP1_64 <sop1<0x0a, 0x07>, "s_wqm_b64", []>;
+} // End Defs = [SCC]
+
+
+defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
[(set i32:$dst, (AMDGPUbrev i32:$src0))]
>;
-def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
-} // End neverHasSideEffects = 1
+defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
-////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
-////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32",
- [(set i32:$dst, (ctpop i32:$src0))]
->;
-def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>;
+let Defs = [SCC] in {
+ //defm S_BCNT0_I32_B32 : SOP1_BCNT0 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
+ //defm S_BCNT0_I32_B64 : SOP1_BCNT0 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
+ defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
+ [(set i32:$dst, (ctpop i32:$src0))]
+ >;
+ defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
+} // End Defs = [SCC]
-////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>;
-////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32",
+//defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
+//defm S_FF0_I32_B64 : SOP1_FF0 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
+defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
[(set i32:$dst, (cttz_zero_undef i32:$src0))]
>;
-////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+////defm S_FF1_I32_B64 : SOP1_FF1 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
-def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32",
+defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;
-//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
-def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
-//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
+//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
+//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
+defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
-def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
+defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
>;
-////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
-////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
-////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
-////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
-def S_GETPC_B64 : SOP1 <
- 0x0000001f, (outs SReg_64:$dst), (ins), "S_GETPC_B64 $dst", []
-> {
- let SSRC0 = 0;
-}
-def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
-def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
-def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
-
-let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
-
-def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
-def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
-def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
-def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
-def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
-def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
-def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
-def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
-
-} // End hasSideEffects = 1
-
-def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
-def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
-def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
-def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
-def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
-def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
-//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
-def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
-def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
-def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+////defm S_BITSET0_B32 : SOP1_BITSET0 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
+////defm S_BITSET0_B64 : SOP1_BITSET0 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
+////defm S_BITSET1_B32 : SOP1_BITSET1 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
+////defm S_BITSET1_B64 : SOP1_BITSET1 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
+defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
+defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
+defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
+defm S_RFE_B64 : SOP1_64 <sop1<0x22, 0x1f>, "s_rfe_b64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
+
+defm S_AND_SAVEEXEC_B64 : SOP1_64 <sop1<0x24, 0x20>, "s_and_saveexec_b64", []>;
+defm S_OR_SAVEEXEC_B64 : SOP1_64 <sop1<0x25, 0x21>, "s_or_saveexec_b64", []>;
+defm S_XOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x26, 0x22>, "s_xor_saveexec_b64", []>;
+defm S_ANDN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x27, 0x23>, "s_andn2_saveexec_b64", []>;
+defm S_ORN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x28, 0x24>, "s_orn2_saveexec_b64", []>;
+defm S_NAND_SAVEEXEC_B64 : SOP1_64 <sop1<0x29, 0x25>, "s_nand_saveexec_b64", []>;
+defm S_NOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2a, 0x26>, "s_nor_saveexec_b64", []>;
+defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>;
+
+} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
+
+defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
+defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
+defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
+defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
+defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
+defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
+//defm S_CBRANCH_JOIN : SOP1_ <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
+defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
+let Defs = [SCC] in {
+ defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
+} // End Defs = [SCC]
+defm S_MOV_FED_B32 : SOP1_32 <sop1<0x35, 0x31>, "s_mov_fed_b32", []>;
//===----------------------------------------------------------------------===//
// SOP2 Instructions
@@ -178,145 +203,161 @@ def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
-def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+defm S_ADD_U32 : SOP2_32 <sop2<0x00>, "s_add_u32", []>;
+defm S_ADD_I32 : SOP2_32 <sop2<0x02>, "s_add_i32",
[(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
>;
} // End isCommutable = 1
-def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+defm S_SUB_U32 : SOP2_32 <sop2<0x01>, "s_sub_u32", []>;
+defm S_SUB_I32 : SOP2_32 <sop2<0x03>, "s_sub_i32",
[(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+defm S_ADDC_U32 : SOP2_32 <sop2<0x04>, "s_addc_u32",
[(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End isCommutable = 1
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
[(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End Uses = [SCC]
-} // End Defs = [SCC]
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
+defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
+defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
+defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
+defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
+} // End Defs = [SCC]
-def S_CSELECT_B32 : SOP2 <
- 0x0000000a, (outs SReg_32:$dst),
- (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- []
->;
+defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
-def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+let Uses = [SCC] in {
+ defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
+} // End Uses = [SCC]
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
+let Defs = [SCC] in {
+defm S_AND_B32 : SOP2_32 <sop2<0x0e, 0x0c>, "s_and_b32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
-def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+defm S_AND_B64 : SOP2_64 <sop2<0x0f, 0x0d>, "s_and_b64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
+defm S_OR_B32 : SOP2_32 <sop2<0x10, 0x0e>, "s_or_b32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
+defm S_OR_B64 : SOP2_64 <sop2<0x11, 0x0f>, "s_or_b64",
[(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
+defm S_XOR_B32 : SOP2_32 <sop2<0x12, 0x10>, "s_xor_b32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+defm S_XOR_B64 : SOP2_64 <sop2<0x13, 0x11>, "s_xor_b64",
[(set i64:$dst, (xor i64:$src0, i64:$src1))]
>;
-def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
-def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
-def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
-def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
-def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+defm S_ANDN2_B32 : SOP2_32 <sop2<0x14, 0x12>, "s_andn2_b32", []>;
+defm S_ANDN2_B64 : SOP2_64 <sop2<0x15, 0x13>, "s_andn2_b64", []>;
+defm S_ORN2_B32 : SOP2_32 <sop2<0x16, 0x14>, "s_orn2_b32", []>;
+defm S_ORN2_B64 : SOP2_64 <sop2<0x17, 0x15>, "s_orn2_b64", []>;
+defm S_NAND_B32 : SOP2_32 <sop2<0x18, 0x16>, "s_nand_b32", []>;
+defm S_NAND_B64 : SOP2_64 <sop2<0x19, 0x17>, "s_nand_b64", []>;
+defm S_NOR_B32 : SOP2_32 <sop2<0x1a, 0x18>, "s_nor_b32", []>;
+defm S_NOR_B64 : SOP2_64 <sop2<0x1b, 0x19>, "s_nor_b64", []>;
+defm S_XNOR_B32 : SOP2_32 <sop2<0x1c, 0x1a>, "s_xnor_b32", []>;
+defm S_XNOR_B64 : SOP2_64 <sop2<0x1d, 0x1b>, "s_xnor_b64", []>;
+} // End Defs = [SCC]
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
+let Defs = [SCC] in {
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+defm S_LSHL_B32 : SOP2_32 <sop2<0x1e, 0x1c>, "s_lshl_b32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
-def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+defm S_LSHR_B32 : SOP2_32 <sop2<0x20, 0x1e>, "s_lshr_b32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
-def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+defm S_LSHR_B64 : SOP2_64_32 <sop2<0x21, 0x1f>, "s_lshr_b64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+defm S_ASHR_I32 : SOP2_32 <sop2<0x22, 0x20>, "s_ashr_i32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
-def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
+} // End Defs = [SCC]
+
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
+defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
+ [(set i32:$dst, (mul i32:$src0, i32:$src1))]
+>;
} // End AddedComplexity = 1
-def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+let Defs = [SCC] in {
+defm S_BFE_U32 : SOP2_32 <sop2<0x27, 0x25>, "s_bfe_u32", []>;
+defm S_BFE_I32 : SOP2_32 <sop2<0x28, 0x26>, "s_bfe_i32", []>;
+defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
+defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
+} // End Defs = [SCC]
+
+//defm S_CBRANCH_G_FORK : SOP2_ <sop2<0x2b, 0x29>, "s_cbranch_g_fork", []>;
+let Defs = [SCC] in {
+defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
+} // End Defs = [SCC]
//===----------------------------------------------------------------------===//
// SOPC Instructions
//===----------------------------------------------------------------------===//
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32">;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32">;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32">;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32">;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32">;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32">;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32">;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32">;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>;
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
-def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
-def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+let isReMaterializable = 1 in {
+defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
+} // End isReMaterializable = 1
+let Uses = [SCC] in {
+ defm S_CMOVK_I32 : SOPK_32 <sopk<0x02, 0x01>, "s_cmovk_i32", []>;
+}
+
+let isCompare = 1 in {
/*
This instruction is disabled for now until we can figure out how to teach
@@ -330,50 +371,46 @@ SCC = S_CMPK_EQ_I32 SGPR0, imm
VCC = COPY SCC
VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
-def S_CMPK_EQ_I32 : SOPK <
- 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
- "S_CMPK_EQ_I32",
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
[(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
>;
*/
-let isCompare = 1, Defs = [SCC] in {
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
-} // End isCompare = 1, Defs = [SCC]
-
-let Defs = [SCC], isCommutable = 1 in {
- def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
- def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
-}
+defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
+defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
+defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
+defm S_CMPK_LT_I32 : SOPK_SCC <sopk<0x07, 0x06>, "s_cmpk_lt_i32", []>;
+defm S_CMPK_LE_I32 : SOPK_SCC <sopk<0x08, 0x07>, "s_cmpk_le_i32", []>;
+defm S_CMPK_EQ_U32 : SOPK_SCC <sopk<0x09, 0x08>, "s_cmpk_eq_u32", []>;
+defm S_CMPK_LG_U32 : SOPK_SCC <sopk<0x0a, 0x09>, "s_cmpk_lg_u32", []>;
+defm S_CMPK_GT_U32 : SOPK_SCC <sopk<0x0b, 0x0a>, "s_cmpk_gt_u32", []>;
+defm S_CMPK_GE_U32 : SOPK_SCC <sopk<0x0c, 0x0b>, "s_cmpk_ge_u32", []>;
+defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
+defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
+} // End isCompare = 1
-//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
-def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
-def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
-def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
-//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
-//def EXP : EXP_ <0x00000000, "EXP", []>;
+let isCommutable = 1 in {
+  let Defs = [SCC] in {
+ defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ }
+ defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
+}
-} // End let OtherPredicates = [isCFDepth0]
+//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
+defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
+defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
+defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
-def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "S_NOP $simm16", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
let isTerminator = 1 in {
-def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
[(IL_retflag)]> {
let simm16 = 0;
let isBarrier = 1;
@@ -382,7 +419,7 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
let isBranch = 1 in {
def S_BRANCH : SOPP <
- 0x00000002, (ins sopp_brtarget:$simm16), "S_BRANCH $simm16",
+ 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
[(br bb:$simm16)]> {
let isBarrier = 1;
}
@@ -390,36 +427,31 @@ def S_BRANCH : SOPP <
let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "S_CBRANCH_SCC0 $simm16", []
+ "s_cbranch_scc0 $simm16"
>;
def S_CBRANCH_SCC1 : SOPP <
0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "S_CBRANCH_SCC1 $simm16",
- []
+ "s_cbranch_scc1 $simm16"
>;
} // End DisableEncoding = "$scc"
def S_CBRANCH_VCCZ : SOPP <
0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "S_CBRANCH_VCCZ $simm16",
- []
+ "s_cbranch_vccz $simm16"
>;
def S_CBRANCH_VCCNZ : SOPP <
0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ $simm16",
- []
+ "s_cbranch_vccnz $simm16"
>;
let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "S_CBRANCH_EXECZ $simm16",
- []
+ "s_cbranch_execz $simm16"
>;
def S_CBRANCH_EXECNZ : SOPP <
0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "S_CBRANCH_EXECNZ $simm16",
- []
+ "s_cbranch_execnz $simm16"
>;
} // End DisableEncoding = "$exec"
@@ -428,7 +460,7 @@ def S_CBRANCH_EXECNZ : SOPP <
} // End isTerminator = 1
let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
+def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
let simm16 = 0;
@@ -438,27 +470,29 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
let mayStore = 1;
}
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
- []
->;
-//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
+def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
[(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
> {
let DisableEncoding = "$m0";
}
} // End Uses = [EXEC]
-//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
+ let simm16 = 0;
+}
+def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">;
+def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">;
+def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
+ let simm16 = 0;
+}
} // End hasSideEffects
//===----------------------------------------------------------------------===//
@@ -467,256 +501,260 @@ let Uses = [EXEC] in {
let isCompare = 1 in {
-defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
-defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
-defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
-defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
-defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
+defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
+defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
+defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
+defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
+defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
+defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">;
-defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">;
-defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">;
-defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">;
-defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">;
-defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">;
-defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">;
-defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">;
-defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">;
-defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">;
-defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">;
-defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">;
-defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">;
-defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">;
-defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">;
-defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">;
+defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
+defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
+defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
+defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17, 0x57>, "v_cmpx_o_f32">;
+defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18, 0x58>, "v_cmpx_u_f32">;
+defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19, 0x59>, "v_cmpx_nge_f32">;
+defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a, 0x5a>, "v_cmpx_nlg_f32">;
+defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b, 0x5b>, "v_cmpx_ngt_f32">;
+defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c, 0x5c>, "v_cmpx_nle_f32">;
+defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
+defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
+defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>;
-defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
-defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
-defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
-defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
-defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
+defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
+defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
+defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
+defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
+defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
+defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
+defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">;
-defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">;
-defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">;
-defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">;
-defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">;
-defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">;
-defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">;
-defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">;
-defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">;
-defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">;
-defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">;
-defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">;
-defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">;
-defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">;
-defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">;
-defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">;
+defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
+defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
+defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
+defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
+defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
+defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
+defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
+defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
-defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
-defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
-defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
-defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
-defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
-defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
-defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
-defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
-defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
-defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
-defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
-defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
-defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
-defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
-defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+let SubtargetPredicate = isSICI in {
+
+defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
+defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
+defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
+defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
+defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
+defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
+defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
+defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">;
-defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">;
-defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">;
-defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">;
-defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">;
-defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">;
-defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">;
-defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">;
-defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">;
-defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">;
-defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">;
-defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">;
-defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">;
-defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
-defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">;
-defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
+defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
+defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
+defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
+defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
+defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
+defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
-defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
-defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
-defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
-defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
-defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
-defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
-defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
-defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
-defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
-defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
-defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
-defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
-defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
-defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
-defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
+defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
+defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
+defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
+defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
+defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
+defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
+defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
-defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
-defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
-defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
-defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
-defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
-defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
-defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
-defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
-defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
-defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
-defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
-defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
-defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
-defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
-defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
+defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
+defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
+defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
+defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End hasSideEffects = 1, Defs = [EXEC]
-defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>;
-defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
-defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+} // End SubtargetPredicate = isSICI
+
+defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
+defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
+defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
+defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">;
-defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">;
-defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">;
-defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">;
-defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">;
-defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">;
-defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">;
-defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">;
+defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
+defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
+defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
+defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
} // End hasSideEffects = 1
-defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
-defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
+defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
+defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">;
-defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">;
-defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">;
-defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">;
-defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">;
-defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">;
-defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">;
-defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">;
+defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
+defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
+defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
+defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
} // End hasSideEffects = 1
-defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
-defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
+defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
+defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">;
-defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">;
-defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">;
-defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">;
-defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">;
-defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">;
-defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">;
-defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">;
+defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
+defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
+defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
+defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
} // End hasSideEffects = 1
-defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
-defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
+defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
+defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">;
-defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">;
-defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">;
-defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">;
-defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">;
-defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">;
-defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">;
-defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">;
+defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
+defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
+defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
+defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">;
+defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
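+// The class compares presumably test the floating-point class of src0
+// against a bitmask in src1 (NaN, infinity, denormal, ...), writing a
+// per-lane result like the ordinary compares above.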
} // End hasSideEffects = 1
} // End isCompare = 1
@@ -726,88 +764,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
//===----------------------------------------------------------------------===//
-def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
-def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
-def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
-def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>;
-def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>;
-def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
-def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
-def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
-def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
-def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
-def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
-def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
-def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
-def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
-def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
-def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
-def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
-
-def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
-def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
-def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
-def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
-def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
-def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
-def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
-def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
-def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
-def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
-def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
-def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
-def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
-def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>;
-def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
-def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
-def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
-def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
+def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
+def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
+def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
+def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
+def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
+def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
+def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
+def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
+def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
+def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
+def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
+def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
+def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
+def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
+def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
+def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
+def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
+def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
+def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
+def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
+def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
+def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
+def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
+def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
+def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
+def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
+def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
+//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
+//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2st64_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
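+// The trailing string on each DS_1A1D_RET is presumably the name of the
+// matching no-return op, letting an atomic whose result is unused be
+// rewritten to the cheaper non-returning form.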
let SubtargetPredicate = isCI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End SubtargetPredicate = isCI
-def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>;
-def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>;
-def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>;
-def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_32>;
-def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_32>;
-def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
-def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
-def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
-def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
-def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
-def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
-def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
-def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
-def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
-def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
-def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
-def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
-
-def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
-def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
-def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
-def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
-def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
-def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
-def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
-def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
-def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
-def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
-def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
-def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
-def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
-def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>;
-def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
-def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
-def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>;
-def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
+def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
+def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
+def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
+def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
+def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
+def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
+def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
+def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
+def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
+def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
+def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
+def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
+def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
+def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
+def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
+def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
+def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
+//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
+//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
+def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
+def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
@@ -816,719 +854,951 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
// TODO: _SRC2_* forms
-def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
-def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
-def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
-def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "DS_WRITE_B64", VReg_64>;
+defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
+defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
+defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
-def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
-def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
-def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
-def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
-def DS_READ_B64 : DS_Load_Helper <0x00000076, "DS_READ_B64", VReg_64>;
+defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
+defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
+defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
// 2 forms.
-def DS_WRITE2_B32 : DS_Load2_Helper <0x0000000E, "DS_WRITE2_B32", VReg_64>;
-def DS_WRITE2_B64 : DS_Load2_Helper <0x0000004E, "DS_WRITE2_B64", VReg_128>;
+defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
+defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "DS_READ2_B32", VReg_64>;
-def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>;
-
-// TODO: DS_READ2ST64_B32, DS_READ2ST64_B64,
-// DS_WRITE2ST64_B32, DS_WRITE2ST64_B64
+defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
+defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
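+// The st64 variants presumably scale both offsets by 64 elements, matching
+// the strided read2/write2 forms in the ISA.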
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+let SubtargetPredicate = isSICI in {
+
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global
+ 0x00000008, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global
+ 0x00000009, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global
+ 0x0000000a, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global
+ 0x0000000b, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load
+ 0x0000000c, "buffer_load_dword", VGPR_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load
+ 0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load
+ 0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global
+ 0x00000018, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
>;
defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global
+ 0x0000001a, "buffer_store_short", VGPR_32, i32, truncstorei16_global
>;
defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store
+ 0x0000001c, "buffer_store_dword", VGPR_32, i32, global_store
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store
+ 0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
->;
-//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
-//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
-//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
-//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
-//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
-//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
-//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
-//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
-//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
-//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+ 0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
+>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
+ 0x00000030, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
+>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
+ 0x00000032, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
+ 0x00000033, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
+>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
+ 0x00000035, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
+ 0x00000036, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
+>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
+ 0x00000037, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
+>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
+ 0x00000038, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
+>;
+defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
+ 0x00000039, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
+>;
+defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
+ 0x0000003a, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
+ 0x0000003b, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
+>;
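+// Each MUBUF_Atomic above presumably expands to a no-return variant plus a
+// returning (glc = 1) variant, paired through the same no-return-op mapping
+// as the DS atomics.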
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;
+
+} // End SubtargetPredicate = isSICI
//===----------------------------------------------------------------------===//
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
-def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
-def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
-def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
-def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
//===----------------------------------------------------------------------===//
// MIMG Instructions
//===----------------------------------------------------------------------===//
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
-//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
-//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
-//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
-//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
-//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
-//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
-//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
-//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
-//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
-//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
-//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
-//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
-//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
-//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
-//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "IMAGE_SAMPLE_CL">;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "IMAGE_SAMPLE_D_CL">;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "IMAGE_SAMPLE_B_CL">;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "IMAGE_SAMPLE_LZ">;
-defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "IMAGE_SAMPLE_C_CL">;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "IMAGE_SAMPLE_C_D_CL">;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "IMAGE_SAMPLE_C_B_CL">;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "IMAGE_SAMPLE_C_LZ">;
-defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "IMAGE_SAMPLE_O">;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "IMAGE_SAMPLE_CL_O">;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "IMAGE_SAMPLE_D_O">;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "IMAGE_SAMPLE_D_CL_O">;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "IMAGE_SAMPLE_L_O">;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "IMAGE_SAMPLE_B_O">;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "IMAGE_SAMPLE_B_CL_O">;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "IMAGE_SAMPLE_LZ_O">;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "IMAGE_SAMPLE_C_O">;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "IMAGE_SAMPLE_C_CL_O">;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "IMAGE_SAMPLE_C_D_O">;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "IMAGE_SAMPLE_C_D_CL_O">;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "IMAGE_SAMPLE_C_L_O">;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "IMAGE_SAMPLE_C_B_O">;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "IMAGE_SAMPLE_C_B_CL_O">;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "IMAGE_SAMPLE_C_LZ_O">;
-defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
-defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
-defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
-defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "IMAGE_SAMPLE_CD">;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "IMAGE_SAMPLE_CD_CL">;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "IMAGE_SAMPLE_C_CD">;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "IMAGE_SAMPLE_C_CD_CL">;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "IMAGE_SAMPLE_CD_O">;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "IMAGE_SAMPLE_CD_CL_O">;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "IMAGE_SAMPLE_C_CD_O">;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "IMAGE_SAMPLE_C_CD_CL_O">;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
+defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
+
+//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasFlatAddressSpace] in {
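+// FLAT ops address a single flat space that can alias global, private, and
+// local memory, hence the HasFlatAddressSpace predicate (CI and later).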
+def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "flat_load_ubyte", VGPR_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "flat_load_sbyte", VGPR_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "flat_load_ushort", VGPR_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "flat_load_sshort", VGPR_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "flat_load_dword", VGPR_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "flat_load_dwordx3", VReg_96>;
+
+def FLAT_STORE_BYTE : FLAT_Store_Helper <
+ 0x00000018, "flat_store_byte", VGPR_32
+>;
+
+def FLAT_STORE_SHORT : FLAT_Store_Helper <
+ 0x0000001a, "flat_store_short", VGPR_32
+>;
+
+def FLAT_STORE_DWORD : FLAT_Store_Helper <
+ 0x0000001c, "flat_store_dword", VGPR_32
+>;
+
+def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+ 0x0000001d, "flat_store_dwordx2", VReg_64
+>;
+
+def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+ 0x0000001e, "flat_store_dwordx4", VReg_128
+>;
+
+def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+ 0x0000001e, "flat_store_dwordx3", VReg_96
+>;
+
+//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "flat_atomic_swap", []>;
+//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "flat_atomic_cmpswap", []>;
+//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "flat_atomic_add", []>;
+//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "flat_atomic_sub", []>;
+//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "flat_atomic_rsub", []>;
+//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "flat_atomic_smin", []>;
+//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "flat_atomic_umin", []>;
+//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "flat_atomic_smax", []>;
+//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "flat_atomic_umax", []>;
+//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "flat_atomic_and", []>;
+//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "flat_atomic_or", []>;
+//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "flat_atomic_xor", []>;
+//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "flat_atomic_inc", []>;
+//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "flat_atomic_dec", []>;
+//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "flat_atomic_fcmpswap", []>;
+//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "flat_atomic_fmin", []>;
+//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "flat_atomic_fmax", []>;
+//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "flat_atomic_swap_x2", []>;
+//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "flat_atomic_cmpswap_x2", []>;
+//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "flat_atomic_add_x2", []>;
+//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "flat_atomic_sub_x2", []>;
+//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "flat_atomic_rsub_x2", []>;
+//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "flat_atomic_smin_x2", []>;
+//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "flat_atomic_umin_x2", []>;
+//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "flat_atomic_smax_x2", []>;
+//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "flat_atomic_umax_x2", []>;
+//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "flat_atomic_and_x2", []>;
+//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "flat_atomic_or_x2", []>;
+//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "flat_atomic_xor_x2", []>;
+//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "flat_atomic_inc_x2", []>;
+//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "flat_atomic_dec_x2", []>;
+//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "flat_atomic_fcmpswap_x2", []>;
+//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "flat_atomic_fmin_x2", []>;
+//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "flat_atomic_fmax_x2", []>;
+
+} // End HasFlatAddressSpace predicate
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+//def V_NOP : VOP1_ <0x00000000, "v_nop", []>;
-let neverHasSideEffects = 1, isMoveImm = 1 in {
-defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-} // End neverHasSideEffects = 1, isMoveImm = 1
+let isMoveImm = 1 in {
+defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
+} // End isMoveImm = 1
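+// VOP_I32_I32 and the other VOP_* profiles bundle the result and source
+// types; the assumption here is that VOP1Inst emits both the VOP1 (_e32)
+// and VOP3 (_e64) encodings of each instruction from one profile.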
let Uses = [EXEC] in {
+// FIXME: Specify SchedRW for READFIRSTLANE_B32
+
def V_READFIRSTLANE_B32 : VOP1 <
0x00000002,
(outs SReg_32:$vdst),
- (ins VReg_32:$src0),
- "V_READFIRSTLANE_B32 $vdst, $src0",
+ (ins VGPR_32:$src0),
+ "v_readfirstlane_b32 $vdst, $src0",
[]
>;
}
-defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
- [(set i32:$dst, (fp_to_sint f64:$src0))]
+let SchedRW = [WriteQuarterRate32] in {
+
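+// WriteQuarterRate32 presumably marks these conversions as quarter-rate
+// VALU ops in the SI scheduling model.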
+defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
+ VOP_I32_F64, fp_to_sint
>;
-defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
- [(set f64:$dst, (sint_to_fp i32:$src0))]
+defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
+ VOP_F64_I32, sint_to_fp
>;
-defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
- [(set f32:$dst, (sint_to_fp i32:$src0))]
+defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
+ VOP_F32_I32, sint_to_fp
>;
-defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
- [(set f32:$dst, (uint_to_fp i32:$src0))]
+defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
+ VOP_F32_I32, uint_to_fp
>;
-defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
- [(set i32:$dst, (fp_to_uint f32:$src0))]
+defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
+ VOP_I32_F32, fp_to_uint
>;
-defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set i32:$dst, (fp_to_sint f32:$src0))]
+defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
+ VOP_I32_F32, fp_to_sint
>;
-defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
- [(set i32:$dst, (fp_to_f16 f32:$src0))]
+defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
+defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
+ VOP_I32_F32, fp_to_f16
>;
-defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
- [(set f32:$dst, (f16_to_fp i32:$src0))]
+defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
+ VOP_F32_I32, f16_to_fp
>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
- [(set f32:$dst, (fround f64:$src0))]
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
+defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
+ VOP_F32_F64, fround
>;
-defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
- [(set f64:$dst, (fextend f32:$src0))]
+defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
+ VOP_F64_F32, fextend
>;
-defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
+defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte0
>;
-defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
+defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte1
>;
-defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
+defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte2
>;
-defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
+defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte3
>;
-defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
- [(set i32:$dst, (fp_to_uint f64:$src0))]
+defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
+ VOP_I32_F64, fp_to_uint
>;
-defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32",
- [(set f64:$dst, (uint_to_fp i32:$src0))]
+defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
+ VOP_F64_I32, uint_to_fp
>;
-defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
- [(set f32:$dst, (AMDGPUfract f32:$src0))]
->;
-defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
- [(set f32:$dst, (ftrunc f32:$src0))]
+} // let SchedRW = [WriteQuarterRate32]
+
+defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
+ VOP_F32_F32, AMDGPUfract
>;
-defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
- [(set f32:$dst, (fceil f32:$src0))]
+defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
+ VOP_F32_F32, ftrunc
>;
-defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
- [(set f32:$dst, (frint f32:$src0))]
+defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
+ VOP_F32_F32, fceil
>;
-defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
- [(set f32:$dst, (ffloor f32:$src0))]
+defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
+ VOP_F32_F32, frint
>;
-defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
- [(set f32:$dst, (fexp2 f32:$src0))]
+defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
+ VOP_F32_F32, ffloor
>;
-defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
-defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
- [(set f32:$dst, (flog2 f32:$src0))]
+defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
+ VOP_F32_F32, fexp2
>;
-defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
-defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
-defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set f32:$dst, (AMDGPUrcp f32:$src0))]
+let SchedRW = [WriteQuarterRate32] in {
+
+defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
+ VOP_F32_F32, flog2
>;
-defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
- [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
+defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
+ VOP_F32_F32, AMDGPUrcp
>;
-defm V_RSQ_LEGACY_F32 : VOP1_32 <
- 0x0000002d, "V_RSQ_LEGACY_F32",
- [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
+defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
+ VOP_F32_F32
>;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
- [(set f32:$dst, (AMDGPUrsq f32:$src0))]
+defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
+ VOP_F32_F32, AMDGPUrsq
>;
-defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
- [(set f64:$dst, (AMDGPUrcp f64:$src0))]
+
+} // let SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+
+defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
+ VOP_F64_F64, AMDGPUrcp
>;
-defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
- [(set f64:$dst, (AMDGPUrsq f64:$src0))]
+defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
+ VOP_F64_F64, AMDGPUrsq
>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
- [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
+
+} // let SchedRW = [WriteDouble]
+
+defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
+ VOP_F32_F32, fsqrt
>;
-defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
- [(set f32:$dst, (fsqrt f32:$src0))]
+
+let SchedRW = [WriteDouble] in {
+
+defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
+ VOP_F64_F64, fsqrt
+>;
+
+} // let SchedRW = [WriteDouble]
+
+defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
+ VOP_F32_F32, AMDGPUsin
>;
-defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
- [(set f64:$dst, (fsqrt f64:$src0))]
+defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
+ VOP_F32_F32, AMDGPUcos
>;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
- [(set f32:$dst, (AMDGPUsin f32:$src0))]
+defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
+//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "v_frexp_exp_i32_f64", VOP_I32_F64>;
+defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
+ VOP_F64_F64
>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
- [(set f32:$dst, (AMDGPUcos f32:$src0))]
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
+//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "v_frexp_exp_i32_f32", VOP_I32_F32>;
+defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
+ VOP_F32_F32
>;
-defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
-defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
-defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
-defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
-defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
-//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
-defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
-defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
-//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
-defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
-//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
-defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
-defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
-defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "v_clrexcp", []>;
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
-//===----------------------------------------------------------------------===//
-// VINTRP Instructions
-//===----------------------------------------------------------------------===//
+let SchedRW = [WriteQuarterRate32] in {
-def V_INTERP_P1_F32 : VINTRP <
- 0x00000000,
- (outs VReg_32:$dst),
- (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
- []> {
- let DisableEncoding = "$m0";
-}
+defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
+defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
+defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
+ VOP_F32_F32, AMDGPUrsq_clamped
+>;
+defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
+ VOP_F32_F32, AMDGPUrsq_legacy
+>;
-def V_INTERP_P2_F32 : VINTRP <
- 0x00000001,
- (outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
- []> {
+} // End let SchedRW = [WriteQuarterRate32]
- let Constraints = "$src0 = $dst";
- let DisableEncoding = "$src0,$m0";
+let SchedRW = [WriteDouble] in {
-}
+defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
+ VOP_F64_F64, AMDGPUrsq_clamped
+>;
-def V_INTERP_MOV_F32 : VINTRP <
- 0x00000002,
- (outs VReg_32:$dst),
+} // End SchedRW = [WriteDouble]
+
+} // End SubtargetPredicate = isSICI
+
+//===----------------------------------------------------------------------===//
+// VINTRP Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Specify SchedRW for VINTRP instructions.
+defm V_INTERP_P1_F32 : VINTRP_m <
+ 0x00000000, "v_interp_p1_f32",
+ (outs VGPR_32:$dst),
+ (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
+ "$m0">;
+
+defm V_INTERP_P2_F32 : VINTRP_m <
+ 0x00000001, "v_interp_p2_f32",
+ (outs VGPR_32:$dst),
+ (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ "$src0,$m0",
+ "$src0 = $dst">;
+
+defm V_INTERP_MOV_F32 : VINTRP_m <
+ 0x00000002, "v_interp_mov_f32",
+ (outs VGPR_32:$dst),
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
- []> {
- let DisableEncoding = "$m0";
-}
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ "$m0">;
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
- (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
- "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
- []
->{
- let DisableEncoding = "$vcc";
-}
+defm V_CNDMASK_B32_e64 : VOP3_m_nosrcmod <vop3<0x100>, (outs VGPR_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
+ "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
+ [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
+ "v_cndmask_b32_e64", 3
+>;
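// Annotation (not part of the patch): note the operand order in the select
// pattern above: $src1 is produced when $src2 is true and $src0 when it is
// false, matching the hardware convention
//   v_cndmask_b32 dst = cc ? src1 : src0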
-def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
- InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
-> {
- let src0_modifiers = 0;
- let src1_modifiers = 0;
- let src2_modifiers = 0;
-}
-def V_READLANE_B32 : VOP2 <
- 0x00000001,
- (outs SReg_32:$vdst),
- (ins VReg_32:$src0, SSrc_32:$vsrc1),
- "V_READLANE_B32 $vdst, $src0, $vsrc1",
- []
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
+ VOP_F32_F32_F32, fadd
>;
-def V_WRITELANE_B32 : VOP2 <
- 0x00000002,
- (outs VReg_32:$vdst),
- (ins SReg_32:$src0, SSrc_32:$vsrc1),
- "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
- []
+defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
+ VOP_F32_F32_F32, null_frag, "v_sub_f32"
>;
+} // End isCommutable = 1
let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
- [(set f32:$dst, (fadd f32:$src0, f32:$src1))]
+
+defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
+ VOP_F32_F32_F32, int_AMDGPU_mul
>;
-defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
- [(set f32:$dst, (fsub f32:$src0, f32:$src1))]
+defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
+ VOP_F32_F32_F32, fmul
>;
-defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
-} // End isCommutable = 1
-defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
+ VOP_I32_I32_I32, AMDGPUmul_i24
+>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
+ VOP_I32_I32_I32, AMDGPUmul_u24
+>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
-let isCommutable = 1 in {
+defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
+ fminnum>;
+defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
+ fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32,
+ AMDGPUsmin
+>;
+defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32,
+ AMDGPUsmax
+>;
+defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32,
+ AMDGPUumin
+>;
+defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
+ AMDGPUumax
+>;
-defm V_MUL_LEGACY_F32 : VOP2_32 <
- 0x00000007, "V_MUL_LEGACY_F32",
- [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
+// No non-Rev Op on VI
+defm V_LSHRREV_B32 : VOP2Inst <
+ vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
+ "v_lshr_b32", "v_lshrrev_b32"
>;
-defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
- [(set f32:$dst, (fmul f32:$src0, f32:$src1))]
+// No non-Rev Op on VI
+defm V_ASHRREV_I32 : VOP2Inst <
+ vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
+ "v_ashr_i32", "v_ashrrev_i32"
>;
+// No non-Rev Op on VI
+defm V_LSHLREV_B32 : VOP2Inst <
+ vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
+ "v_lshl_b32", "v_lshlrev_b32"
+>;
-defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
- [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))]
+defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
+ VOP_I32_I32_I32, and>;
+defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32",
+ VOP_I32_I32_I32, or
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
-defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
- [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))]
+defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32",
+ VOP_I32_I32_I32, xor
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
+} // End isCommutable = 1
+
+defm V_MADMK_F32 : VOP2Inst <vop2<0x20, 0x17>, "v_madmk_f32", VOP_F32_F32_F32>;
+
+let isCommutable = 1 in {
+defm V_MADAK_F32 : VOP2Inst <vop2<0x21, 0x18>, "v_madak_f32", VOP_F32_F32_F32>;
+} // End isCommutable = 1
+
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+// No patterns so that the scalar instructions are always selected.
+// The scalar versions will be replaced with vector when needed later.
+
+// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
+// but the VI instructions behave the same as the SI versions.
+defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
+ VOP_I32_I32_I32, add
+>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32",
+ VOP_I32_I32_I32, sub
+>;
-defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
+defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
+ VOP_I32_I32_I32, null_frag, "v_sub_i32"
>;
-defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
+let Uses = [VCC] in { // Carry-in comes from VCC
+defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
+ VOP_I32_I32_I32_VCC, adde
+>;
+defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
+ VOP_I32_I32_I32_VCC, sube
+>;
+defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
+ VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
>;
-defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
-defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
+} // End Uses = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
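// Annotation (illustrative sketch, not part of the patch): the intended
// shape of the carry chain above, for a 64-bit add split into halves:
//   v_add_i32  vlo, va_lo, vb_lo   // carry-out written to VCC
//   v_addc_u32 vhi, va_hi, vb_hi   // carry-in consumed from VCC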
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
- [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+def V_READLANE_B32 : VOP2 <
+ 0x00000001,
+ (outs SReg_32:$vdst),
+ (ins VGPR_32:$src0, SSrc_32:$vsrc1),
+ "v_readlane_b32 $vdst, $src0, $vsrc1",
+ []
+>;
+
+def V_WRITELANE_B32 : VOP2 <
+ 0x00000002,
+ (outs VGPR_32:$vdst),
+ (ins SReg_32:$src0, SSrc_32:$vsrc1),
+ "v_writelane_b32 $vdst, $src0, $vsrc1",
+ []
>;
-defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
+ VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
- [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmin_legacy
+>;
+defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmax_legacy
+>;
+
+let isCommutable = 1 in {
+defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
+defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
+ VOP_I32_I32_I32, sra
>;
-defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
+defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
+}
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
- [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+} // End isCommutable = 1
+
+defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32,
+ AMDGPUbfm>;
+defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
+ VOP_F32_F32_I32, AMDGPUldexp
>;
-}
-defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
+ VOP_I32_F32_F32, int_SI_packf16
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set i32:$dst, (and i32:$src0, i32:$src1))]>;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set i32:$dst, (or i32:$src0, i32:$src1))]
+} // End SubtargetPredicate = isSICI
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140, 0x1c0>, "v_mad_legacy_f32",
+ VOP_F32_F32_F32_F32
>;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+
+defm V_MAD_F32 : VOP3Inst <vop3<0x141, 0x1c1>, "v_mad_f32",
+ VOP_F32_F32_F32_F32, fmad
>;
+defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142, 0x1c2>, "v_mad_i32_i24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_i24
+>;
+defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143, 0x1c3>, "v_mad_u32_u24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_u24
+>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
- [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
-defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
-defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
-defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
-defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
-defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+defm V_CUBEID_F32 : VOP3Inst <vop3<0x144, 0x1c4>, "v_cubeid_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBESC_F32 : VOP3Inst <vop3<0x145, 0x1c5>, "v_cubesc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBETC_F32 : VOP3Inst <vop3<0x146, 0x1c6>, "v_cubetc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
+ VOP_F32_F32_F32_F32
+>;
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
-// No patterns so that the scalar instructions are always selected.
-// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
- [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
- [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
- "V_SUB_I32">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_u32
+>;
+defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_i32
+>;
+}
-let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
- [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
- [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
- "V_SUBB_U32">;
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
+ VOP_I32_I32_I32_I32, AMDGPUbfi
+>;
-defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
+let isCommutable = 1 in {
+defm V_FMA_F32 : VOP3Inst <vop3<0x14b, 0x1cb>, "v_fma_f32",
+ VOP_F32_F32_F32_F32, fma
+>;
+defm V_FMA_F64 : VOP3Inst <vop3<0x14c, 0x1cc>, "v_fma_f64",
+ VOP_F64_F64_F64_F64, fma
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+} // End isCommutable = 1
-//===----------------------------------------------------------------------===//
-// VOP3 Instructions
-//===----------------------------------------------------------------------===//
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
+defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
+ VOP_I32_I32_I32_I32
+>;
-let neverHasSideEffects = 1 in {
+// Only on SI
+defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
+ VOP_F32_F32_F32_F32>;
+defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmin3>;
-defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
-defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
- [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
+defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmin3
>;
-defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
- [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
+defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumin3
>;
-defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
- [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
+defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmax3
+>;
+defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmax3
+>;
+defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumax3
+>;
+//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
+//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
+//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
+defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
+ VOP_I32_I32_I32_I32
+>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
+defm V_DIV_FIXUP_F32 : VOP3Inst <
+ vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;
-} // End neverHasSideEffects
+let SchedRW = [WriteDouble] in {
-defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
-defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
-defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
-defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+defm V_DIV_FIXUP_F64 : VOP3Inst <
+ vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
+>;
-let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
-defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
- [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
-defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
- [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
-}
+} // let SchedRW = [WriteDouble]
-defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
- [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
-defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
- [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
->;
-def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
- [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
->;
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-
-defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
-defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
-////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
-////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
-////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
-////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
-////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
-////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
-////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
-////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
-////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
- [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
->;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
- [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
->;
-
-def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
- [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
+ VOP_I64_I64_I32, shl
>;
-def V_LSHR_B64 : VOP3_64_32 <0x00000162, "V_LSHR_B64",
- [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+
+// Only on SI
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
+ VOP_I64_I64_I32, srl
>;
-def V_ASHR_I64 : VOP3_64_32 <0x00000163, "V_ASHR_I64",
- [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+
+// Only on SI
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
+ VOP_I64_I64_I32, sra
>;
+let SchedRW = [WriteDouble] in {
let isCommutable = 1 in {
-def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
-def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
-def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
-def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
+ VOP_F64_F64_F64, fadd
+>;
+defm V_MUL_F64 : VOP3Inst <vop3<0x165, 0x281>, "v_mul_f64",
+ VOP_F64_F64_F64, fmul
+>;
+
+defm V_MIN_F64 : VOP3Inst <vop3<0x166, 0x282>, "v_min_f64",
+ VOP_F64_F64_F64, fminnum
+>;
+defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
+ VOP_F64_F64_F64, fmaxnum
+>;
} // isCommutable = 1
-def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
+ VOP_F64_F64_I32, AMDGPUldexp
+>;
-let isCommutable = 1 in {
+} // let SchedRW = [WriteDouble]
-defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
-defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
-defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
-defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
-} // isCommutable = 1
+defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32",
+ VOP_I32_I32_I32
+>;
+
+defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b, 0x285>, "v_mul_lo_i32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
+ VOP_I32_I32_I32
+>;
+
+} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
-def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+let SchedRW = [WriteDouble] in {
// Double precision division pre-scale.
-def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
+} // let SchedRW = [WriteDouble]
-defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
- [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
+let isCommutable = 1 in {
+defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
+ VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64",
- [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))]
+let SchedRW = [WriteDouble] in {
+defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
+ VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64",
- [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))]
+} // End SchedRW = [WriteDouble]
+} // End isCommutable = 1
+
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
+
+let SchedRW = [WriteDouble] in {
+defm V_TRIG_PREOP_F64 : VOP3Inst <
+ vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
>;
+} // let SchedRW = [WriteDouble]
+
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
-
let isCodeGenOnly = 1, isPseudo = 1 in {
-def V_MOV_I1 : InstSI <
- (outs VReg_1:$dst),
- (ins i1imm:$src),
- "", [(set i1:$dst, (imm:$src))]
->;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
+// pass to enable folding of inline immediates.
+def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
+} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
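// Annotation (assumed rationale, restating the comment above): splitting a
// 64-bit move into two V_MOV_B32 halves early hides a foldable constant;
// keeping it as one pseudo, e.g.
//   %x = V_MOV_B64_PSEUDO 0
// lets SIFoldOperands see the whole inline immediate before expansion.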
-def V_AND_I1 : InstSI <
- (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
- [(set i1:$dst, (and i1:$src0, i1:$src1))]
->;
-
-def V_OR_I1 : InstSI <
- (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
- [(set i1:$dst, (or i1:$src0, i1:$src1))]
->;
-
-def V_XOR_I1 : InstSI <
- (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
- [(set i1:$dst, (xor i1:$src0, i1:$src1))]
->;
+let hasSideEffects = 1 in {
+def SGPR_USE : InstSI <(outs),(ins), "", []>;
+}
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
@@ -1557,7 +1827,7 @@ def SI_ELSE : InstSI <
def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
- "SI_LOOP $saved, $target",
+ "si_loop $saved, $target",
[(int_SI_loop i64:$saved, bb:$target)]
>;
@@ -1566,35 +1836,35 @@ def SI_LOOP : InstSI <
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
- "SI_ELSE $dst, $src",
+ "si_else $dst, $src",
[(set i64:$dst, (int_SI_break i64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, SReg_64:$src),
- "SI_IF_BREAK $dst, $vcc, $src",
+ "si_if_break $dst, $vcc, $src",
[(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
- "SI_ELSE_BREAK $dst, $src0, $src1",
+ "si_else_break $dst, $src0, $src1",
[(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
- "SI_END_CF $saved",
+ "si_end_cf $saved",
[(int_SI_end_cf i64:$saved)]
>;
def SI_KILL : InstSI <
(outs),
(ins VSrc_32:$src),
- "SI_KILL $src",
+ "si_kill $src",
[(int_AMDGPU_kill f32:$src)]
>;
@@ -1603,12 +1873,12 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri, ADDRIndirect>;
+//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
let UseNamedOperandTable = 1 in {
def SI_RegisterLoad : InstSI <
- (outs VReg_32:$dst, SReg_64:$temp),
+ (outs VGPR_32:$dst, SReg_64:$temp),
(ins FRAMEri32:$addr, i32imm:$chan),
"", []
> {
@@ -1618,7 +1888,7 @@ def SI_RegisterLoad : InstSI <
class SIRegStore<dag outs> : InstSI <
outs,
- (ins VReg_32:$val, FRAMEri32:$addr, i32imm:$chan),
+ (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
"", []
> {
let isRegisterStore = 1;
@@ -1634,22 +1904,22 @@ def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
} // End UseNamedOperandTable = 1
def SI_INDIRECT_SRC : InstSI <
- (outs VReg_32:$dst, SReg_64:$temp),
+ (outs VGPR_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
- "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ "si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
(outs rc:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
- "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VGPR_32:$val),
+ "si_indirect_dst $dst, $temp, $src, $idx, $off, $val",
[]
> {
let Constraints = "$src = $dst";
}
-def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
+def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
@@ -1659,24 +1929,10 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
let usesCustomInserter = 1 in {
-// This pseudo instruction takes a pointer as input and outputs a resource
-// constant that can be used with the ADDR64 MUBUF instructions.
-def SI_ADDR64_RSRC : InstSI <
- (outs SReg_128:$srsrc),
- (ins SSrc_64:$ptr),
- "", []
->;
-
-def SI_BUFFER_RSRC : InstSI <
- (outs SReg_128:$srsrc),
- (ins SReg_32:$ptr_lo, SReg_32:$ptr_hi, SSrc_32:$data_lo, SSrc_32:$data_hi),
- "", []
->;
-
def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
- "V_SUB_F64 $dst, $src0, $src1",
+ "v_sub_f64 $dst, $src0, $src1",
[(set f64:$dst, (fsub f64:$src0, f64:$src1))]
>;
@@ -1684,18 +1940,20 @@ def V_SUB_F64 : InstSI <
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- def _SAVE : InstSI <
- (outs VReg_32:$dst),
- (ins sgpr_class:$src, i32imm:$frame_idx),
- "", []
- >;
-
- def _RESTORE : InstSI <
- (outs sgpr_class:$dst),
- (ins VReg_32:$src, i32imm:$frame_idx),
- "", []
- >;
-
+ let UseNamedOperandTable = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs sgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1
}
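// Annotation (not part of the patch): _SAVE now produces no result; the
// spilled value goes to scratch memory addressed from $scratch_ptr plus
// $scratch_offset at slot $frame_idx, and UseNamedOperandTable lets later
// passes find these operands by name instead of by position.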
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
@@ -1704,6 +1962,30 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
+ let UseNamedOperandTable = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs vgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1
+}
+
+defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
+defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
+defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
+defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
+defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+
let Defs = [SCC] in {
def SI_CONSTDATA_PTR : InstSI <
@@ -1716,13 +1998,15 @@ def SI_CONSTDATA_PTR : InstSI <
} // end IsCodeGenOnly, isPseudo
-} // end SubtargetPredicate = SI
+} // end SubtargetPredicate = isGCN
-let Predicates = [isSI] in {
+let Predicates = [isGCN] in {
def : Pat<
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
- (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
+ (V_CNDMASK_B32_e64 $src2, $src1,
+ (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
+ DSTCLAMP.NONE, DSTOMOD.NONE))
>;
def : Pat <
@@ -1730,12 +2014,16 @@ def : Pat <
(SI_KILL 0xbf800000)
>;
+let Predicates = [isSICI] in {
+
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
+} // End Predicates = [isSICI]
+
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
@@ -1750,7 +2038,7 @@ def : Pat <
multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
- // 1. Offset as 8bit DWORD immediate
+ // 1. SI-CI: Offset as 8bit DWORD immediate
def : Pat <
(constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
(vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
@@ -1769,6 +2057,28 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
>;
}
+multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
+ // 1. VI: Offset as 20bit immediate in bytes
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))),
+ (vt (Instr_IMM $sbase, (as_i32imm $offset)))
+ >;
+
+  // 2. Offset loaded in a 32bit SGPR
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
+ (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load i64:$sbase),
+ (vt (Instr_IMM $sbase, 0))
+ >;
+}
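// Worked example (annotation): a scalar load at byte offset 64 from $sbase
// would match as
//   S_LOAD_DWORD_IMM $sbase, 16   // SI/CI: 8-bit immediate, in dwords
//   S_LOAD_DWORD_IMM $sbase, 64   // VI: 20-bit immediate, in bytes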
+
+let Predicates = [isSICI] in {
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
@@ -1776,6 +2086,19 @@ defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
+} // End Predicates = [isSICI]
+
+let Predicates = [isVI] in {
+defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
+} // End Predicates = [isVI]
+
+let Predicates = [isSICI] in {
// 1. Offset as 8bit DWORD immediate
def : Pat <
@@ -1783,42 +2106,36 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
>;
+} // End Predicates = [isSICI]
+
// 2. Offset loaded in a 32bit SGPR
def : Pat <
(SIload_constant v4i32:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
-} // Predicates = [isSI] in {
-
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
-let Predicates = [isSI, isCFDepth0] in {
-
def : Pat <
(i64 (ctpop i64:$src)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_BCNT1_I32_B64 $src), sub0),
- (S_MOV_B32 0), sub1)
+ (i64 (REG_SEQUENCE SReg_64,
+ (S_BCNT1_I32_B64 $src), sub0,
+ (S_MOV_B32 0), sub1))
>;
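// Annotation: ctpop of an i64 fits in 32 bits, so the REG_SEQUENCE places
// the S_BCNT1 result in sub0 and a constant zero in sub1 to form the
// 64-bit value in a single node instead of an INSERT_SUBREG chain.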
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
-// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
+// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
(i32 (addc i32:$src0, i32:$src1)),
- (S_ADD_I32 $src0, $src1)
+ (S_ADD_U32 $src0, $src1)
>;
-} // Predicates = [isSI, isCFDepth0]
-
-let Predicates = [isSI] in {
-
//===----------------------------------------------------------------------===//
// SOPP Patterns
//===----------------------------------------------------------------------===//
@@ -1842,49 +2159,9 @@ defm : RsqPat<V_RSQ_F32_e32, f32>;
// VOP2 Patterns
//===----------------------------------------------------------------------===//
-class BinOp64Pat <SDNode node, Instruction inst> : Pat <
- (node i64:$src0, i64:$src1),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (inst (EXTRACT_SUBREG i64:$src0, sub0),
- (EXTRACT_SUBREG i64:$src1, sub0)), sub0),
- (inst (EXTRACT_SUBREG i64:$src0, sub1),
- (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
->;
-
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
-
-class SextInReg <ValueType vt, int ShiftAmt> : Pat <
- (sext_inreg i32:$src0, vt),
- (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
->;
-
-def : SextInReg <i8, 24>;
-def : SextInReg <i16, 16>;
-
def : Pat <
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
- (V_BCNT_U32_B32_e32 $popcnt, $val)
->;
-
-def : Pat <
- (i32 (ctpop i32:$popcnt)),
- (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0)
->;
-
-def : Pat <
- (i64 (ctpop i64:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1),
- (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)),
- sub0),
- (V_MOV_B32_e32 0), sub1)
->;
-
-def : Pat <
- (addc i32:$src0, i32:$src1),
- (V_ADD_I32_e32 $src0, $src1)
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
>;
/********** ======================= **********/
@@ -2222,10 +2499,10 @@ foreach Index = 0-15 in {
}
def : BitConvert <i32, f32, SReg_32>;
-def : BitConvert <i32, f32, VReg_32>;
+def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, SReg_32>;
-def : BitConvert <f32, i32, VReg_32>;
+def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i64, f64, VReg_64>;
@@ -2258,62 +2535,63 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
/********** Src & Dst modifiers **********/
/********** =================== **********/
-def FCLAMP_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FCLAMP_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
-
def : Pat <
- (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
- (FCLAMP_SI f32:$src)
+ (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
+ (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
>;
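// Annotation (reading of the pattern above): the clamp to [0.0, 1.0] is
// folded into a v_add_f32 with a zero second operand and the VOP3 clamp
// bit set (the literal 1 above), so the old FCLAMP_SI pseudo is no longer
// needed.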
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Manipulate the sign bit directly, as e.g. using the source negation modifier
-// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
-// breaking the piglit *s-floatBitsToInt-neg* tests
-
-// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
-// removing these patterns
+// Prevent expanding both fneg and fabs.
+// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
(V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>;
-def FABS_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FABS_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
+// FIXME: Should use S_OR_B32
+def : Pat <
+ (fneg (fabs f64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+ sub1)
+>;
def : Pat <
(fabs f32:$src),
- (FABS_SI f32:$src)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
>;
-def FNEG_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FNEG_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
-
def : Pat <
(fneg f32:$src),
- (FNEG_SI f32:$src)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+>;
+
+def : Pat <
+ (fabs f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+  (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+                 (V_MOV_B32_e32 0x7fffffff)), // Clear sign bit.
+ sub1)
+>;
+
+def : Pat <
+ (fneg f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)),
+ sub1)
>;
/********** ================== **********/
@@ -2327,7 +2605,7 @@ def : Pat <
def : Pat <
(SGPRImm<(f32 fpimm)>:$imm),
- (S_MOV_B32 fpimm:$imm)
+ (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
@@ -2337,7 +2615,7 @@ def : Pat <
def : Pat <
(f32 fpimm:$imm),
- (V_MOV_B32_e32 fpimm:$imm)
+ (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
@@ -2345,21 +2623,38 @@ def : Pat <
(S_MOV_B64 InlineImm<i64>:$imm)
>;
+// XXX - Should this use an s_cmp to set SCC?
+
+// Set to sign-extended 64-bit value (true = -1, false = 0)
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 (i64 (as_i64imm $imm)))
+>;
+
+def : Pat <
+ (f64 InlineFPImm<f64>:$imm),
+ (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
+>;
+
/********** ===================== **********/
/********** Interpolation Patterns **********/
/********** ===================== **********/
+// The value of $params is constant throughout the entire kernel.
+// We need to use S_MOV_B32 $params, because CSE ignores copies, so
+// without it we end up with a lot of redundant moves.
+
def : Pat <
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
- imm:$attr_chan, imm:$attr, i32:$params),
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
(EXTRACT_SUBREG $ij, sub1),
- imm:$attr_chan, imm:$attr, $params)
+ imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;
/********** ================== **********/
@@ -2376,28 +2671,30 @@ def : Pat <
def : Pat<
(fdiv f64:$src0, f64:$src1),
- (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
+ (V_MUL_F64 0 /* src0_modifiers */, $src0,
+ 0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
+ 0 /* clamp */, 0 /* omod */)
>;
def : Pat <
(int_AMDGPU_cube v4f32:$src),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub0),
- (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub1),
- (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub2),
- (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub3)
+ (REG_SEQUENCE VReg_128,
+ (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub0,
+    (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+                  0 /* clamp */, 0 /* omod */), sub1,
+    (V_CUBEMA_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+                  0 /* clamp */, 0 /* omod */), sub2,
+    (V_CUBEID_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+                  0 /* clamp */, 0 /* omod */), sub3)
>;
def : Pat <
@@ -2413,12 +2710,16 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
+let Predicates = [isSICI] in {
+
// Offset in a 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
+} // End Predicates = [isSICI]
+
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip i32:$src0),
@@ -2427,12 +2728,16 @@ def : Pat <
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
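// Annotation (sketch of the elided middle of this pattern): the reciprocal
// is computed in f32 and rescaled, roughly
//   urecip(x) ~= (u32)(2^32 * (1.0f / (float)x))
// which is the [0,1] to unsigned-range scaling the comment above describes.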
+let Predicates = [isSICI] in {
+
def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
- (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0))
+ (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
>;
+}
+
//===----------------------------------------------------------------------===//
// VOP3 Patterns
//===----------------------------------------------------------------------===//
@@ -2441,84 +2746,74 @@ def : IMad24Pat<V_MAD_I32_I24>;
def : UMad24Pat<V_MAD_U32_U24>;
def : Pat <
- (fadd f64:$src0, f64:$src1),
- (V_ADD_F64 $src0, $src1, (i64 0))
->;
-
-def : Pat <
- (fmul f64:$src0, f64:$src1),
- (V_MUL_F64 $src0, $src1, (i64 0))
->;
-
-def : Pat <
- (mul i32:$src0, i32:$src1),
- (V_MUL_LO_I32 $src0, $src1, (i32 0))
->;
-
-def : Pat <
(mulhu i32:$src0, i32:$src1),
- (V_MUL_HI_U32 $src0, $src1, (i32 0))
+ (V_MUL_HI_U32 $src0, $src1)
>;
def : Pat <
(mulhs i32:$src0, i32:$src1),
- (V_MUL_HI_I32 $src0, $src1, (i32 0))
+ (V_MUL_HI_I32 $src0, $src1)
>;
-defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
+def : Vop3ModPat<V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;
+
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
/********** ======================= **********/
/********** Load/Store Patterns **********/
/********** ======================= **********/
-multiclass DSReadPat <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
- (inst (i1 0), $ptr, (as_i16imm $offset))
- >;
+class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
+ (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+>;
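// Annotation (assumed, from the operand shape): the trailing (S_MOV_B32 -1)
// feeds the m0 operand the DS patterns now carry; all-ones effectively
// disables the LDS size clamp held in m0.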
- def : Pat <
- (frag i32:$src0),
- (vt (inst 0, $src0, 0))
- >;
-}
+def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+def : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-defm : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_B64, v2i32, local_load>;
+let AddedComplexity = 100 in {
-multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
- >;
+def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
- def : Pat <
- (frag vt:$val, i32:$ptr),
- (inst 0, $ptr, $val, 0)
- >;
-}
+} // End AddedComplexity = 100
-defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
-defm : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
+def : Pat <
+ (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1))),
+ (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+>;
-multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
- >;
+class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+>;
- def : Pat <
- (frag i32:$ptr, vt:$val),
- (inst 0, $ptr, $val, 0)
- >;
-}
+def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+
+let AddedComplexity = 100 in {
+
+def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
+} // End AddedComplexity = 100
+
+def : Pat <
+ (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (S_MOV_B32 -1))
+>;
+
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
@@ -2530,69 +2825,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
- Instruction LoadImm, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
- >;
-
- def : Pat <
- (frag i32:$ptr, (vt 1)),
- (inst 0, $ptr, (LoadImm (vt -1)), 0)
- >;
-}
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+>;
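// Example (annotation, not part of the patch): an IR-level
//   %old = atomicrmw add i32 addrspace(3)* %ptr, i32 1
// matches the (frag ..., (vt 1)) form above and selects DS_INC_RTN_U32
// with (LoadImm -1) as its data operand; ds_inc wraps at the operand
// value, so with all-ones it behaves exactly like "add 1".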
-multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
- >;
- def : Pat <
- (frag i32:$ptr, vt:$cmp, vt:$swap),
- (inst 0, $ptr, $cmp, $swap, 0)
- >;
-}
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+>;
// 32-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
// 64-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, atomic_load_sub_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//
@@ -2602,12 +2884,12 @@ defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (add i64:$ptr, i64:$offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))),
+ (Instr_ADDR64 $srsrc, $vaddr, $offset)
>;
-
}
+let Predicates = [isSICI] in {
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
@@ -2615,6 +2897,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
+} // End Predicates = [isSICI]
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
@@ -2622,6 +2905,7 @@ class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
+let Predicates = [isSICI] in {
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
@@ -2629,6 +2913,7 @@ def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
+} // End Predicates = [isSICI]
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
@@ -2667,26 +2952,28 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
}
+let Predicates = [isSICI] in {
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+} // End Predicates = [isSICI]
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFAddr32 v4i32:$srsrc, i32:$vaddr, i32:$soffset,
- u16imm:$offset, i1imm:$offen, i1imm:$idxen,
- i1imm:$glc, i1imm:$slc, i1imm:$tfe)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, $offen, $idxen,
- $glc, $slc, $tfe)
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
+ u16imm:$offset)),
+ (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE, i32, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT, i32, truncstorei16_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORD, i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2, v2i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4, v4i32, store_private>;
+let Predicates = [isSICI] in {
+def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+} // End Predicates = [isSICI]
/*
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
@@ -2694,11 +2981,13 @@ class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(Instr $value, $srsrc, $vaddr, $offset)
>;
+let Predicates = [isSICI] in {
def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
+} // End Predicates = [isSICI]
*/
@@ -2725,29 +3014,26 @@ def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
let SubtargetPredicate = isCI in {
-// Sea island new arithmetic instructinos
-let neverHasSideEffects = 1 in {
-defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
- [(set f64:$dst, (ftrunc f64:$src0))]
->;
-defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
- [(set f64:$dst, (fceil f64:$src0))]
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
+ VOP_I32_I32_I32
>;
-defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
- [(set f64:$dst, (ffloor f64:$src0))]
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
+ VOP_I32_I32_I32
>;
-defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64",
- [(set f64:$dst, (frint f64:$src0))]
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
+ VOP_I32_I32_I32
>;
-defm V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
-defm V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
-defm V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
-def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
+let isCommutable = 1 in {
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
+ VOP_I64_I32_I32_I64
+>;
// XXX - Does this set VCC?
-def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
-} // End neverHasSideEffects = 1
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
+ VOP_I64_I32_I32_I64
+>;
+} // End isCommutable = 1
// Remaining instructions:
// FLAT_*
@@ -2756,8 +3042,6 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
// S_CBRANCH_CDBGSYS_OR_USER
// S_CBRANCH_CDBGSYS_AND_USER
// S_DCACHE_INV_VOL
-// V_EXP_LEGACY_F32
-// V_LOG_LEGACY_F32
// DS_NOP
// DS_GWS_SEMA_RELEASE_ALL
// DS_WRAP_RTN_B32
@@ -2770,8 +3054,39 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
-} // End iSCI
+} // End isCI
+
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
+ PatFrag flat_ld> :
+ Pat <(vt (flat_ld i64:$ptr)),
+ (Instr_ADDR64 $ptr)
+>;
+
+def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
+
+class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
+ Pat <(st vt:$value, i64:$ptr),
+ (Instr $value, $ptr)
+ >;
+def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
+def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
+def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
/********** ====================== **********/
/**********   Indirect addressing  **********/
@@ -2821,44 +3136,37 @@ defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
def : Pat<(i32 (sext_inreg i32:$src, i1)),
(S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
-// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
-// might not be worth the effort, and will need to expand to shifts when
-// fixing SGPR copies.
-
// Handle sext_inreg in i64
def : Pat <
(i64 (sext_inreg i64:$src, i1)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i8)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i16)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i32)),
+ (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
>;
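For reference, the S_BFE immediates above pack a field descriptor: bit offset in Imm[5:0] and field width in Imm[22:16], which is why a width of W is written as `0 | W << 16`. A minimal sketch of the extract under that encoding assumption:

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the S_BFE_I64 semantics the patterns above rely on, assuming
// the SI encoding: Imm[5:0] = bit offset, Imm[22:16] = field width.
static int64_t sbfeI64(uint64_t Src, uint32_t Imm) {
  unsigned Offset = Imm & 0x3f;
  unsigned Width = (Imm >> 16) & 0x7f;
  uint64_t Shifted = Src << (64 - Offset - Width);
  return static_cast<int64_t>(Shifted) >> (64 - Width); // arithmetic shift
}

int main() {
  assert(sbfeI64(0xff, 0x80000) == -1);    // sext_inreg i8: 0 | 8 << 16
  assert(sbfeI64(0x7f, 0x80000) == 0x7f);
  assert(sbfeI64(0x80000000u, 0x200000) == -0x80000000LL); // i32 case
}
```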
class ZExt_i64_i32_Pat <SDNode ext> : Pat <
(i64 (ext i32:$src)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
- (S_MOV_B32 0), sub1)
+ (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
>;
class ZExt_i64_i1_Pat <SDNode ext> : Pat <
(i64 (ext i1:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0),
- (S_MOV_B32 0), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
+ (S_MOV_B32 0), sub1)
>;
@@ -2869,20 +3177,38 @@ def : ZExt_i64_i1_Pat<anyext>;
def : Pat <
(i64 (sext i32:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
- (S_ASHR_I32 $src, 31), sub1)
+ (REG_SEQUENCE SReg_64, $src, sub0,
+ (S_ASHR_I32 $src, 31), sub1)
>;
def : Pat <
(i64 (sext i1:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)),
- (V_CNDMASK_B32_e64 0, -1, $src), sub0),
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 0, -1, $src), sub0,
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;
+// If we need to perform a logical operation on i1 values, we need to
+// use vector comparisons since there is only one SCC register. Vector
+// comparisons still write to a pair of SGPRs, so treat these as
+// 64-bit comparisons. When legalizing SGPR copies, any instruction
+// fed by a copy from SCC will be moved to the VALU.
+def : Pat <
+ (i1 (and i1:$src0, i1:$src1)),
+ (S_AND_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (or i1:$src0, i1:$src1)),
+ (S_OR_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (xor i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
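A per-lane sketch of why this works, assuming the usual 64-lane wavefront model (the mask values here are illustrative, not real register contents):

```cpp
#include <cassert>
#include <cstdint>

// Per-lane view: an i1 is one bit per lane of the 64-lane wavefront, held
// in an SGPR pair, so i1 and/or/xor become plain 64-bit scalar ops.
int main() {
  uint64_t A = 0x00000000ffffffffull; // lanes 0..31 active
  uint64_t B = 0x0000ffff0000ffffull;
  assert((A & B) == 0x000000000000ffffull); // S_AND_B64
  assert((A | B) == 0x0000ffffffffffffull); // S_OR_B64
  assert((A ^ B) == 0x0000ffffffff0000ull); // S_XOR_B64
}
```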
def : Pat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
@@ -2895,7 +3221,7 @@ def : Pat <
def : Pat <
(f64 (sint_to_fp i1:$src)),
- (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
>;
def : Pat <
@@ -2914,13 +3240,25 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
+>;
+
+def : Pat <
+ (i32 (bswap i32:$a)),
+ (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
+ (V_ALIGNBIT_B32 $a, $a, 24),
+ (V_ALIGNBIT_B32 $a, $a, 8))
+>;
+
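This is the classic two-rotate byte-swap trick: with both sources equal, V_ALIGNBIT_B32 is a rotate right, and V_BFI_B32 blends the two rotations through the 0x00ff00ff mask. A scalar model of the arithmetic, with an illustrative helper name:

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the selection above: V_ALIGNBIT_B32 with both sources
// equal is a rotate right, and V_BFI_B32 computes (mask & x) | (~mask & y).
static uint32_t modelBswap(uint32_t A) {
  uint32_t Rot24 = (A >> 24) | (A << 8); // V_ALIGNBIT_B32 $a, $a, 24
  uint32_t Rot8 = (A >> 8) | (A << 24);  // V_ALIGNBIT_B32 $a, $a, 8
  uint32_t Mask = 0x00ff00ff;
  return (Mask & Rot24) | (~Mask & Rot8); // V_BFI_B32
}

int main() { assert(modelBswap(0x12345678u) == 0x78563412u); }
```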
+def : Pat <
+ (f32 (select i1:$src2, f32:$src1, f32:$src0)),
+ (V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
-} // End isSI predicate
+} // End isGCN predicate
diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..0cb67465012d
--- /dev/null
+++ b/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -0,0 +1,434 @@
+//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to fuse DS instructions with nearby immediate offsets.
+// This will fuse operations such as
+// ds_read_b32 v0, v2 offset:16
+// ds_read_b32 v1, v2 offset:32
+// ==>
+// ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
+//
+//
+// Future improvements:
+//
+// - This currently relies on the scheduler to place loads and stores next to
+// each other, and then only merges adjacent pairs of instructions. It would
+// be good to be more flexible with interleaved instructions, and possibly run
+// before scheduling. It currently missing stores of constants because loading
+// the constant into the data register is placed between the stores, although
+// this is arguably a scheduling problem.
+//
+// - Recomputing live intervals seems inefficient. This currently only matches
+// one pair, recomputes live intervals, and moves on to the next pair. It
+// would be better to compute a list of all merges that need to occur.
+//
+// - With a list of instructions to process, we can also merge more. If a
+// cluster of loads has offsets too large to fit in the 8-bit fields, but
+// close enough together that their deltas fit, we can add to the base
+// pointer and use the new reduced offsets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-load-store-opt"
+
+namespace {
+
+class SILoadStoreOptimizer : public MachineFunctionPass {
+private:
+ const TargetMachine *TM;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+
+ static bool offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
+ unsigned EltSize);
+
+ void updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx);
+
+ MachineBasicBlock::iterator mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+public:
+ static char ID;
+
+ SILoadStoreOptimizer() :
+ MachineFunctionPass(ID),
+ TM(nullptr),
+ TII(nullptr),
+ TRI(nullptr),
+ MRI(nullptr),
+ LIS(nullptr) {
+
+ }
+
+ SILoadStoreOptimizer(const TargetMachine &TM_) :
+ MachineFunctionPass(ID),
+ TM(&TM_),
+ TII(static_cast<const SIInstrInfo*>(TM->getSubtargetImpl()->getInstrInfo())) {
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool optimizeBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Load / Store Optimizer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveIntervals>();
+
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+
+char SILoadStoreOptimizer::ID = 0;
+
+char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;
+
+FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
+ return new SILoadStoreOptimizer(TM);
+}
+
+bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned Size) {
+ // XXX - Would the same offset be OK? Is there any reason this would happen or
+ // be useful?
+ if (Offset0 == Offset1)
+ return false;
+
+ // This won't be valid if the offset isn't aligned.
+ if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
+ return false;
+
+ unsigned EltOffset0 = Offset0 / Size;
+ unsigned EltOffset1 = Offset1 / Size;
+
+ // Check if the new offsets fit in the reduced 8-bit range.
+ if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
+ return true;
+
+ // If the offset in elements doesn't fit in 8-bits, we might be able to use
+ // the stride 64 versions.
+  if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64 != 0))
+ return false;
+
+ return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
+}
+
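To make the check concrete, here is a standalone restatement of offsetsCanBeCombined with LLVM's isUInt<8> spelled out, exercised on the example from the file header (a sketch, not the shipped code):

```cpp
#include <cassert>

// Standalone restatement of the check above; EltSize is in bytes and the
// offsets are the raw DS immediate offsets.
static bool offsetsCombinable(unsigned Off0, unsigned Off1, unsigned EltSize) {
  if (Off0 == Off1 || Off0 % EltSize != 0 || Off1 % EltSize != 0)
    return false;
  unsigned E0 = Off0 / EltSize, E1 = Off1 / EltSize;
  if (E0 <= 255 && E1 <= 255) // fits the plain read2/write2 form
    return true;
  if (E0 % 64 != 0 || E1 % 64 != 0) // otherwise try the st64 form
    return false;
  return E0 / 64 <= 255 && E1 / 64 <= 255;
}

int main() {
  assert(offsetsCombinable(16, 32, 4));      // header example: offset0:4 offset1:8
  assert(offsetsCombinable(1024, 2048, 4));  // st64 form: offset0:4 offset1:8
  assert(!offsetsCombinable(1028, 2048, 4)); // 257 elements: no encoding fits
}
```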
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
+                                          unsigned EltSize) {
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = I;
+ ++MBBI;
+
+  if (MBBI == E || MBBI->getOpcode() != I->getOpcode())
+ return E;
+
+ // Don't merge volatiles.
+ if (MBBI->hasOrderedMemoryRef())
+ return E;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
+
+ // Check same base pointer. Be careful of subregisters, which can occur with
+ // vectors of pointers.
+ if (AddrReg0.getReg() == AddrReg1.getReg() &&
+ AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ AMDGPU::OpName::offset);
+ unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
+ unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+
+ // Check both offsets fit in the reduced range.
+ if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
+ return MBBI;
+ }
+
+ return E;
+}
+
+void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
+ E = MRI->reg_end(); I != E; ) {
+ MachineOperand &O = *I;
+ ++I;
+ O.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be careful, since the addresses could be subregisters themselves in weird
+ // cases, like vectors of pointers.
+ const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
+
+ unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
+ unsigned DestReg1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
+
+  // Prefer the st64 form if we can use it, even if the offset fits in the
+  // non-st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
+ }
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Read2Desc = TII->get(Opc);
+
+ const TargetRegisterClass *SuperRC
+ = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+
+ DebugLoc DL = I->getDebugLoc();
+ MachineInstrBuilder Read2
+ = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
+ .addImm(0) // gds
+ .addOperand(*AddrReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // M0
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ LIS->InsertMachineInstrInMaps(Read2);
+
+ unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
+ updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ LIS->RemoveMachineInstrFromMaps(Paired);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
+ LIS->shrinkToUses(&AddrRegLI);
+
+ LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
+ LIS->shrinkToUses(&M0RegLI);
+
+ // Currently m0 is treated as a register class with one member instead of an
+ // implicit physical register. We are using the virtual register for the first
+ // one, but we still need to update the live range of the now unused second m0
+ // virtual register to avoid verifier errors.
+ const MachineOperand *PairedM0Reg
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
+ LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
+ LIS->shrinkToUses(&PairedM0RegLI);
+
+ LIS->getInterval(DestReg); // Create new LI
+
+ DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
+ return Read2.getInstr();
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be sure to use .addOperand(), and not .addReg() with these. We want to be
+ // sure we preserve the subregister index and any register flags set on them.
+ const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
+ const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
+
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
+
+  // Prefer the st64 form if we can use it, even if the offset fits in the
+  // non-st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ }
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Write2Desc = TII->get(Opc);
+ DebugLoc DL = I->getDebugLoc();
+
+ MachineInstrBuilder Write2
+ = BuildMI(*MBB, I, DL, Write2Desc)
+ .addImm(0) // gds
+ .addOperand(*Addr) // addr
+ .addOperand(*Data0) // data0
+ .addOperand(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // m0
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ // XXX - How do we express subregisters here?
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
+ M0Reg->getReg()};
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ LIS->RemoveMachineInstrFromMaps(Paired);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
+
+ DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
+ return Write2.getInstr();
+}
+
+// Scan through looking for adjacent LDS operations with constant offsets from
+// the same base register. We rely on the scheduler to do the hard work of
+// clustering nearby loads, and assume these are all adjacent.
+bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I;
+
+ // Don't combine if volatile.
+ if (MI.hasOrderedMemoryRef()) {
+ ++I;
+ continue;
+ }
+
+ unsigned Opc = MI.getOpcode();
+ if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeRead2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeWrite2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ }
+
+ ++I;
+ }
+
+ return Modified;
+}
+
+bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo *STM = MF.getTarget().getSubtargetImpl();
+ TRI = static_cast<const SIRegisterInfo*>(STM->getRegisterInfo());
+ TII = static_cast<const SIInstrInfo*>(STM->getInstrInfo());
+ MRI = &MF.getRegInfo();
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
+
+ assert(!MRI->isSSA());
+
+ bool Modified = false;
+
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= optimizeBlock(MBB);
+
+ return Modified;
+}
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 75b5a5e027ff..068b22f37704 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -49,8 +49,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -86,7 +88,6 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -307,10 +308,9 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
#endif
// Clear this thread from the exec mask if the operand is negative
- if ((Op.isImm() || Op.isFPImm())) {
+  if (Op.isImm()) {
// Constant operand: Set exec mask to 0 or do nothing
- if (Op.isImm() ? (Op.getImm() & 0x80000000) :
- Op.getFPImm()->isNegative()) {
+ if (Op.getImm() & 0x80000000) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
.addImm(0);
}
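The simplified branch works because the f32 kill operand is now carried as its raw bit pattern in a plain immediate, so "negative" is just the IEEE-754 sign bit. A hedged sketch of that test (the helper name is illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// The kill operand holds the f32 bit pattern as a plain immediate, so
// "negative" reduces to testing the IEEE-754 sign bit (0x80000000).
static bool killOperandIsNegative(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return (Bits & 0x80000000u) != 0;
}

int main() {
  assert(killOperandIsNegative(-1.0f));
  assert(killOperandIsNegative(-0.0f)); // sign bit set even for -0.0
  assert(!killOperandIsNegative(0.5f));
}
```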
@@ -323,14 +323,6 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
-/// The m0 register stores the maximum allowable address for LDS reads and
-/// writes. Its value must be at least the size in bytes of LDS allocated by
-/// the shader. For simplicity, we set it to the maximum possible value.
-void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
-}
-
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -347,7 +339,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
} else {
assert(AMDGPU::SReg_64RegClass.contains(Save));
- assert(AMDGPU::VReg_32RegClass.contains(Idx));
+ assert(AMDGPU::VGPR_32RegClass.contains(Idx));
// Save the EXEC mask
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
@@ -389,12 +381,6 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
.addReg(Save);
}
- // FIXME: Are there any values other than the LDS address clamp that need to
- // be stored in the m0 register and may be live for more than a few
- // instructions? If so, we should save the m0 register at the beginning
- // of this function and restore it here.
- // FIXME: Add support for LDS direct loads.
- InitM0ForLDS(&MI);
MI.eraseFromParent();
}
@@ -442,13 +428,14 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
- bool NeedM0 = false;
bool NeedWQM = false;
+ bool NeedFlat = false;
unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -460,10 +447,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isDS(MI.getOpcode())) {
- NeedM0 = true;
+ if (TII->isDS(MI.getOpcode()))
NeedWQM = true;
- }
+
+ // Flat uses m0 in case it needs to access LDS.
+ if (TII->isFLAT(MI.getOpcode()))
+ NeedFlat = true;
switch (MI.getOpcode()) {
default: break;
@@ -530,23 +519,54 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::V_INTERP_MOV_F32:
NeedWQM = true;
break;
-
}
}
}
- if (NeedM0) {
- MachineBasicBlock &MBB = MF.front();
- // Initialize M0 to a value that won't cause LDS access to be discarded
- // due to offset clamping
- InitM0ForLDS(MBB.getFirstNonPHI());
- }
-
if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
}
+ // FIXME: This seems inappropriate to do here.
+ if (NeedFlat && MFI->IsKernel) {
+ // Insert the prologue initializing the SGPRs pointing to the scratch space
+ // for flat accesses.
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+ // TODO: What to use with function calls?
+
+ // FIXME: This is reporting stack size that is used in a scratch buffer
+ // rather than registers as well.
+ uint64_t StackSizeBytes = FrameInfo->getStackSize();
+
+ int IndirectBegin
+ = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
+ // Convert register index to 256-byte unit.
+ uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
+
+    assert(StackSizeBytes < 0xffff && StackOffset < 0xffff &&
+           "Stack limits should be smaller than 16 bits");
+
+ // Initialize the flat scratch register pair.
+ // TODO: Can we use one s_mov_b64 here?
+
+    // Offset is in units of 256 bytes.
+ MachineBasicBlock &MBB = MF.front();
+ DebugLoc NoDL;
+ MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
+ const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
+
+ assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes));
+
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
+ .addImm(StackOffset);
+
+ // Documentation says size is "per-thread scratch size in bytes"
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
+ .addImm(StackSizeBytes);
+ }
+
return true;
}
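One detail in the flat-scratch prologue that is easy to miss: the offset written to FLAT_SCR_LO is in 256-byte granules, hence the `4 * IndirectBegin / 256` conversion from a 4-byte register index. A small sketch under that assumption:

```cpp
#include <cassert>
#include <cstdint>

// FLAT_SCR_LO takes the scratch base offset in 256-byte granules, so a
// 4-byte register index IndirectBegin converts as below.
static uint64_t flatScratchOffset(int IndirectBegin) {
  return IndirectBegin < 0 ? 0 : (4u * IndirectBegin) / 256;
}

int main() {
  assert(flatScratchOffset(-1) == 0);  // no indirect registers used
  assert(flatScratchOffset(64) == 1);  // 64 regs * 4 bytes = one granule
  assert(flatScratchOffset(100) == 1); // truncating division
}
```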
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp
index db19235995be..67421e231d8d 100644
--- a/lib/Target/R600/SILowerI1Copies.cpp
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -39,14 +40,14 @@ public:
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const override {
- return "SI Lower il Copies";
+ const char *getPassName() const override {
+ return "SI Lower i1 Copies";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -55,10 +56,10 @@ public:
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower il Copies", false, false)
+ "SI Lower i1 Copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower il Copies", false, false)
+ "SI Lower i1 Copies", false, false)
char SILowerI1Copies::ID = 0;
@@ -70,9 +71,9 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -84,71 +85,67 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
- I1Defs.push_back(MI.getOperand(0).getReg());
- MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ if (RC == &AMDGPU::VReg_1RegClass)
+ MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
continue;
}
- if (MI.getOpcode() == AMDGPU::V_AND_I1) {
- I1Defs.push_back(MI.getOperand(0).getReg());
- MI.setDesc(TII->get(AMDGPU::V_AND_B32_e32));
+ if (MI.getOpcode() != AMDGPU::COPY)
continue;
- }
- if (MI.getOpcode() == AMDGPU::V_OR_I1) {
- I1Defs.push_back(MI.getOperand(0).getReg());
- MI.setDesc(TII->get(AMDGPU::V_OR_B32_e32));
- continue;
- }
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
- if (MI.getOpcode() == AMDGPU::V_XOR_I1) {
- I1Defs.push_back(MI.getOperand(0).getReg());
- MI.setDesc(TII->get(AMDGPU::V_XOR_B32_e32));
+ if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
continue;
- }
-
- if (MI.getOpcode() != AMDGPU::COPY ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
- continue;
-
- const TargetRegisterClass *DstRC =
- MRI.getRegClass(MI.getOperand(0).getReg());
- const TargetRegisterClass *SrcRC =
- MRI.getRegClass(MI.getOperand(1).getReg());
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
if (DstRC == &AMDGPU::VReg_1RegClass &&
TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
- I1Defs.push_back(MI.getOperand(0).getReg());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .addOperand(MI.getOperand(0))
- .addImm(0)
- .addImm(-1)
- .addOperand(MI.getOperand(1))
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0);
+ I1Defs.push_back(Dst.getReg());
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
+ if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
+ if (DefInst->getOperand(1).isImm()) {
+ I1Defs.push_back(Dst.getReg());
+
+ int64_t Val = DefInst->getOperand(1).getImm();
+ assert(Val == 0 || Val == -1);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
+ .addOperand(Dst)
+ .addImm(Val);
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
+ .addOperand(Dst)
+ .addImm(0)
+ .addImm(-1)
+ .addOperand(Src);
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
- .addOperand(MI.getOperand(0))
- .addImm(0)
- .addOperand(MI.getOperand(1))
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0);
+ .addOperand(Dst)
+ .addOperand(Src)
+ .addImm(0);
MI.eraseFromParent();
}
}
}
for (unsigned Reg : I1Defs)
- MRI.setRegClass(Reg, &AMDGPU::VReg_32RegClass);
+ MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
return false;
}
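To see what the rewritten lowering does per lane: a VReg_1 copy from an SGPR-pair mask materializes 0 or -1 in each lane, and the reverse copy compares the lane value against zero. A toy model (helper names are illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Per-lane model of the copies being rewritten: mask -> VReg_1 materializes
// 0/-1 per lane (V_CNDMASK_B32_e64 0, -1, mask); the reverse copy compares
// the lane value against zero (V_CMP_NE_I32_e64).
static int32_t maskToLane(uint64_t Mask, unsigned Lane) {
  return (Mask >> Lane) & 1 ? -1 : 0;
}
static bool laneToMaskBit(int32_t LaneVal) { return LaneVal != 0; }

int main() {
  assert(maskToLane(0b1010, 1) == -1);
  assert(maskToLane(0b1010, 2) == 0);
  assert(laneToMaskBit(maskToLane(0b1010, 3)));
}
```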
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index c53a7e10d548..198dd568374e 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,10 @@
#include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -26,72 +28,50 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
+ TIDReg(AMDGPU::NoRegister),
+ HasSpilledVGPRs(false),
PSInputAddr(0),
- SpillTracker(),
- NumUserSGPRs(0) { }
+ NumUserSGPRs(0),
+ LDSWaveSpillSize(0) { }
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
- unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+ MachineFunction *MF,
+ unsigned FrameIndex,
+ unsigned SubIdx) {
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(
+ MF->getTarget().getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
+ Offset += SubIdx * 4;
- // We need to add this register as live out for the function, in order to
- // have the live range calculated directly.
- //
- // When register spilling begins, we have already calculated the live
- // live intervals for all the registers. Since we are spilling SGPRs to
- // VGPRs, we need to update the Lane VGPR's live interval every time we
- // spill or restore a register.
- //
- // Unfortunately, there is no good way to update the live interval as
- // the TargetInstrInfo callbacks for spilling and restoring don't give
- // us access to the live interval information.
- //
- // We are lucky, though, because the InlineSpiller calls
- // LiveRangeEdit::calculateRegClassAndHint() which iterates through
- // all the new register that have been created when restoring a register
- // and calls LiveIntervals::getInterval(), which creates and computes
- // the live interval for the newly created register. However, once this
- // live intervals is created, it doesn't change and since we usually reuse
- // the Lane VGPR multiple times, this means any uses after the first aren't
- // added to the live interval.
- //
- // To work around this, we add Lane VGPRs to the functions live out list,
- // so that we can guarantee its live range will cover all of its uses.
+ unsigned LaneVGPRIdx = Offset / (64 * 4);
+ unsigned Lane = (Offset / 4) % 64;
- for (MachineBasicBlock &MBB : *MF) {
- if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
- MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
- return VGPR;
- }
- }
+ struct SpilledReg Spill;
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Could not find S_ENDPGM instruction.");
+ if (!LaneVGPRs.count(LaneVGPRIdx)) {
+ unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
+ LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
+ MRI.setPhysRegUsed(LaneVGPR);
- return VGPR;
-}
-
-unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
- MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
- unsigned StartLane = CurrentLane;
- CurrentLane += NumRegs;
- if (!LaneVGPR) {
- LaneVGPR = createLaneVGPR(MRI, MF);
- } else {
- if (CurrentLane >= MAX_LANES) {
- StartLane = CurrentLane = 0;
- LaneVGPR = createLaneVGPR(MRI, MF);
+    // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
+ BI != BE; ++BI) {
+ BI->addLiveIn(LaneVGPR);
}
}
- return StartLane;
-}
-void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
- unsigned Reg,
- int Lane) {
- SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane);
+ Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
+ Spill.Lane = Lane;
+ return Spill;
}
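The replacement getSpilledReg maps a frame offset to a (lane VGPR, lane) pair by simple arithmetic: each lane VGPR stores 64 dwords, one per wavefront lane. A sketch of just that mapping:

```cpp
#include <cassert>
#include <utility>

// Model of the mapping above: each lane VGPR holds 64 dwords (one per lane),
// so a 4-byte-aligned frame offset picks a (lane VGPR index, lane) pair.
static std::pair<unsigned, unsigned> spillSlot(unsigned OffsetBytes) {
  unsigned LaneVGPRIdx = OffsetBytes / (64 * 4);
  unsigned Lane = (OffsetBytes / 4) % 64;
  return {LaneVGPRIdx, Lane};
}

int main() {
  assert(spillSlot(0) == std::make_pair(0u, 0u));
  assert(spillSlot(4) == std::make_pair(0u, 1u));   // next dword, same VGPR
  assert(spillSlot(256) == std::make_pair(1u, 0u)); // 64 dwords later
}
```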
-const SIMachineFunctionInfo::SpilledReg&
-SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) {
- return SpilledRegisters[FrameIndex];
+unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+ // FIXME: We should get this information from kernel attributes if it
+ // is available.
+ return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 9684d285cec2..71852717d7e6 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
-#ifndef SIMACHINEFUNCTIONINFO_H_
-#define SIMACHINEFUNCTIONINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
+#include "SIRegisterInfo.h"
#include <map>
namespace llvm {
@@ -26,6 +27,10 @@ class MachineRegisterInfo;
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
void anchor() override;
+
+ unsigned TIDReg;
+ bool HasSpilledVGPRs;
+
public:
struct SpilledReg {
@@ -36,33 +41,25 @@ public:
bool hasLane() { return Lane != -1;}
};
- struct RegSpillTracker {
- private:
- unsigned CurrentLane;
- std::map<unsigned, SpilledReg> SpilledRegisters;
- public:
- unsigned LaneVGPR;
- RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
- /// \p NumRegs The number of consecutive registers what need to be spilled.
- /// This function will ensure that all registers are stored in
- /// the same VGPR.
- /// \returns The lane to be used for storing the first register.
- unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
- unsigned NumRegs = 1);
- void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
- const SpilledReg& getSpilledReg(unsigned FrameIndex);
- bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
- };
-
// SIMachineFunctionInfo definition
SIMachineFunctionInfo(const MachineFunction &MF);
+ SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
+ unsigned SubIdx);
unsigned PSInputAddr;
- struct RegSpillTracker SpillTracker;
unsigned NumUserSGPRs;
+ std::map<unsigned, unsigned> LaneVGPRs;
+ unsigned LDSWaveSpillSize;
+  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
+  unsigned getTIDReg() const { return TIDReg; }
+ void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+ void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
+
+ unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
} // End namespace llvm
-#endif //_SIMACHINEFUNCTIONINFO_H_
+#endif
diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp
new file mode 100644
index 000000000000..f0e7edec6b48
--- /dev/null
+++ b/lib/Target/R600/SIPrepareScratchRegs.cpp
@@ -0,0 +1,196 @@
+//===-- SIPrepareScratchRegs.cpp - Prepare scratch registers --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This pass loads the scratch pointer and scratch offset into a register
+/// or a frame index which can be used anywhere in the program. These values
+/// will be used for spilling VGPRs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIPrepareScratchRegs : public MachineFunctionPass {
+
+private:
+ static char ID;
+
+public:
+ SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI prepare scratch registers";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIPrepareScratchRegs::ID = 0;
+
+FunctionPass *llvm::createSIPrepareScratchRegs() {
+ return new SIPrepareScratchRegs();
+}
+
+bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineBasicBlock *Entry = MF.begin();
+ MachineBasicBlock::iterator I = Entry->begin();
+ DebugLoc DL = I->getDebugLoc();
+
+ // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
+ // run this pass.
+ if (!MFI->hasSpilledVGPRs())
+ return false;
+
+ unsigned ScratchPtrPreloadReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchOffsetPreloadReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
+ if (!Entry->isLiveIn(ScratchPtrPreloadReg))
+ Entry->addLiveIn(ScratchPtrPreloadReg);
+
+ if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
+ Entry->addLiveIn(ScratchOffsetPreloadReg);
+
+ // Load the scratch pointer
+ unsigned ScratchPtrReg =
+ TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
+ int ScratchPtrFI = -1;
+
+ if (ScratchPtrReg != AMDGPU::NoRegister) {
+ // Found an SGPR to use.
+ MRI.setPhysRegUsed(ScratchPtrReg);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
+ .addReg(ScratchPtrPreloadReg);
+ } else {
+ // No SGPR is available, we must spill.
+ ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
+ .addReg(ScratchPtrPreloadReg)
+ .addFrameIndex(ScratchPtrFI);
+ }
+
+ // Load the scratch offset.
+ unsigned ScratchOffsetReg =
+ TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
+  int ScratchOffsetFI = -1;
+
+ if (ScratchOffsetReg != AMDGPU::NoRegister) {
+ // Found an SGPR to use
+ MRI.setPhysRegUsed(ScratchOffsetReg);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
+ .addReg(ScratchOffsetPreloadReg);
+ } else {
+ // No SGPR is available, we must spill.
+    ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
+ .addReg(ScratchOffsetPreloadReg)
+ .addFrameIndex(ScratchOffsetFI);
+ }
+
+
+ // Now that we have the scratch pointer and offset values, we need to
+ // add them to all the SI_SPILL_V* instructions.
+
+ RegScavenger RS;
+ bool UseRegScavenger =
+ (ScratchPtrReg == AMDGPU::NoRegister ||
+ ScratchOffsetReg == AMDGPU::NoRegister);
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ if (UseRegScavenger)
+ RS.enterBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ DebugLoc DL = MI.getDebugLoc();
+      switch (MI.getOpcode()) {
+      default: break;
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+
+ // Scratch Pointer
+ if (ScratchPtrReg == AMDGPU::NoRegister) {
+ ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
+ ScratchPtrReg)
+ .addFrameIndex(ScratchPtrFI)
+ .addReg(AMDGPU::NoRegister)
+ .addReg(AMDGPU::NoRegister);
+ } else if (!MBB.isLiveIn(ScratchPtrReg)) {
+ MBB.addLiveIn(ScratchPtrReg);
+ }
+
+ if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+ ScratchOffsetReg)
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::NoRegister)
+ .addReg(AMDGPU::NoRegister);
+ } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+ MBB.addLiveIn(ScratchOffsetReg);
+ }
+
+ if (ScratchPtrReg == AMDGPU::NoRegister ||
+ ScratchOffsetReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+ ScratchPtrReg = AMDGPU::SGPR0;
+ ScratchOffsetReg = AMDGPU::SGPR0;
+ }
+ MI.getOperand(2).setReg(ScratchPtrReg);
+ MI.getOperand(3).setReg(ScratchOffsetReg);
+
+ break;
+ }
+ if (UseRegScavenger)
+ RS.forward();
+ }
+ }
+ return true;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 2a9a2ac5dd61..f9feea470f15 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -20,7 +20,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -30,7 +33,21 @@ SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::EXEC);
+
+ // EXEC_LO and EXEC_HI could be allocated and used as regular register,
+ // but this seems likely to result in bugs, so I'm marking them as reserved.
+ Reserved.set(AMDGPU::EXEC_LO);
+ Reserved.set(AMDGPU::EXEC_HI);
+
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+ Reserved.set(AMDGPU::FLAT_SCR);
+ Reserved.set(AMDGPU::FLAT_SCR_LO);
+ Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+ // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
+ Reserved.set(AMDGPU::VGPR255);
+ Reserved.set(AMDGPU::VGPR254);
+
return Reserved;
}
@@ -43,23 +60,238 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
return Fn.getFrameInfo()->hasStackObjects();
}
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+ switch (Op) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ return 16;
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ return 8;
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ return 4;
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V96_RESTORE:
+ return 3;
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ return 2;
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ return 1;
+ default: llvm_unreachable("Invalid spill opcode");
+ }
+}
+
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp,
+ unsigned Value,
+ unsigned ScratchPtr,
+ unsigned ScratchOffset,
+ int64_t Offset,
+ RegScavenger *RS) const {
+
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ MachineBasicBlock *MBB = MI->getParent();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+ bool RanOutOfSGPRs = false;
+ unsigned SOffset = ScratchOffset;
+
+ unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
+ if (RsrcReg == AMDGPU::NoRegister) {
+ RanOutOfSGPRs = true;
+ RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+ }
+
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned Size = NumSubRegs * 4;
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
+ getSubReg(RsrcReg, AMDGPU::sub0_sub1))
+ .addReg(ScratchPtr)
+ .addReg(RsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+ getSubReg(RsrcReg, AMDGPU::sub2))
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(RsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+ getSubReg(RsrcReg, AMDGPU::sub3))
+ .addImm(Rsrc >> 32)
+ .addReg(RsrcReg, RegState::ImplicitDefine);
+
+ if (!isUInt<12>(Offset + Size)) {
+ SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+ if (SOffset == AMDGPU::NoRegister) {
+ RanOutOfSGPRs = true;
+ SOffset = AMDGPU::SGPR0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+ .addReg(ScratchOffset)
+ .addImm(Offset);
+ Offset = 0;
+ }
+
+ if (RanOutOfSGPRs)
+ Ctx.emitError("Ran out of SGPRs for spilling VGPRS");
+
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+ unsigned SubReg = NumSubRegs > 1 ?
+ getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+ Value;
+ bool IsKill = (i == e - 1);
+
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .addReg(SubReg, getDefRegState(IsLoad))
+ .addReg(RsrcReg, getKillRegState(IsKill))
+ .addImm(Offset)
+ .addReg(SOffset, getKillRegState(IsKill))
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+ }
+}
+
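The `isUInt<12>(Offset + Size)` guard in buildScratchLoadStore checks the end of the spill range because the emission loop advances Offset by 4 per sub-register; MUBUF immediate offsets are 12 bits wide on these targets. A standalone sketch of the bound (assuming an unsigned 12-bit field):

```cpp
#include <cassert>
#include <cstdint>

// Model of the guard above: the whole range [Offset, Offset + Size) must be
// encodable, since Offset is bumped by 4 for each sub-register access.
static bool immOffsetFits(int64_t Offset, unsigned NumSubRegs) {
  int64_t Size = NumSubRegs * 4;
  return Offset >= 0 && Offset + Size < (1 << 12);
}

int main() {
  assert(immOffsetFits(4060, 8));  // 4060 + 32 = 4092 is still encodable
  assert(!immOffsetFits(4080, 8)); // 4112: fold the base into SOffset instead
}
```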
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
MachineFunction *MF = MI->getParent()->getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
int Index = MI->getOperand(FIOperandNum).getIndex();
- int64_t Offset = FrameInfo->getObjectOffset(Index);
- FIOp.ChangeToImmediate(Offset);
- if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
- unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj);
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addImm(Offset);
- FIOp.ChangeToRegister(TmpReg, false);
+ switch (MI->getOpcode()) {
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ .addReg(SubReg)
+ .addImm(Spill.Lane);
+
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ bool isM0 = SubReg == AMDGPU::M0;
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ if (isM0) {
+ SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane)
+ .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ if (isM0) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(SubReg);
+ }
+ }
+ TII->insertNOPs(MI, 3);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // VGPR register spill
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
+ MI->eraseFromParent();
+ break;
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_V512_RESTORE: {
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
+ MI->eraseFromParent();
+ break;
+ }
+
+ default: {
+ int64_t Offset = FrameInfo->getObjectOffset(Index);
+ FIOp.ChangeToImmediate(Offset);
+ if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
+ unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addImm(Offset);
+ FIOp.ChangeToRegister(TmpReg, false);
+ }
+ }
}
}
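
// A hypothetical sketch of what getNumSubRegsForSpillOp() is assumed to
// compute for the SI_SPILL_{S,V}<N>_{SAVE,RESTORE} pseudos handled above:
// each pseudo moves an <N>-bit value, i.e. N/32 dword sub-registers.
static unsigned numSubRegsForSpillWidth(unsigned Bits) {
  return Bits / 32; // e.g. SI_SPILL_S512_* -> 16, SI_SPILL_V96_* -> 3, SI_SPILL_S32_* -> 1
}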
@@ -67,7 +299,7 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
default:
- case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ case MVT::i32: return &AMDGPU::VGPR_32RegClass;
}
}
@@ -78,13 +310,17 @@ unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
- const TargetRegisterClass *BaseClasses[] = {
- &AMDGPU::VReg_32RegClass,
+ static const TargetRegisterClass *BaseClasses[] = {
+ &AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::VReg_64RegClass,
&AMDGPU::SReg_64RegClass,
+ &AMDGPU::VReg_96RegClass,
+ &AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
- &AMDGPU::SReg_256RegClass
+ &AMDGPU::VReg_256RegClass,
+ &AMDGPU::SReg_256RegClass,
+ &AMDGPU::VReg_512RegClass
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
@@ -95,15 +331,8 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
return nullptr;
}
-bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
- if (!RC) {
- return false;
- }
- return !hasVGPRs(RC);
-}
-
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
+ return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
@@ -118,7 +347,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
} else if (SRC == &AMDGPU::SCCRegRegClass) {
return &AMDGPU::VCCRegRegClass;
} else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
- return &AMDGPU::VReg_32RegClass;
+ return &AMDGPU::VGPR_32RegClass;
} else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
return &AMDGPU::VReg_64RegClass;
} else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
@@ -148,24 +377,61 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
const TargetRegisterClass *SubRC,
unsigned Channel) const {
+
+ switch (Reg) {
+ case AMDGPU::VCC:
+ switch (Channel) {
+ case 0: return AMDGPU::VCC_LO;
+ case 1: return AMDGPU::VCC_HI;
+ default: llvm_unreachable("Invalid SubIdx for VCC");
+ }
+
+ case AMDGPU::FLAT_SCR:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::FLAT_SCR_LO;
+ case 1:
+ return AMDGPU::FLAT_SCR_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for FLAT_SCR");
+ }
+ break;
+
+ case AMDGPU::EXEC:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::EXEC_LO;
+ case 1:
+ return AMDGPU::EXEC_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for EXEC");
+ }
+ break;
+ }
+
+ const TargetRegisterClass *RC = getPhysRegClass(Reg);
+ // 32-bit registers don't have sub-registers, so we can just return the
+ // Reg. We need to have this check here, because the calculation below
+ // using getHWRegIndex() will fail with special 32-bit registers like
+ // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
+ if (RC->getSize() == 4) {
+ assert(Channel == 0);
+ return Reg;
+ }
+
unsigned Index = getHWRegIndex(Reg);
return SubRC->getRegister(Index + Channel);
}
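
// Expected behavior of getPhysRegSubReg for the cases above (illustrative
// values only; a 32-bit register falls through to the early return):
//   getPhysRegSubReg(AMDGPU::VCC,   SubRC, 1)             -> AMDGPU::VCC_HI
//   getPhysRegSubReg(AMDGPU::EXEC,  SubRC, 0)             -> AMDGPU::EXEC_LO
//   getPhysRegSubReg(AMDGPU::SGPR4, &SGPR_32RegClass, 0)  -> AMDGPU::SGPR4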
-bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const {
- switch (RCID) {
- default: return false;
- case AMDGPU::SSrc_32RegClassID:
- case AMDGPU::SSrc_64RegClassID:
- case AMDGPU::VSrc_32RegClassID:
- case AMDGPU::VSrc_64RegClassID:
- return true;
- }
+bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
+ return OpType == AMDGPU::OPERAND_REG_IMM32;
}
-bool SIRegisterInfo::regClassCanUseImmediate(
- const TargetRegisterClass *RC) const {
- return regClassCanUseImmediate(RC->getID());
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+ if (opCanUseLiteralConstant(OpType))
+ return true;
+
+ return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
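
// Self-contained sketch of the two operand-type predicates just defined,
// using stand-in enumerators for AMDGPU::OPERAND_REG_IMM32 and
// AMDGPU::OPERAND_REG_INLINE_C: every operand that accepts an arbitrary
// 32-bit literal also accepts the smaller inline-constant set.
enum StandInOperandType { REG_IMM32, REG_INLINE_C, OTHER };

static bool canUseLiteral(StandInOperandType T) { return T == REG_IMM32; }
static bool canUseInline(StandInOperandType T) {
  return canUseLiteral(T) || T == REG_INLINE_C;
}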
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
@@ -183,6 +449,29 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;
+ case SIRegisterInfo::INPUT_PTR:
+ return AMDGPU::SGPR0_SGPR1;
+ case SIRegisterInfo::TIDIG_X:
+ return AMDGPU::VGPR0;
+ case SIRegisterInfo::TIDIG_Y:
+ return AMDGPU::VGPR1;
+ case SIRegisterInfo::TIDIG_Z:
+ return AMDGPU::VGPR2;
}
llvm_unreachable("unexpected preloaded value type");
}
+
+/// \brief Returns a register that is not used at any point in the function.
+/// If all registers are used, then this function will return
+/// AMDGPU::NoRegister.
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const {
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (!MRI.isPhysRegUsed(*I))
+ return *I;
+ }
+ return AMDGPU::NoRegister;
+}
+
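// A hypothetical call site for findUnusedRegister (the names here are
// assumptions for illustration, not code from the patch): grab any SGPR the
// function never touches, treating NoRegister as "all in use".
//   unsigned Tmp = TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
//   if (Tmp == AMDGPU::NoRegister)
//     report_fatal_error("no unused SGPR available");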
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 5d0235c0f427..d14212c2b104 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
-#ifndef SIREGISTERINFO_H_
-#define SIREGISTERINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
@@ -42,11 +42,24 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getHWRegIndex(unsigned Reg) const override;
/// \brief Return the 'base' register class for this register.
- /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
+ /// e.g. SGPR0 => SReg_32, VGPR0 => VGPR_32, SGPR0_SGPR1 => SReg_64, etc.
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
/// \returns true if this class contains only SGPR registers
- bool isSGPRClass(const TargetRegisterClass *RC) const;
+ bool isSGPRClass(const TargetRegisterClass *RC) const {
+ if (!RC)
+ return false;
+
+ return !hasVGPRs(RC);
+ }
+
+ /// \returns true if this class ID contains only SGPR registers
+ bool isSGPRClassID(unsigned RCID) const {
+ if (static_cast<int>(RCID) == -1)
+ return false;
+
+ return isSGPRClass(getRegClass(RCID));
+ }
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
@@ -67,28 +80,41 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
unsigned Channel) const;
- /// \returns True if operands defined with this register class can accept
- /// inline immediates.
- bool regClassCanUseImmediate(int RCID) const;
+ /// \returns True if operands defined with this operand type can accept
+ /// a literal constant (i.e. any 32-bit immediate).
+ bool opCanUseLiteralConstant(unsigned OpType) const;
- /// \returns True if operands defined with this register class can accept
- /// inline immediates.
- bool regClassCanUseImmediate(const TargetRegisterClass *RC) const;
+ /// \returns True if operands defined with this operand type can accept
+ /// an inline constant. i.e. An integer value in the range (-16, 64) or
+ /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
+ bool opCanUseInlineConstant(unsigned OpType) const;
enum PreloadedValue {
TGID_X,
TGID_Y,
TGID_Z,
SCRATCH_WAVE_OFFSET,
- SCRATCH_PTR
+ SCRATCH_PTR,
+ INPUT_PTR,
+ TIDIG_X,
+ TIDIG_Y,
+ TIDIG_Z
};
/// \brief Returns the physical register that \p Value is stored in.
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;
+ unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const;
+
+private:
+ void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp, unsigned Value,
+ unsigned ScratchPtr, unsigned ScratchOffset,
+ int64_t Offset, RegScavenger *RS) const;
};
} // End namespace llvm
-#endif // SIREGISTERINFO_H_
+#endif
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8974b6300625..1a1efb0c89a9 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -27,10 +27,28 @@ def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
let HWEncoding = 106;
}
-def EXEC : SIReg<"EXEC", 126>;
+def EXEC_LO : SIReg<"exec_lo", 126>;
+def EXEC_HI : SIReg<"exec_hi", 127>;
+
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 126;
+}
+
def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>;
+def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256 bytes.
+def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+
+// Pair to indicate location of scratch space for flat accesses.
+def FLAT_SCR : RegisterWithSubRegs <"FLAT_SCR", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 104;
+}
+
// SGPR registers
foreach Index = 0-101 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
@@ -152,20 +170,24 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
//===----------------------------------------------------------------------===//
// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
+ let CopyCost = -1; // Theoretically it is possible to read from SCC,
+ // but it should never be necessary.
+}
+
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0Reg, VCC_LO)
+ (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
- (add SGPR_64Regs, VCCReg, EXECReg)
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
>;
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
@@ -175,8 +197,6 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256
def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
-
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
@@ -191,17 +211,49 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+}
+
+class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+}
+
+//===----------------------------------------------------------------------===//
+// SSrc_* Operands with an SGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegImmOperand<SReg_32>;
+
+def SSrc_64 : RegImmOperand<SReg_64>;
+
//===----------------------------------------------------------------------===//
-// [SV]Src_(32|64) register classes, can have either an immediate or an register
+// SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+def SCSrc_32 : RegInlineOperand<SReg_32>;
-def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+//===----------------------------------------------------------------------===//
+// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
+
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
+
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
+def VSrc_32 : RegImmOperand<VS_32>;
+
+def VSrc_64 : RegImmOperand<VS_64>;
+
+//===----------------------------------------------------------------------===//
+// VCSrc_* Operands with an SGPR, VGPR or an inline constant
+//===----------------------------------------------------------------------===//
-def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
+def VCSrc_32 : RegInlineOperand<VS_32>;
-def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+def VCSrc_64 : RegInlineOperand<VS_64>;
//===----------------------------------------------------------------------===//
// SGPR and VGPR register classes
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
index 28b65b825855..9b1f676020bf 100644
--- a/lib/Target/R600/SISchedule.td
+++ b/lib/Target/R600/SISchedule.td
@@ -7,9 +7,85 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: This is just a place holder for now.
+// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//
+def WriteBranch : SchedWrite;
+def WriteExport : SchedWrite;
+def WriteLDS : SchedWrite;
+def WriteSALU : SchedWrite;
+def WriteSMEM : SchedWrite;
+def WriteVMEM : SchedWrite;
-def SI_Itin : ProcessorItineraries <[], [], []>;
+// Vector ALU instructions
+def Write32Bit : SchedWrite;
+def WriteQuarterRate32 : SchedWrite;
+
+def WriteFloatFMA : SchedWrite;
+
+def WriteDouble : SchedWrite;
+def WriteDoubleAdd : SchedWrite;
+
+def SIFullSpeedModel : SchedMachineModel;
+def SIQuarterSpeedModel : SchedMachineModel;
+
+// BufferSize = 0 means the processors are in-order.
+let BufferSize = 0 in {
+
+// XXX: Are the resource counts correct?
+def HWBranch : ProcResource<1>;
+def HWExport : ProcResource<7>; // Taken from S_WAITCNT
+def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
+def HWSALU : ProcResource<1>;
+def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
+def HWVALU : ProcResource<1>;
+
+}
+
+class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
+ int latency> : WriteRes<write, resources> {
+ let Latency = latency;
+}
+
+class HWVALUWriteRes<SchedWrite write, int latency> :
+ HWWriteRes<write, [HWVALU], latency>;
+
+// The latency numbers are taken from the AMD Accelerated Parallel Processing
+// guide. They may not be accurate.
+
+// The latency values are 1 / (operations / cycle) / 4.
+multiclass SICommonWriteRes {
+
+ def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
+ def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
+ def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
+ def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+ def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
+ def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+
+ def : HWVALUWriteRes<Write32Bit, 1>;
+ def : HWVALUWriteRes<WriteQuarterRate32, 4>;
+}
+
+let SchedModel = SIFullSpeedModel in {
+
+defm : SICommonWriteRes;
+
+def : HWVALUWriteRes<WriteFloatFMA, 1>;
+def : HWVALUWriteRes<WriteDouble, 4>;
+def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+
+} // End SchedModel = SIFullSpeedModel
+
+let SchedModel = SIQuarterSpeedModel in {
+
+defm : SICommonWriteRes;
+
+def : HWVALUWriteRes<WriteFloatFMA, 16>;
+def : HWVALUWriteRes<WriteDouble, 16>;
+def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+
+} // End SchedModel = SIQuarterSpeedModel
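
// Worked check of the "1 / (operations / cycle) / 4" rule above, assuming
// the usual GCN arrangement (16-lane SIMD, 64-lane wavefront issued over
// 4 cycles, so a full-rate VALU op retires 1/4 wavefront-op per cycle);
// the constants are assumptions, not values taken from this patch.
#include <cassert>

static int modelLatency(double OpsPerCycle) {
  return static_cast<int>(1.0 / OpsPerCycle / 4.0);
}

int main() {
  assert(modelLatency(1.0 / 4.0) == 1);   // Write32Bit         -> latency 1
  assert(modelLatency(1.0 / 16.0) == 4);  // WriteQuarterRate32 -> latency 4
  assert(modelLatency(1.0 / 64.0) == 16); // WriteDouble in the quarter-speed model
  return 0;
}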
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 745c4b65644d..f91d1177bbae 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,11 +10,13 @@
//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
@@ -24,6 +26,8 @@
STATISTIC(NumInstructionsShrunk,
"Number of 64-bit instruction reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+ "Number of literal constants folded into 32-bit instructions.");
namespace llvm {
void initializeSIShrinkInstructionsPass(PassRegistry&);
@@ -41,13 +45,13 @@ public:
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const override {
+ const char *getPassName() const override {
return "SI Shrink Instructions";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -90,29 +94,83 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src1Mod =
TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
- if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
+ if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
return false;
- // We don't need to check src0, all input types are legal, so just make
- // sure src0 isn't using any modifiers.
- const MachineOperand *Src0Mod =
- TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
- if (Src0Mod && Src0Mod->getImm() != 0)
+ // We don't need to check src0, all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
return false;
// Check output modifiers
- const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
- if (Omod && Omod->getImm() != 0)
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
return false;
- const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
- return !Clamp || Clamp->getImm() == 0;
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ return true;
+}
+
+/// \brief This function checks \p MI for operands defined by a move immediate
+/// instruction and then folds the literal constant into the instruction if it
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
+/// and will only fold literal constants if we are still in SSA.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI, bool TryToCommute = true) {
+
+ if (!MRI.isSSA())
+ return;
+
+ assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+ TII->isVOPC(MI.getOpcode()));
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+
+ // Only one literal constant is allowed per instruction, so if src0 is a
+ // literal constant then we can't do any folding.
+ if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+ return;
+
+ // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+ // SGPR, we cannot commute the instruction, so we can't fold any literal
+ // constants.
+ if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+ return;
+
+ // Try to fold Src0
+ if (Src0->isReg()) {
+ unsigned Reg = Src0->getReg();
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && Def->isMoveImmediate()) {
+ MachineOperand &MovSrc = Def->getOperand(1);
+ bool ConstantFolded = false;
+
+ if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ Src0->ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ }
+ if (ConstantFolded) {
+ if (MRI.use_empty(Reg))
+ Def->eraseFromParent();
+ ++NumLiteralConstantsFolded;
+ return;
+ }
+ }
+ }
+
+ // We have failed to fold src0, so commute the instruction and try again.
+ if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+ foldImmediates(MI, TII, MRI, false);
}
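
// The transformation foldImmediates() performs, sketched in MIR-like
// pseudo-syntax (illustrative register names, not output from the patch):
//
//   %vreg1 = V_MOV_B32_e32 0x12345678        ; move-immediate definition
//   %vreg2 = V_ADD_I32_e32 %vreg1, %vreg3
//
// becomes, once the literal is folded and %vreg1 is otherwise unused:
//
//   %vreg2 = V_ADD_I32_e32 0x12345678, %vreg3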
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
const SIRegisterInfo &TRI = TII->getRegisterInfo();
std::vector<unsigned> I1Defs;
@@ -125,11 +183,23 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Src.isImm()) {
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ }
+
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
if (!canShrink(MI, TII, TRI, MRI)) {
- // Try commtuing the instruction and see if that enables us to shrink
+ // Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
!canShrink(MI, TII, TRI, MRI))
@@ -147,18 +217,17 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
// VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because the register allocator
- // has trouble with sequences like this, which cause the allocator
- // to run out of registes if vreg0 and vreg1 belong to the VCCReg
- // register class:
+ // force them to use VCC here, because the register allocator has
+ // trouble with sequences like this, which cause the allocator to run
+ // out of registers if vreg0 and vreg1 belong to the VCCReg register
+ // class:
// vreg0 = VOPC;
// vreg1 = VOPC;
// S_AND_B64 vreg0, vreg1
//
- // So, instead of forcing the instruction to write to VCC, we provide a
- // hint to the register allocator to use VCC and then we
- // we will run this pass again after RA and shrink it if it outpus to
- // VCC.
+ // So, instead of forcing the instruction to write to VCC, we provide
+ // a hint to the register allocator to use VCC and then we will run this
+ // pass again after RA and shrink it if it outputs to VCC.
MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
continue;
}
@@ -167,27 +236,28 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
// We can shrink this instruction
- DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+ DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
- MachineInstrBuilder MIB =
+ MachineInstrBuilder Inst32 =
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
// dst
- MIB.addOperand(MI.getOperand(0));
+ Inst32.addOperand(MI.getOperand(0));
- MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
const MachineOperand *Src1 =
TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (Src1)
- MIB.addOperand(*Src1);
-
- for (const MachineOperand &MO : MI.implicit_operands())
- MIB.addOperand(MO);
+ Inst32.addOperand(*Src1);
- DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
++NumInstructionsShrunk;
MI.eraseFromParent();
+
+ foldImmediates(*Inst32, TII, MRI);
+ DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
}
}
return false;
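
// Why the S_MOVK_I32 rewrite above saves 4 bytes, as a sketch (the encoding
// sizes are assumptions about the SOP1/SOPK formats, not taken from this
// patch): a non-inline literal on S_MOV_B32 needs an extra 32-bit literal
// dword after the instruction word, while S_MOVK_I32 packs a signed 16-bit
// immediate into the instruction word itself.
#include <cstdint>

static unsigned encodedBytes(int64_t Imm, bool IsInlineConstant) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX && !IsInlineConstant)
    return 4; // S_MOVK_I32: one instruction word
  return IsInlineConstant ? 4 : 8; // S_MOV_B32, plus a literal dword if needed
}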
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 367963aebb00..9318dc11d55d 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -87,7 +87,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
Value *BitCast = Builder.CreateBitCast(Ptr,
PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
LoadInst *Load = Builder.CreateLoad(BitCast);
- SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
for (unsigned i = 0, e = MD.size(); i != e; ++i) {
Load->setMetadata(MD[i].first, MD[i].second);
diff --git a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
index f437564f4b84..d723d6e3e8b7 100644
--- a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
@@ -16,11 +16,15 @@
using namespace llvm;
-/// \brief The target for the AMDGPU backend
+/// \brief The target which supports all AMD GPUs. This will eventually
+/// be deprecated and there will be an R600 target and a GCN target.
Target llvm::TheAMDGPUTarget;
+/// \brief The target for GCN GPUs.
+Target llvm::TheGCNTarget;
/// \brief Extern function to initialize the targets for the AMDGPU backend
extern "C" void LLVMInitializeR600TargetInfo() {
RegisterTarget<Triple::r600, false>
R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+ RegisterTarget<Triple::amdgcn, false> GCN(TheGCNTarget, "amdgcn", "AMD GCN GPUs");
}
diff --git a/lib/Target/R600/VIInstrFormats.td b/lib/Target/R600/VIInstrFormats.td
new file mode 100644
index 000000000000..5285d18ced46
--- /dev/null
+++ b/lib/Target/R600/VIInstrFormats.td
@@ -0,0 +1,145 @@
+//===-- VIInstrFormats.td - VI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// VI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class DSe_vi <bits<8> op> : Enc64 {
+
+ bits<8> vdst;
+ bits<1> gds;
+ bits<8> addr;
+ bits<8> data0;
+ bits<8> data1;
+ bits<8> offset0;
+ bits<8> offset1;
+
+ let Inst{7-0} = offset0;
+ let Inst{15-8} = offset1;
+ let Inst{16} = gds;
+ let Inst{24-17} = op;
+ let Inst{31-26} = 0x36; //encoding
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data0;
+ let Inst{55-48} = data1;
+ let Inst{63-56} = vdst;
+}
+
+class MUBUFe_vi <bits<7> op> : Enc64 {
+
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> lds;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{16} = lds;
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class MTBUFe_vi <bits<4> op> : Enc64 {
+
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{18-15} = op;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
+
+ bits<7> sbase;
+ bits<7> sdata;
+ bits<1> glc;
+ bits<20> offset;
+
+ let Inst{5-0} = sbase{6-1};
+ let Inst{12-6} = sdata;
+ let Inst{16} = glc;
+ let Inst{17} = imm;
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x30; //encoding
+ let Inst{51-32} = offset;
+}
+
+class VOP3e_vi <bits<10> op> : Enc64 {
+
+ bits<8> dst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{7-0} = dst;
+ let Inst{8} = src0_modifiers{1};
+ let Inst{9} = src1_modifiers{1};
+ let Inst{10} = src2_modifiers{1};
+ let Inst{15} = clamp;
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
+class EXPe_vi : EXPe {
+ let Inst{31-26} = 0x31; //encoding
+}
+
+class VINTRPe_vi <bits<2> op> : VINTRPe <op> {
+ let Inst{31-26} = 0x35; // encoding
+}
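
// Why the MUBUF/MTBUF encodings above keep only srsrc{6-2} in Inst{52-48}:
// a buffer resource descriptor occupies four consecutive SGPRs, so its
// register number is a multiple of 4 and the low two bits carry no
// information. A sketch of the round-trip (an illustration, not code from
// the patch):
static unsigned encodeSRsrc(unsigned SGPRNo) { return SGPRNo >> 2; }
static unsigned decodeSRsrc(unsigned Field) { return Field << 2; }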
diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td
new file mode 100644
index 000000000000..07cfa29ae12b
--- /dev/null
+++ b/lib/Target/R600/VIInstructions.td
@@ -0,0 +1,89 @@
+//===-- VIInstructions.td - VI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Instruction definitions for VI and newer.
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isVI in {
+
+def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32,
+ AMDGPUldexp
+>;
+def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>;
+def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
+def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32",
+ VOP_I32_I32_I32
+>;
+def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32",
+ VOP_I32_I32_I32
+>;
+
+def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32",
+ VOP_I32_F32_F32, int_SI_packf16
+>;
+
+defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
+ 0x14, "buffer_load_dword", VGPR_32, i32, global_load
+>;
+
+defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
+ 0x03, "buffer_load_format_xyzw", VReg_128
+>;
+
+} // End SubtargetPredicate = isVI
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isVI] in {
+
+def : Pat <
+ (int_SI_tid),
+ (V_MBCNT_HI_U32_B32 0xffffffff,
+ (V_MBCNT_LO_U32_B32 0xffffffff, 0))
+>;
+
+//===----------------------------------------------------------------------===//
+// SMEM Patterns
+//===----------------------------------------------------------------------===//
+
+// 1. Offset as a 20-bit immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF Patterns
+//===----------------------------------------------------------------------===//
+
+// Offset in a 32-bit VGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, i32:$voff),
+ (BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+>;
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW_VI_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+>;
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_VI_OFFSET,
+ BUFFER_LOAD_DWORD_VI_OFFEN,
+ BUFFER_LOAD_DWORD_VI_IDXEN,
+ BUFFER_LOAD_DWORD_VI_BOTHEN>;
+
+} // End Predicates = [isVI]
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a9aab86abdac..0fa56e66747d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -95,44 +95,6 @@ something that reassoc doesn't think about yet.
//===---------------------------------------------------------------------===//
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
- return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
- movapd %xmm1, %xmm2
- mulsd LCPI1_1(%rip), %xmm1
- mulsd LCPI1_0(%rip), %xmm2
- addsd %xmm0, %xmm1
- addsd %xmm0, %xmm2
- movapd %xmm1, %xmm0
- mulsd %xmm2, %xmm0
- ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
- return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
- mulsd LCPI1_0(%rip), %xmm1
- movapd %xmm1, %xmm2
- addsd %xmm0, %xmm2
- subsd %xmm1, %xmm0
- mulsd %xmm2, %xmm0
- ret
-
-This doesn't need -ffast-math support at all. This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
-//===---------------------------------------------------------------------===//
-
These two functions should generate the same code on big-endian systems:
int g(int *j,int *l) { return memcmp(j,l,4); }
@@ -771,7 +733,7 @@ f (unsigned long a, unsigned long b, unsigned long c)
return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
}
Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -784,7 +746,7 @@ void clear_pmd_range(unsigned long start, unsigned long end)
}
The expression should optimize to something like
"!((start|end)&~PMD_MASK). Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -803,7 +765,7 @@ int f(int x, int y)
return (abs(x)) >= 0;
}
This should optimize to x == INT_MIN. (With -fwrapv.) Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -841,117 +803,117 @@ rshift_gt (unsigned int a)
All should simplify to a single comparison. All of these are
currently not optimized with "clang -emit-llvm-bc | opt
--std-compile-opts".
+-O3".
//===---------------------------------------------------------------------===//
From GCC Bug 32605:
int c(int* x) {return (char*)x+2 == (char*)x;}
Should combine to 0. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it).
+-emit-llvm-bc | opt -O3" (although llc can optimize it).
//===---------------------------------------------------------------------===//
int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
Should be combined to "((b >> 1) | b) & 1". Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);}
Should combine to "x | (y & 3)". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
Should fold to "(~a & c) | (a & b)". Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a,int b) {return (~(a|b))|a;}
Should fold to "a|~b". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b) {return (a&&b) || (a&&!b);}
Should fold to "a". Currently not optimized with "clang -emit-llvm-bc
-| opt -std-compile-opts".
+| opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (a&&b) || (!a&&c);}
Should fold to "a ? b : c", or at least something sane. Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
Should fold to a && (b || c). Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return x | ((x & 8) ^ 8);}
Should combine to x | 8. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return x ^ ((x & 8) ^ 8);}
Should also combine to x | 8. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return ((x | -9) ^ 8) & x;}
Should combine to x & -9. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
Should combine to "a * 0x88888888 >> 31". Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(char* x) {if ((*x & 32) == 0) return b();}
There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned long long x) {return 40 * (x >> 1);}
Should combine to "20 * (((unsigned)x) & -2)". Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int g(int x) { return (x - 10) < 0; }
Should combine to "x <= 9" (the sub has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int g(int x) { return (x + 10) < 0; }
Should combine to "x < -10" (the add has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int f(int i, int j) { return i < j + 1; }
int g(int i, int j) { return j > i - 1; }
Should combine to "i <= j" (the add/sub has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
The & 15 part should be optimized away, it doesn't change the result. Currently
-not optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+not optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -1163,7 +1125,7 @@ There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
GCC testsuite, ones we don't get yet are (checked through loadpre25):
[CRIT EDGE BREAKING]
-loadpre3.c predcom-4.c
+predcom-4.c
[PRE OF READONLY CALL]
loadpre5.c
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 9df005401897..551189c13c71 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -48,7 +48,7 @@ class SparcAsmParser : public MCTargetAsmParser {
// public interface of the MCTargetAsmParser.
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -386,16 +386,13 @@ public:
bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
- default:
- break;
-
case Match_Success: {
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, STI);
@@ -408,7 +405,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -422,7 +419,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction mnemonic");
}
- return true;
+ llvm_unreachable("Implement any new match types added!");
}
bool SparcAsmParser::
@@ -444,7 +441,7 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
return Error(StartLoc, "invalid register name");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
unsigned VariantID);
bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index cebda920e74c..c486411f9a1e 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td)
tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SparcGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
@@ -24,8 +23,6 @@ add_llvm_target(SparcCodeGen
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
- SparcJITInfo.cpp
- SparcCodeEmitter.cpp
SparcMCInstLower.cpp
SparcTargetObjectFile.cpp
)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index f3441ffcf6a6..28369fd5c342 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -110,7 +110,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
MachineBasicBlock::iterator MI = I;
@@ -187,7 +187,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (J->getOpcode() == SP::RESTORErr
|| J->getOpcode() == SP::RESTOREri) {
// change retl to ret.
- slot->setDesc(TM.getInstrInfo()->get(SP::RET));
+ slot->setDesc(TM.getSubtargetImpl()->getInstrInfo()->get(SP::RET));
return J;
}
}
@@ -329,7 +329,8 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
{
// Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
+ for (MCRegAliasIterator AI(Reg, TM.getSubtargetImpl()->getRegisterInfo(),
+ true);
AI.isValid(); ++AI)
if (RegSet.count(*AI))
return true;
@@ -482,7 +483,7 @@ bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
if (PrevInst->isBundledWithSucc())
return false;
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
switch (PrevInst->getOpcode()) {
default: break;
diff --git a/lib/Target/Sparc/Disassembler/LLVMBuild.txt b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
index c27398fc654e..bd5397dac314 100644
--- a/lib/Target/Sparc/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SparcDisassembler
parent = Sparc
-required_libraries = MC SparcInfo Support
+required_libraries = MCDisassembler SparcInfo Support
add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 4df09904c9f6..8bc4ca981614 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -16,7 +16,6 @@
#include "SparcSubtarget.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -27,23 +26,17 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
-/// SparcDisassembler - a disassembler class for Sparc.
+/// A disassembler class for Sparc.
class SparcDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
- SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
- MCDisassembler(STI, Ctx)
- {}
+ SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
virtual ~SparcDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
}
@@ -213,47 +206,37 @@ static DecodeStatus DecodeSWAP(MCInst &Inst, unsigned insn, uint64_t Address,
#include "SparcGenDisassemblerTables.inc"
-/// readInstruction - read four bytes from the MemoryObject
-/// and return 32 bit word.
-static DecodeStatus readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn) {
- uint8_t Bytes[4];
-
+/// Read four bytes from the ArrayRef and return 32 bit word.
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return MCDisassembler::Fail;
}
// Encoded as a big-endian 32-bit word in the stream.
- insn = (Bytes[3] << 0) |
- (Bytes[2] << 8) |
- (Bytes[1] << 16) |
- (Bytes[0] << 24);
+ Insn =
+ (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
return MCDisassembler::Success;
}
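
// An equivalent way to reassemble the big-endian word, sketched with
// llvm/Support/Endian.h (an alternative formulation, not what the patch
// uses):
//   #include "llvm/Support/Endian.h"
//   uint32_t Insn = llvm::support::endian::read32be(Bytes.data());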
-
-DecodeStatus
-SparcDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
- DecodeStatus Result = readInstruction32(Region, Address, Size, Insn);
+ DecodeStatus Result = readInstruction32(Bytes, Address, Size, Insn);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableSparc32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 8fe4075d137f..c96d5ad154e3 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SparcINSTPRINTER_H
-#define SparcINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
+#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index d42bcee6b3c0..8d79396d936e 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SPARC_FIXUPKINDS_H
-#define LLVM_SPARC_FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index df66ca9006b8..6767e4b224f8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -35,7 +35,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
Data64bitsDirective = (isV9) ? "\t.xword\t" : nullptr;
ZeroDirective = "\t.skip\t";
CommentString = "!";
- HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
@@ -43,9 +42,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- if (TheTriple.getOS() == llvm::Triple::Solaris ||
- TheTriple.getOS() == llvm::Triple::OpenBSD)
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
const MCExpr*
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index e126b687afdb..84de55145b65 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETASMINFO_H
-#define SPARCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 7f01ab06879f..d97e3a25c5a7 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -161,8 +161,9 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
bool
SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
}
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index f0d0ef363ad8..f72c6c454963 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SPARCMCEXPR_H
-#define LLVM_SPARCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCEXPR_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCEXPR_H
#include "SparcFixupKinds.h"
#include "llvm/MC/MCExpr.h"
@@ -87,7 +87,8 @@ public:
/// @}
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 571017dbf679..3cc43142da07 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -125,10 +125,8 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
- MCStreamer *S =
- createELFStreamer(Context, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
new SparcTargetELFStreamer(*S);
return S;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index c8029a8c594c..c31943d7720e 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCMCTARGETDESC_H
-#define SPARCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index bcc02918cdb8..c2a95b47151a 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
SparcGenAsmWriter.inc SparcGenAsmMatcher.inc \
SparcGenDAGISel.inc SparcGenDisassemblerTables.inc \
SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \
- SparcGenCodeEmitter.inc SparcGenMCCodeEmitter.inc
+ SparcGenMCCodeEmitter.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
index 34e68cfa78f7..647c2763e521 100644
--- a/lib/Target/Sparc/README.txt
+++ b/lib/Target/Sparc/README.txt
@@ -56,6 +56,4 @@ int %t1(int %a, int %b) {
leaf fns.
* Fill delay slots
-* Implement JIT support
-
* Use %g0 directly to materialize 0. No instruction is required.
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index de20aaa5db5d..96378d522dc0 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_SPARC_H
-#define TARGET_SPARC_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARC_H
+#define LLVM_LIB_TARGET_SPARC_SPARC_H
#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,8 +29,6 @@ namespace llvm {
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
- FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
- JITCodeEmitter &JCE);
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
MCInst &OutMI,
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 1b7330e8c5b4..6432003db014 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand (opNum);
SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
@@ -450,7 +450,8 @@ void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataSection());
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ unsigned PtrSize =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(0);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize);
diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp
deleted file mode 100644
index 247da2a95797..000000000000
--- a/lib/Target/Sparc/SparcCodeEmitter.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Sparc machine instructions
-// into relocatable machine code.
-//
-//===---------------------------------------------------------------------===//
-
-#include "Sparc.h"
-#include "MCTargetDesc/SparcMCExpr.h"
-#include "SparcRelocations.h"
-#include "SparcTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
-class SparcCodeEmitter : public MachineFunctionPass {
- SparcJITInfo *JTI;
- const SparcInstrInfo *II;
- const DataLayout *TD;
- const SparcSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- bool IsPIC;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo> ();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
-public:
- SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
- TM(tm), MCE(mce), MCPEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "Sparc Machine Code Emitter";
- }
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- void emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB);
-
-private:
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getCallTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchPredTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchOnRegTargetOpValue(const MachineInstr &MI,
- unsigned) const;
-
- void emitWord(unsigned Word);
-
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
-};
-} // end anonymous namespace.
-
-char SparcCodeEmitter::ID = 0;
-
-bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- SparcTargetMachine &Target = static_cast<SparcTargetMachine &>(
- const_cast<TargetMachine &>(MF.getTarget()));
-
- JTI = Target.getJITInfo();
- II = Target.getInstrInfo();
- TD = Target.getDataLayout();
- Subtarget = &TM.getSubtarget<SparcSubtarget> ();
- MCPEs = &MF.getConstantPool()->getConstants();
- JTI->Initialize(MF, IsPIC);
- MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB){
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E;)
- emitInstruction(*I++, *MBB);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
-
- MCE.processDebugLoc(MI->getDebugLoc(), true);
-
- ++NumEmitted;
-
- switch (MI->getOpcode()) {
- default: {
- emitWord(getBinaryCodeForInstr(*MI));
- break;
- }
- case TargetOpcode::INLINEASM: {
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI->getOperand(0).getSymbolName()[0]) {
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- }
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL: {
- MCE.emitLabel(MI->getOperand(0).getMCSymbol());
- break;
- }
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL: {
- // Do nothing.
- break;
- }
- case SP::GETPCX: {
- report_fatal_error("JIT does not support pseudo instruction GETPCX yet!");
- break;
- }
- }
-
- MCE.processDebugLoc(MI->getDebugLoc(), false);
-}
-
-void SparcCodeEmitter::emitWord(unsigned Word) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Word) << "\n");
- MCE.emitWordBE(Word);
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO));
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-unsigned SparcCodeEmitter::getCallTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchPredTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchOnRegTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const {
-
- unsigned TF = MO.getTargetFlags();
- switch (TF) {
- default:
- case SparcMCExpr::VK_Sparc_None: break;
- case SparcMCExpr::VK_Sparc_LO: return SP::reloc_sparc_lo;
- case SparcMCExpr::VK_Sparc_HI: return SP::reloc_sparc_hi;
- case SparcMCExpr::VK_Sparc_H44: return SP::reloc_sparc_h44;
- case SparcMCExpr::VK_Sparc_M44: return SP::reloc_sparc_m44;
- case SparcMCExpr::VK_Sparc_L44: return SP::reloc_sparc_l44;
- case SparcMCExpr::VK_Sparc_HH: return SP::reloc_sparc_hh;
- case SparcMCExpr::VK_Sparc_HM: return SP::reloc_sparc_hm;
- }
-
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default: break;
- case SP::CALL: return SP::reloc_sparc_pc30;
- case SP::BA:
- case SP::BCOND:
- case SP::FBCOND: return SP::reloc_sparc_pc22;
- case SP::BPXCC: return SP::reloc_sparc_pc19;
- }
- llvm_unreachable("unknown reloc!");
-}
-
-void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0,
- true));
-}
-
-void SparcCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, 0, 0));
-}
-
-void SparcCodeEmitter::
-emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, false));
-}
-
-void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB));
-}
-
-
-/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc
-/// code to the specified MCE object.
-FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new SparcCodeEmitter(TM, JCE);
-}
-
-#include "SparcGenCodeEmitter.inc"
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 3cdfda3e059a..1b67b4b34036 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -46,7 +46,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (NumBytes >= -4096 && NumBytes < 4096) {
BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
@@ -88,7 +88,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -153,7 +153,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
assert(MBBI->getOpcode() == SP::RETL &&
"Can only put epilog before 'retl' instruction!");
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index a7d1b8902dcd..9e53994e3c35 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_FRAMEINFO_H
-#define SPARC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
#include "Sparc.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 2fade27f2d5d..b3b029e3632d 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -66,16 +66,15 @@ private:
} // end anonymous namespace
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg,
- getTargetLowering()->getPointerTy()).getNode();
+ unsigned GlobalBaseReg =
+ TM.getSubtargetImpl()->getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
}
bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -90,8 +89,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
- getTargetLowering()->getPointerTy());
+ Base =
+ CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
} else {
Base = Addr.getOperand(0);
}
@@ -135,7 +134,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
}
R1 = Addr;
- R2 = CurDAG->getRegister(SP::G0, getTargetLowering()->getPointerTy());
+ R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy());
return true;
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 990f52a97275..0a3607e02a0a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -190,8 +190,8 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
@@ -250,8 +250,8 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc64);
@@ -349,8 +349,8 @@ LowerFormalArguments_32(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
@@ -474,7 +474,7 @@ LowerFormalArguments_32(SDValue Chain,
DAG.getConstant(Offset, MVT::i32));
Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
MachinePointerInfo(),
- VA.getValVT(), false, false,0);
+ VA.getValVT(), false, false, false, 0);
Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
}
InVals.push_back(Load);
@@ -549,8 +549,8 @@ LowerFormalArguments_64(SDValue Chain,
// Analyze arguments according to CC_Sparc64.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
// The argument array begins at %fp+BIAS+128, after the register save area.
@@ -698,8 +698,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
// Get the size of the outgoing arguments stack space requirement.
@@ -915,7 +915,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI =
- ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
const uint32_t *Mask = ((hasReturnsTwice)
? TRI->getRTCallPreservedMask(CallConv)
: TRI->getCallPreservedMask(CallConv));
@@ -934,8 +934,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
@@ -1061,8 +1061,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
// Get the size of the outgoing arguments stack space requirement.
@@ -1228,10 +1228,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI =
- ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
- const uint32_t *Mask = ((hasReturnsTwice)
- ? TRI->getRTCallPreservedMask(CLI.CallConv)
- : TRI->getCallPreservedMask(CLI.CallConv));
+ getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
+ const uint32_t *Mask =
+ ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
+ : TRI->getCallPreservedMask(CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1255,8 +1255,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Set inreg flag manually for codegen generated library calls that
// return float.
@@ -1366,7 +1366,7 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
}
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new SparcELFTargetObjectFile()) {
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<SparcSubtarget>();
// Set up the register classes.
@@ -1378,11 +1378,14 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
// Turn FP extload into load/fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+ }
// Sparc doesn't have i1 sign extending load
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1905,7 +1908,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Ops.push_back(Symbol);
Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
const uint32_t *Mask = getTargetMachine()
- .getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
@@ -2754,9 +2759,10 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
ISD::SETNE);
}
// MulResult is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, delete the
- // node. The above EXTRACT_ELEMENT nodes should have been folded.
- DAG.DeleteNode(MulResult.getNode());
+ // generally permitted during this phase of legalization, ensure that
+ // nothing is left using the node. The above EXTRACT_ELEMENT nodes should have
+ // been folded.
+ assert(MulResult->use_empty() && "Illegally typed node still in use!");
SDValue Ops[2] = { BottomHalf, TopHalf } ;
return DAG.getMergeValues(Ops, dl);
@@ -2900,7 +2906,8 @@ MachineBasicBlock*
SparcTargetLowering::expandSelectCC(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned BROpcode) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
@@ -2961,7 +2968,8 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode,
unsigned CondCode) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
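Two API updates run through this file: CCState's constructor no longer takes a TargetMachine, and setLoadExtAction is now keyed on (extension kind, result type, memory type) instead of the memory type alone, so a target marks every value-type/memory-type pair it cares about. A fragment mirroring the hunk above:

// Mark i1 sign-extending loads as Promote for every integer result type.
for (MVT VT : MVT::integer_valuetypes())
  setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);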
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index a24cc82eecb4..a62d569164ce 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_ISELLOWERING_H
-#define SPARC_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
+#define LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#include "Sparc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 3a1472ee9b15..fe93ed7b57c7 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCINSTRUCTIONINFO_H
-#define SPARCINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H
#include "SparcRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 960261ce9835..c32023901b2d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -331,7 +331,7 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
-let isBarrier = 1, isTerminator = 1, rd = 0b1000, rs1 = 0, simm13 = 5 in
+let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
let rd = 0 in
diff --git a/lib/Target/Sparc/SparcInstrVIS.td b/lib/Target/Sparc/SparcInstrVIS.td
index 3e2b49d05c28..d9adf3e8b0f5 100644
--- a/lib/Target/Sparc/SparcInstrVIS.td
+++ b/lib/Target/Sparc/SparcInstrVIS.td
@@ -71,13 +71,13 @@ def FPACKFIX : VISInst2<0b000111101, "fpackfix">;
def FEXPAND : VISInst2<0b001001101, "fexpand">;
def FPMERGE : VISInst <0b001001011, "fpmerge">;
-def FMUL8X16 : VISInst<0b00110001, "fmul8x16">;
-def FMUL8X16AU : VISInst<0b00110011, "fmul8x16au">;
-def FMUL8X16AL : VISInst<0b00110101, "fmul8x16al">;
-def FMUL8SUX16 : VISInst<0b00110110, "fmul8sux16">;
-def FMUL8ULX16 : VISInst<0b00110111, "fmul8ulx16">;
-def FMULD8SUX16 : VISInst<0b00111000, "fmuld8sux16">;
-def FMULD8ULX16 : VISInst<0b00111001, "fmuld8ulx16">;
+def FMUL8X16 : VISInst<0b000110001, "fmul8x16">;
+def FMUL8X16AU : VISInst<0b000110011, "fmul8x16au">;
+def FMUL8X16AL : VISInst<0b000110101, "fmul8x16al">;
+def FMUL8SUX16 : VISInst<0b000110110, "fmul8sux16">;
+def FMUL8ULX16 : VISInst<0b000110111, "fmul8ulx16">;
+def FMULD8SUX16 : VISInst<0b000111000, "fmuld8sux16">;
+def FMULD8ULX16 : VISInst<0b000111001, "fmuld8ulx16">;
def ALIGNADDR : VISInst<0b000011000, "alignaddr", I64Regs>;
def ALIGNADDRL : VISInst<0b000011010, "alignaddrl", I64Regs>;
@@ -134,7 +134,7 @@ def EDGE16L : VISInst<0b000000110, "edge16l", I64Regs>;
def EDGE32 : VISInst<0b000001000, "edge32", I64Regs>;
def EDGE32L : VISInst<0b000001010, "edge32l", I64Regs>;
-def PDIST : VISInst<0b00111110, "pdist">;
+def PDIST : VISInst<0b000111110, "pdist">;
def ARRAY8 : VISInst<0b000010000, "array8", I64Regs>;
def ARRAY16 : VISInst<0b000010010, "array16", I64Regs>;
@@ -181,7 +181,7 @@ def CMASK32 : VISInstFormat<0b000011111, (outs), (ins I64Regs:$rs2),
}
-def FCHKSM16 : VISInst<0b01000100, "fchksm16">;
+def FCHKSM16 : VISInst<0b001000100, "fchksm16">;
def FHADDS : F3_3<0b10, 0b110100, 0b001100001,
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
@@ -229,14 +229,14 @@ def FNSMULD : F3_3<0b10, 0b110100, 0b001111001,
def FPADD64 : VISInst<0b001000010, "fpadd64">;
-def FSLL16 : VISInst<0b00100001, "fsll16">;
-def FSRL16 : VISInst<0b00100011, "fsrl16">;
-def FSLL32 : VISInst<0b00100101, "fsll32">;
-def FSRL32 : VISInst<0b00100111, "fsrl32">;
-def FSLAS16 : VISInst<0b00101001, "fslas16">;
-def FSRA16 : VISInst<0b00101011, "fsra16">;
-def FSLAS32 : VISInst<0b00101101, "fslas32">;
-def FSRA32 : VISInst<0b00101111, "fsra32">;
+def FSLL16 : VISInst<0b000100001, "fsll16">;
+def FSRL16 : VISInst<0b000100011, "fsrl16">;
+def FSLL32 : VISInst<0b000100101, "fsll32">;
+def FSRL32 : VISInst<0b000100111, "fsrl32">;
+def FSLAS16 : VISInst<0b000101001, "fslas16">;
+def FSRA16 : VISInst<0b000101011, "fsra16">;
+def FSLAS32 : VISInst<0b000101101, "fslas32">;
+def FSRA32 : VISInst<0b000101111, "fsra32">;
let rs1 = 0 in
def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
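The VIS opcode rewrites change no values; they pad the binary literals to nine digits so the written width matches the 9-bit opf field of the VIS instruction formats (the TA5 hunk earlier pads rd to five digits for the same reason). The padding is value-neutral, as a quick check shows:

static_assert(0b00110001 == 0b000110001,
              "leading zeros do not change a binary literal's value");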
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
deleted file mode 100644
index d0eec98b5e91..000000000000
--- a/lib/Target/Sparc/SparcJITInfo.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Sparc target.
-//
-//===----------------------------------------------------------------------===//
-#include "SparcJITInfo.h"
-#include "Sparc.h"
-#include "SparcRelocations.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Support/Memory.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-extern "C" void SparcCompilationCallback();
-
-extern "C" {
-#if defined (__sparc__)
-
-#if defined(__arch64__)
-#define FRAME_PTR(X) #X "+2047"
-#else
-#define FRAME_PTR(X) #X
-#endif
-
- asm(
- ".text\n"
- "\t.align 4\n"
- "\t.global SparcCompilationCallback\n"
- "\t.type SparcCompilationCallback, #function\n"
- "SparcCompilationCallback:\n"
- // Save current register window and create stack.
- // 128 (save area) + 6*8 (for arguments) + 16*8 (for float regfile) = 304
- "\tsave %sp, -304, %sp\n"
- // save float regfile to the stack.
- "\tstd %f0, [" FRAME_PTR(%fp) "-0]\n"
- "\tstd %f2, [" FRAME_PTR(%fp) "-8]\n"
- "\tstd %f4, [" FRAME_PTR(%fp) "-16]\n"
- "\tstd %f6, [" FRAME_PTR(%fp) "-24]\n"
- "\tstd %f8, [" FRAME_PTR(%fp) "-32]\n"
- "\tstd %f10, [" FRAME_PTR(%fp) "-40]\n"
- "\tstd %f12, [" FRAME_PTR(%fp) "-48]\n"
- "\tstd %f14, [" FRAME_PTR(%fp) "-56]\n"
- "\tstd %f16, [" FRAME_PTR(%fp) "-64]\n"
- "\tstd %f18, [" FRAME_PTR(%fp) "-72]\n"
- "\tstd %f20, [" FRAME_PTR(%fp) "-80]\n"
- "\tstd %f22, [" FRAME_PTR(%fp) "-88]\n"
- "\tstd %f24, [" FRAME_PTR(%fp) "-96]\n"
- "\tstd %f26, [" FRAME_PTR(%fp) "-104]\n"
- "\tstd %f28, [" FRAME_PTR(%fp) "-112]\n"
- "\tstd %f30, [" FRAME_PTR(%fp) "-120]\n"
- // stubaddr is in %g1.
- "\tcall SparcCompilationCallbackC\n"
- "\t mov %g1, %o0\n"
- // restore float regfile from the stack.
- "\tldd [" FRAME_PTR(%fp) "-0], %f0\n"
- "\tldd [" FRAME_PTR(%fp) "-8], %f2\n"
- "\tldd [" FRAME_PTR(%fp) "-16], %f4\n"
- "\tldd [" FRAME_PTR(%fp) "-24], %f6\n"
- "\tldd [" FRAME_PTR(%fp) "-32], %f8\n"
- "\tldd [" FRAME_PTR(%fp) "-40], %f10\n"
- "\tldd [" FRAME_PTR(%fp) "-48], %f12\n"
- "\tldd [" FRAME_PTR(%fp) "-56], %f14\n"
- "\tldd [" FRAME_PTR(%fp) "-64], %f16\n"
- "\tldd [" FRAME_PTR(%fp) "-72], %f18\n"
- "\tldd [" FRAME_PTR(%fp) "-80], %f20\n"
- "\tldd [" FRAME_PTR(%fp) "-88], %f22\n"
- "\tldd [" FRAME_PTR(%fp) "-96], %f24\n"
- "\tldd [" FRAME_PTR(%fp) "-104], %f26\n"
- "\tldd [" FRAME_PTR(%fp) "-112], %f28\n"
- "\tldd [" FRAME_PTR(%fp) "-120], %f30\n"
- // restore original register window and
- // copy %o0 to %g1
- "\trestore %o0, 0, %g1\n"
- // call the new stub
- "\tjmp %g1\n"
- "\t nop\n"
- "\t.size SparcCompilationCallback, .-SparcCompilationCallback"
- );
-#else
- void SparcCompilationCallback() {
- llvm_unreachable(
- "Cannot call SparcCompilationCallback() on a non-sparc arch!");
- }
-#endif
-}
-
-
-#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF))
-#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define NOP_INST SETHI_INST(0, 0)
-#define OR_INST_I(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define OR_INST_R(rs1, rs2, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \
- | ((rs1) << 14) | (0 << 13) | ((rs2) & 0x1F))
-#define RDPC_INST(rd) (0x80000000 | ((rd) << 25) | (0x28 << 19) \
- | (5 << 14))
-#define LDX_INST(rs1, imm, rd) (0xC0000000 | ((rd) << 25) | (0x0B << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define SLLX_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x25 << 19) \
- | ((rs1) << 14) | (3 << 12) | ((imm) & 0x3F))
-#define SUB_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x04 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define XOR_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x03 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define BA_INST(tgt) (0x10800000 | ((tgt) & 0x3FFFFF))
-
-// Emit instructions to jump to Addr and store the starting address of
-// the instructions emitted in the scratch register.
-static void emitInstrForIndirectJump(intptr_t Addr,
- unsigned scratch,
- SmallVectorImpl<uint32_t> &Insts) {
-
- if (isInt<13>(Addr)) {
- // Emit: jmpl %g0+Addr, <scratch>
- // nop
- Insts.push_back(JMP_INST(0, LO10(Addr), scratch));
- Insts.push_back(NOP_INST);
- return;
- }
-
- if (isUInt<32>(Addr)) {
- // Emit: sethi %hi(Addr), scratch
- // jmpl scratch+%lo(Addr), scratch
- // sub scratch, 4, scratch
- Insts.push_back(SETHI_INST(HI22(Addr), scratch));
- Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch));
- Insts.push_back(SUB_INST(scratch, 4, scratch));
- return;
- }
-
- if (Addr < 0 && isInt<33>(Addr)) {
- // Emit: sethi %hix(Addr), scratch)
- // xor scratch, %lox(Addr), scratch
- // jmpl scratch+0, scratch
- // sub scratch, 8, scratch
- Insts.push_back(SETHI_INST(HIX22(Addr), scratch));
- Insts.push_back(XOR_INST(scratch, LOX10(Addr), scratch));
- Insts.push_back(JMP_INST(scratch, 0, scratch));
- Insts.push_back(SUB_INST(scratch, 8, scratch));
- return;
- }
-
- // Emit: rd %pc, scratch
- // ldx [scratch+16], scratch
- // jmpl scratch+0, scratch
- // sub scratch, 8, scratch
- // <Addr: 8 byte>
- Insts.push_back(RDPC_INST(scratch));
- Insts.push_back(LDX_INST(scratch, 16, scratch));
- Insts.push_back(JMP_INST(scratch, 0, scratch));
- Insts.push_back(SUB_INST(scratch, 8, scratch));
- Insts.push_back((uint32_t)(((int64_t)Addr) >> 32) & 0xffffffff);
- Insts.push_back((uint32_t)(Addr & 0xffffffff));
-
- // Instruction sequence without rdpc instruction
- // 7 instructions and 2 scratch registers
- // Emit: sethi %hh(Addr), scratch
- // or scratch, %hm(Addr), scratch
- // sllx scratch, 32, scratch
- // sethi %hi(Addr), scratch2
- // or scratch, scratch2, scratch
- // jmpl scratch+%lo(Addr), scratch
- // sub scratch, 20, scratch
- // Insts.push_back(SETHI_INST(HH22(Addr), scratch));
- // Insts.push_back(OR_INST_I(scratch, HM10(Addr), scratch));
- // Insts.push_back(SLLX_INST(scratch, 32, scratch));
- // Insts.push_back(SETHI_INST(HI22(Addr), scratch2));
- // Insts.push_back(OR_INST_R(scratch, scratch2, scratch));
- // Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch));
- // Insts.push_back(SUB_INST(scratch, 20, scratch));
-}
-
-extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
-
- // Rewrite the function stub so that we don't end up here every time we
- // execute the call. We're replacing the stub instructions with code
- // that jumps to the compiled function:
-
- SmallVector<uint32_t, 8> Insts;
- intptr_t diff = (NewVal - StubAddr) >> 2;
- if (isInt<22>(diff)) {
- // Use branch instruction to jump
- Insts.push_back(BA_INST(diff));
- Insts.push_back(NOP_INST);
- } else {
- // Otherwise, use indirect jump to the compiled function
- emitInstrForIndirectJump(NewVal, 1, Insts);
- }
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- *(uint32_t *)(StubAddr + i*4) = Insts[i];
-
- sys::Memory::InvalidateInstructionCache((void*) StubAddr, Insts.size() * 4);
- return (void*)StubAddr;
-}
-
-
-void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- llvm_unreachable("FIXME: Implement SparcJITInfo::"
- "replaceMachineCodeForFunction");
-}
-
-
-TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() {
- // The stub contains a maximum of four 4-byte instructions and 8 bytes for the address,
- // aligned at 32 bytes.
- // See emitFunctionStub and emitInstrForIndirectJump for details.
- StubLayout Result = { 4*4 + 8, 32 };
- return Result;
-}
-
-void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE)
-{
- JCE.emitAlignment(32);
- void *Addr = (void*) (JCE.getCurrentPCValue());
-
- intptr_t CurrentAddr = (intptr_t)Addr;
- intptr_t EmittedAddr;
- SmallVector<uint32_t, 8> Insts;
- if (Fn != (void*)(intptr_t)SparcCompilationCallback) {
- EmittedAddr = (intptr_t)Fn;
- intptr_t diff = (EmittedAddr - CurrentAddr) >> 2;
- if (isInt<22>(diff)) {
- Insts.push_back(BA_INST(diff));
- Insts.push_back(NOP_INST);
- }
- } else {
- EmittedAddr = (intptr_t)SparcCompilationCallback;
- }
-
- if (Insts.size() == 0)
- emitInstrForIndirectJump(EmittedAddr, 1, Insts);
-
-
- if (!sys::Memory::setRangeWritable(Addr, 4 * Insts.size()))
- llvm_unreachable("ERROR: Unable to mark stub writable.");
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- JCE.emitWordBE(Insts[i]);
-
- sys::Memory::InvalidateInstructionCache(Addr, 4 * Insts.size());
- if (!sys::Memory::setRangeExecutable(Addr, 4 * Insts.size()))
- llvm_unreachable("ERROR: Unable to mark stub executable.");
-
- return Addr;
-}
-
-
-TargetJITInfo::LazyResolverFn
-SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) {
- JITCompilerFunction = F;
- return SparcCompilationCallback;
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void SparcJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
-
- switch ((SP::RelocationType) MR->getRelocationType()) {
- case SP::reloc_sparc_hi:
- ResultPtr = (ResultPtr >> 10) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_lo:
- ResultPtr = (ResultPtr & 0x3ff);
- break;
-
- case SP::reloc_sparc_pc30:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff;
- break;
-
- case SP::reloc_sparc_pc22:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_pc19:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff;
- break;
-
- case SP::reloc_sparc_h44:
- ResultPtr = (ResultPtr >> 22) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_m44:
- ResultPtr = (ResultPtr >> 12) & 0x3ff;
- break;
-
- case SP::reloc_sparc_l44:
- ResultPtr = (ResultPtr & 0xfff);
- break;
-
- case SP::reloc_sparc_hh:
- ResultPtr = (((int64_t)ResultPtr) >> 42) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_hm:
- ResultPtr = (((int64_t)ResultPtr) >> 32) & 0x3ff;
- break;
-
- }
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- }
-}
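The deleted resolver patched stubs in place: when the word offset between the stub and the compiled code fit a 22-bit signed branch displacement it emitted a ba, otherwise it fell back to the indirect-jump sequences above. Since the displacement counts 4-byte words, the reachable range is ±8 MiB. A small check of that reasoning (fitsBa is a hypothetical helper mirroring the isInt<22> test):

#include <cassert>
#include <cstdint>

static bool fitsBa(int64_t NewVal, int64_t StubAddr) {
  int64_t diff = (NewVal - StubAddr) >> 2;            // displacement in words
  return diff >= -(1LL << 21) && diff < (1LL << 21);  // isInt<22>(diff)
}

int main() {
  assert(fitsBa(0x1000 + (1 << 23) - 4, 0x1000));  // just inside +8 MiB
  assert(!fitsBa(0x1000 + (1 << 23) + 4, 0x1000)); // just outside
  return 0;
}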
diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h
deleted file mode 100644
index ff1b43a7f6aa..000000000000
--- a/lib/Target/Sparc/SparcJITInfo.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SparcJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPARCJITINFO_H
-#define SPARCJITINFO_H
-
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class SparcTargetMachine;
-
-class SparcJITInfo : public TargetJITInfo {
-
- bool IsPIC;
-
- public:
- explicit SparcJITInfo()
- : IsPIC(false) {}
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on Sparc.
- StubLayout getStubLayout() override;
-
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) override;
-
- /// Initialize - Initialize internal state for the function being JITted.
- void Initialize(const MachineFunction &MF, bool isPIC) {
- IsPIC = isPIC;
- }
-
-};
-}
-
-#endif
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 3783c16d9922..104744279d9d 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -10,8 +10,8 @@
// This file declares Sparc specific per-machine-function information.
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCMACHINEFUNCTIONINFO_H
-#define SPARCMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index dc1ec7c9d6b4..3cca98f0ca13 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -108,7 +108,7 @@ static void replaceFI(MachineFunction &MF,
return;
}
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// FIXME: it would be better to scavenge a register here instead of
// reserving G1 all of the time.
@@ -174,7 +174,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
@@ -186,7 +186,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 77f879a8beca..63567b08bc16 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCREGISTERINFO_H
-#define SPARCREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCREGISTERINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Sparc/SparcRelocations.h b/lib/Target/Sparc/SparcRelocations.h
deleted file mode 100644
index c1ff78dab0ca..000000000000
--- a/lib/Target/Sparc/SparcRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- SparcRelocations.h - Sparc Code Relocations -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Sparc target-specific relocation types
-// (for relocation-model=static).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPARC_RELOCATIONS_H
-#define SPARC_RELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace SP {
- enum RelocationType {
- // reloc_sparc_hi - upper 22 bits
- reloc_sparc_hi = 1,
-
- // reloc_sparc_lo - lower 10 bits
- reloc_sparc_lo = 2,
-
- // reloc_sparc_pc30 - pc rel. 30 bits for call
- reloc_sparc_pc30 = 3,
-
- // reloc_sparc_pc22 - pc rel. 22 bits for branch
- reloc_sparc_pc22 = 4,
-
- // reloc_sparc_pc19 - pc rel. 19 bits for branch with icc/xcc
- reloc_sparc_pc19 = 5,
-
- // reloc_sparc_h44 - 43-22 bits
- reloc_sparc_h44 = 6,
-
- // reloc_sparc_m44 - 21-12 bits
- reloc_sparc_m44 = 7,
-
- // reloc_sparc_l44 - lower 12 bits
- reloc_sparc_l44 = 8,
-
- // reloc_sparc_hh - 63-42 bits
- reloc_sparc_hh = 9,
-
- // reloc_sparc_hm - 41-32 bits
- reloc_sparc_hm = 10
- };
- }
-}
-
-#endif
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index 2346f4109dcb..a3a21d603b81 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCSELECTIONDAGINFO_H
-#define SPARCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index a3357786cded..d503b2b91b45 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_SUBTARGET_H
-#define SPARC_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
+#define LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcISelLowering.h"
-#include "SparcJITInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -43,21 +42,25 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- SparcJITInfo JITInfo;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, TargetMachine &TM, bool is64bit);
- const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const SparcRegisterInfo *getRegisterInfo() const {
+ const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SparcRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const SparcTargetLowering *getTargetLowering() const { return &TLInfo; }
- const SparcSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- SparcJITInfo *getJITInfo() { return &JITInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const SparcTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 0130face3ff6..6dccddce626e 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
+#include "SparcTargetObjectFile.h"
#include "Sparc.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
@@ -32,10 +33,13 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
+SparcTargetMachine::~SparcTargetMachine() {}
+
namespace {
/// Sparc Code Generator Pass Configuration Options.
class SparcPassConfig : public TargetPassConfig {
@@ -47,8 +51,9 @@ public:
return getTM<SparcTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
- bool addPreEmitPass() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -56,24 +61,19 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SparcPassConfig(this, PM);
}
-bool SparcPassConfig::addInstSelector() {
- addPass(createSparcISelDag(getSparcTargetMachine()));
- return false;
+void SparcPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getSparcTargetMachine()));
+
+ TargetPassConfig::addIRPasses();
}
-bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for Sparc.
- PM.add(createSparcJITCodeEmitterPass(*this, JCE));
+bool SparcPassConfig::addInstSelector() {
+ addPass(createSparcISelDag(getSparcTargetMachine()));
return false;
}
-/// addPreEmitPass - This pass may be implemented by targets that want to run
-/// passes immediately before machine code is emitted. This should return
-/// true if -print-machineinstrs should print out the code after the passes.
-bool SparcPassConfig::addPreEmitPass(){
+void SparcPassConfig::addPreEmitPass(){
addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
- return true;
}
void SparcV8TargetMachine::anchor() { }
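Besides dropping the addCodeEmitter hook along with the JIT, the pass configuration tracks two interface changes: addPreEmitPass now returns void, and an addIRPasses override schedules atomic expansion ahead of the generic IR passes. The resulting shape, sketched for a hypothetical target:

void MyPassConfig::addIRPasses() {
  addPass(createAtomicExpandPass(&getMyTargetMachine())); // target hook first
  TargetPassConfig::addIRPasses();                        // then the generic IR passes
}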
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 03b513746dfe..096e7c8485aa 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETMACHINE_H
-#define SPARCTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETMACHINE_H
#include "SparcInstrInfo.h"
#include "SparcSubtarget.h"
@@ -21,37 +21,22 @@
namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
SparcSubtarget Subtarget;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit);
+ ~SparcTargetMachine() override;
- const SparcInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const SparcRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const SparcTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- SparcJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
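The lowering-object-file instance also changes hands: rather than TargetLowering allocating it, the target machine now owns it through a unique_ptr and exposes it via getObjFileLowering(), which is why the out-of-line destructor appears. A sketch of the ownership pattern (MyTargetMachine is hypothetical):

class MyTargetMachine : public LLVMTargetMachine {
  std::unique_ptr<TargetLoweringObjectFile> TLOF; // lives as long as the TM
public:
  TargetLoweringObjectFile *getObjFileLowering() const override {
    return TLOF.get();
  }
  // constructor initializes TLOF with the target's object-file lowering
};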
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h
index c60675b49459..76c8cca04680 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SPARC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_SPARC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/Sparc/SparcTargetStreamer.h b/lib/Target/Sparc/SparcTargetStreamer.h
index 3767d8e27b57..3b503503abce 100644
--- a/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/lib/Target/Sparc/SparcTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETSTREAMER_H
-#define SPARCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 758be41ce263..cb528db9db51 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -345,7 +345,7 @@ public:
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
// Used by the TableGen code to parse particular operand types.
@@ -677,7 +677,7 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -685,7 +685,6 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
- default: break;
case Match_Success:
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, STI);
@@ -696,7 +695,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
// a single subtarget feature is missing
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -709,7 +708,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
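Widening ErrorInfo and the local Mask to uint64_t matters on the missing-feature path: the mask is shifted once per subtarget feature bit, and with more than 32 features a 32-bit 1 would be shifted past its width. A fragment of the safe loop:

uint64_t Mask = 1; // must be 64-bit: shifted up to 63 times below
for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
  if (ErrorInfo & Mask) {
    // feature bit I is missing; append its name to the diagnostic
  }
  Mask <<= 1;
}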
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 4da2d0f2dd59..41a614d9d151 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -5,7 +5,7 @@ tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv)
tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget)
diff --git a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
index c3081f5447bc..fd7826999bc5 100644
--- a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
+++ b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SystemZDisassembler
parent = SystemZ
-required_libraries = MC Support SystemZDesc SystemZInfo
+required_libraries = MC MCDisassembler Support SystemZDesc SystemZInfo
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 2350776e10fe..23173bfbd91b 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -12,7 +12,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -28,11 +27,10 @@ public:
: MCDisassembler(STI, Ctx) {}
virtual ~SystemZDisassembler() {}
- // Override MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -288,14 +286,13 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
#include "SystemZGenDisassemblerTables.inc"
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
+ ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ raw_ostream &OS,
+ raw_ostream &CS) const {
// Get the first two bytes of the instruction.
- uint8_t Bytes[6];
Size = 0;
- if (Region.readBytes(Address, 2, Bytes) == -1)
+ if (Bytes.size() < 2)
return MCDisassembler::Fail;
// The top 2 bits of the first byte specify the size.
@@ -312,7 +309,7 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// Read any remaining bytes.
- if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2) == -1)
+ if (Bytes.size() < Size)
return MCDisassembler::Fail;
// Construct the instruction.
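Replacing MemoryObject with ArrayRef<uint8_t> turns each fallible readBytes call into a plain size check against bytes the caller already fetched. A standalone sketch of the resulting control flow, using std::vector in place of ArrayRef; the length decoding follows the SystemZ rule referenced above (the top two bits of the first byte select a 2-, 4- or 6-byte instruction):

#include <cstdint>
#include <vector>

enum class DecodeStatus { Fail, Success };

// Mirror of the bounds checks in getInstruction: fail cleanly rather
// than reading past the buffer the caller handed us.
static DecodeStatus sizeInstruction(const std::vector<uint8_t> &Bytes,
                                    uint64_t &Size) {
  Size = 0;
  if (Bytes.size() < 2)
    return DecodeStatus::Fail;
  // 0b00 -> 2 bytes, 0b01/0b10 -> 4 bytes, 0b11 -> 6 bytes.
  unsigned Top = Bytes[0] >> 6;
  Size = (Top == 0) ? 2 : (Top == 3) ? 6 : 4;
  if (Bytes.size() < Size)
    return DecodeStatus::Fail;
  return DecodeStatus::Success;
}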
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index dce482b216fb..753903cf06d5 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZINSTPRINTER_H
-#define LLVM_SYSTEMZINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 7781318cc164..542aaee77358 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC Scalar SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index c46a36bdd23d..0161d6263e7d 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,12 +23,7 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- HasLEB128 = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
-}
-const MCSection *
-SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
+ UseIntegratedAssembler = true;
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 1de97afbfe0a..19b5b4b09724 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SystemZTARGETASMINFO_H
-#define SystemZTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/Support/Compiler.h"
@@ -19,9 +19,6 @@ class StringRef;
class SystemZMCAsmInfo : public MCAsmInfoELF {
public:
explicit SystemZMCAsmInfo(StringRef TT);
-
- // Override MCAsmInfo;
- const MCSection *getNonexecutableStackSection(MCContext &Ctx) const override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index a3aab712c08c..52a8d1d6600b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZMCFIXUPS_H
-#define LLVM_SYSTEMZMCFIXUPS_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index cc94869eb5fc..6e82b6d98ae4 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -181,15 +181,12 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
- MCContext &Ctx,
- MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+static MCStreamer *
+createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
extern "C" void LLVMInitializeSystemZTargetMC() {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index cbaf9a83b80f..5eb6526a5c00 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZMCTARGETDESC_H
-#define SYSTEMZMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
index 445725bd1e12..732c31725538 100644
--- a/lib/Target/SystemZ/Makefile
+++ b/lib/Target/SystemZ/Makefile
@@ -15,7 +15,6 @@ TARGET = SystemZ
BUILT_SOURCES = SystemZGenRegisterInfo.inc \
SystemZGenAsmWriter.inc \
SystemZGenAsmMatcher.inc \
- SystemZGenCodeEmitter.inc \
SystemZGenDisassemblerTables.inc \
SystemZGenInstrInfo.inc \
SystemZGenDAGISel.inc \
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 15792494930a..c8b95b2b2ca7 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZ_H
-#define SYSTEMZ_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/Support/CodeGen.h"
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 8b18bc16e1c6..f4f3ec7a9733 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -185,7 +185,8 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ZCPV->getType());
OutStreamer.EmitValue(Expr, Size);
}
@@ -229,7 +230,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index 20093bc614d8..64672792a876 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZASMPRINTER_H
-#define SYSTEMZASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/AsmPrinter.h"
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index 4b1569d2bd72..71605ac11268 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZCALLINGCONV_H
-#define SYSTEMZCALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
namespace llvm {
namespace SystemZ {
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 699718f5c80e..0bd8c205ea4d 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZCONSTANTPOOLVALUE_H
-#define SYSTEMZCONSTANTPOOLVALUE_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index dc210d608631..ce99ee5bc412 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -47,7 +47,7 @@ struct Reference {
return *this;
}
- operator bool() const { return Def || Use; }
+ LLVM_EXPLICIT operator bool() const { return Def || Use; }
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
@@ -458,7 +458,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
bool Changed = false;
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 055dbe914995..eff4ae3baf3f 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -13,6 +13,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -65,7 +66,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFFrame = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
bool HasFP = hasFP(MF);
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
@@ -108,7 +109,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// and end registers.
static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
unsigned GPR64, bool IsImplicit) {
- const TargetRegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
@@ -127,7 +129,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -216,7 +218,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool HasFP = hasFP(MF);
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -292,7 +294,7 @@ static void emitIncrement(MachineBasicBlock &MBB,
else {
Opcode = SystemZ::AGFI;
// Make sure we maintain 8-byte stack alignment.
- int64_t MinVal = -int64_t(1) << 31;
+ int64_t MinVal = -uint64_t(1) << 31;
int64_t MaxVal = (int64_t(1) << 31) - 8;
if (ThisVal < MinVal)
ThisVal = MinVal;
@@ -311,7 +313,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFFrame = MF.getFrameInfo();
auto *ZII =
- static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@@ -408,7 +410,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
auto *ZII =
- static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
// Skip the return instruction.
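The one-character change to MinVal in emitIncrement is a correctness fix, not style: -int64_t(1) << 31 left-shifts a negative value, which is undefined behaviour in the C++ of this era, while -uint64_t(1) << 31 is a well-defined unsigned shift whose value converts to the intended -2^31. A sketch of the clamp with both spellings noted:

#include <cstdint>

// Clamp a stack adjustment to the signed 32-bit range of the AGFI
// immediate while keeping 8-byte alignment, as in emitIncrement above.
static int64_t clampIncrement(int64_t ThisVal) {
  // Well-defined: unsigned shift, then conversion; equals INT32_MIN.
  int64_t MinVal = -uint64_t(1) << 31;
  // int64_t MinVal = -int64_t(1) << 31; // UB: shifting a negative value
  int64_t MaxVal = (int64_t(1) << 31) - 8;
  if (ThisVal < MinVal)
    return MinVal;
  if (ThisVal > MaxVal)
    return MaxVal;
  return ThisVal;
}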
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 4d5fe6dce62d..cefa56fd74e5 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZFRAMELOWERING_H
-#define SYSTEMZFRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 24f7584ae9c6..5f84624c38ea 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -140,7 +140,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
const SystemZInstrInfo *getInstrInfo() const {
- return getTargetMachine().getInstrInfo();
+ return getTargetMachine().getSubtargetImpl()->getInstrInfo();
}
// Try to fold more of the base or index of AM into AM, where IsBase
@@ -315,9 +315,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) {}
// Override MachineFunctionPass.
const char *getPassName() const override {
@@ -1000,8 +1000,8 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
if (V1 == V2 && End1 == End2)
return false;
- return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
- AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
+ return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getAAInfo()),
+ AliasAnalysis::Location(V2, End2, Store->getAAInfo()));
}
bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 00c65f5bba6b..f7ac1ca29910 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,7 +81,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
}
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
- : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ : TargetLowering(tm),
Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
MVT PtrVT = getPointerTy();
@@ -218,10 +218,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
// We have native instructions for i8, i16 and i32 extensions, but not i1.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ }
// Handle the various types of symbolic address.
setOperationAction(ISD::ConstantPool, PtrVT, Custom);
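The extra MVT parameter in setLoadExtAction reflects the API change running through this series: extending-load actions are now keyed by the result type as well as the memory type, so promoting i1 loads has to be registered once per integer result type, hence the new loop. A toy model of the two-key table, with the enums trimmed to what the sketch needs:

#include <map>
#include <utility>

enum ExtType { SEXTLOAD, ZEXTLOAD, EXTLOAD };
enum Action { Legal, Promote, Expand };
enum SimpleVT { i1, i8, i16, i32, i64 };

// The action is looked up by (extension, result type, memory type)
// rather than by memory type alone.
static std::map<std::pair<ExtType, std::pair<SimpleVT, SimpleVT>>, Action>
    LoadExtActions;

static void setLoadExtAction(ExtType E, SimpleVT ValVT, SimpleVT MemVT,
                             Action A) {
  LoadExtActions[{E, {ValVT, MemVT}}] = A;
}

static void registerI1Promotion() {
  // One entry per integer result type, matching the loop above.
  for (SimpleVT VT : {i8, i16, i32, i64}) {
    setLoadExtAction(SEXTLOAD, VT, i1, Promote);
    setLoadExtAction(ZEXTLOAD, VT, i1, Promote);
    setLoadExtAction(EXTLOAD, VT, i1, Promote);
  }
}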
@@ -275,7 +277,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of a f80 constant (in cases where the constant
// would fit in an f80).
- setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -339,9 +342,10 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
-bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
@@ -674,12 +678,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
auto *TFL = static_cast<const SystemZFrameLowering *>(
- DAG.getTarget().getFrameLowering());
+ DAG.getSubtarget().getFrameLowering());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -782,7 +785,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
return Chain;
}
-static bool canUseSiblingCall(CCState ArgCCInfo,
+static bool canUseSiblingCall(const CCState &ArgCCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs) {
// Punt if there are any indirect or stack arguments, or if the call
// needs the call-saved argument register R6.
@@ -817,8 +820,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
- *DAG.getContext());
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -915,7 +917,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass[I].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -940,8 +943,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
- *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
@@ -972,8 +974,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
- *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
// Quick exit for void returns
@@ -1191,7 +1192,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
Load->getChain(), Load->getBasePtr(),
Load->getPointerInfo(), Load->getMemoryVT(),
Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment());
+ Load->isInvariant(), Load->getAlignment());
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
@@ -2614,7 +2615,7 @@ MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getTarget().getInstrInfo());
+ MBB->getParent()->getSubtarget().getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2663,7 +2664,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getTarget().getInstrInfo());
+ MBB->getParent()->getSubtarget().getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2732,7 +2733,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2802,14 +2803,10 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
unsigned Tmp = MRI.createVirtualRegister(RC);
BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
.addReg(RotatedOldVal).addOperand(Src2);
- if (BitSize < 32)
+ if (BitSize <= 32)
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
- else if (BitSize == 32)
- // XILF with every bit set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(~uint32_t(0));
+ .addReg(Tmp).addImm(-1U << (32 - BitSize));
else {
// Use LCGR and add -1 to the result, which is more compact than
// an XILF, XILH pair.
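The merged branch works because -1U << (32 - BitSize) already yields an all-ones word when BitSize == 32 (a shift by zero), so the separate XILF-with-every-bit-set case was redundant. A standalone sketch of the mask computation:

#include <cassert>
#include <cstdio>

// The upper BitSize bits of a 32-bit word, as used for XILF above.
static unsigned upperBitsMask(unsigned BitSize) {
  assert(BitSize >= 1 && BitSize <= 32);
  return -1U << (32 - BitSize);
}

int main() {
  std::printf("%#x\n", upperBitsMask(8));  // 0xff000000
  std::printf("%#x\n", upperBitsMask(32)); // 0xffffffff
}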
@@ -2856,7 +2853,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2968,7 +2965,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3085,7 +3082,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
bool ClearEven, unsigned SubReg) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3117,7 +3114,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3287,7 +3284,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index e21b0501933f..887c236f1e78 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
-#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#include "SystemZ.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -208,8 +208,9 @@ public:
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *Fast) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -321,4 +322,4 @@ private:
};
} // end namespace llvm
-#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
+#endif
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 84196e94a5a6..464f79a3bac9 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZINSTRBUILDER_H
-#define SYSTEMZINSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index e8841e131324..4a5582fbf4e2 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -26,14 +26,14 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
//===----------------------------------------------------------------------===//
// Load zero.
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>;
def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>;
def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
}
// Moves between two floating-point registers.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>;
def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>;
def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index f58ab474fbbc..8ff9553ca081 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -633,7 +633,7 @@ struct LogicOp {
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
- operator bool() const { return RegSize; }
+ LLVM_EXPLICIT operator bool() const { return RegSize; }
unsigned RegSize, ImmLSB, ImmSize;
};
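LLVM_EXPLICIT is the portability macro of this period that expands to explicit where the compiler supports C++11 conversion operators. Marking operator bool explicit keeps if (Op)-style tests working while rejecting accidental arithmetic or comparisons through bool. A standalone illustration:

struct LogicOp {
  unsigned RegSize = 0;
  explicit operator bool() const { return RegSize != 0; }
};

int main() {
  LogicOp Op;
  if (Op)                // OK: contextual conversion to bool still works.
    return 1;
  // int X = Op + 1;     // error: no implicit conversion via bool
  // bool B = Op == Op;  // error: no comparison via implicit bool
  return 0;
}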
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 83009cb8d426..d2e3f541f80e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
-#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
#include "SystemZ.h"
#include "SystemZRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index f4951ad8e0ac..902d74de506b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -16,7 +16,7 @@ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(callseq_end timm:$amt1, timm:$amt2)]>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Takes as input the value of the stack pointer after a dynamic allocation
// has been made. Sets the output to the address of the dynamically-
// allocated area itself, skipping the outgoing arguments.
@@ -263,7 +263,7 @@ def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
//===----------------------------------------------------------------------===//
// Register moves.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
@@ -286,7 +286,7 @@ let Uses = [CC] in {
}
// Immediate moves.
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
isReMaterializable = 1 in {
// 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
// depending on the choice of register.
@@ -402,13 +402,13 @@ let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>;
def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>;
}
// 64-bit extensions from registers.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>;
def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>;
def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
@@ -452,7 +452,7 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Expands to LLCR or RISB[LH]G, depending on the choice of registers.
def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
@@ -464,7 +464,7 @@ let neverHasSideEffects = 1 in {
}
// 64-bit extensions from registers.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>;
def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>;
def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>;
@@ -546,7 +546,7 @@ def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
//===----------------------------------------------------------------------===//
// Byte-swapping register moves.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>;
def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>;
}
@@ -566,7 +566,7 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
//===----------------------------------------------------------------------===//
// Load BDX-style addresses.
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1,
DispKey = "la" in {
let DispSize = "12" in
def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2),
@@ -580,7 +580,7 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
// Load a PC-relative address. There's no version of this instruction
// with a 16-bit offset, so there's no relaxation.
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
isReMaterializable = 1 in {
def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2),
"larl\t$R1, $I2",
@@ -1012,13 +1012,13 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
//===----------------------------------------------------------------------===//
// Shift left.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
}
// Logical shift right.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
}
@@ -1030,7 +1030,7 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
}
// Rotate left.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
}
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 808133432486..8dab44e7f8af 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -448,7 +448,7 @@ void SystemZLongBranch::relaxBranches() {
}
bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
MF = &F;
uint64_t Size = initMBBInfo();
if (Size <= MaxForwardRange || !mustRelaxABranch())
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index 90447ffe90f0..7173cfa42959 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZMCINSTLOWER_H
-#define LLVM_SYSTEMZMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 50865f135beb..92c2ce7324a0 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
-#define SYSTEMZMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index f03bcc412d51..64f5eebf37c1 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -35,7 +35,7 @@ SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
BitVector
SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
@@ -62,8 +62,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
auto *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc DL = MI->getDebugLoc();
// Decompose the frame index into a base and offset.
@@ -134,6 +134,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 9bffa467a15d..212fe91f38ab 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SystemZREGISTERINFO_H
-#define SystemZREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index e9de146af1d6..a257d6b55494 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZSELECTIONDAGINFO_H
-#define SYSTEMZSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index aad899c41c0f..ec7a8c40d18a 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -150,7 +150,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
bool Changed = false;
for (auto &MBB : F)
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 4e8c710bdefd..f8815524e0f3 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZSUBTARGET_H
-#define SYSTEMZSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
#include "SystemZFrameLowering.h"
#include "SystemZISelLowering.h"
@@ -55,14 +55,20 @@ public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const SystemZRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const SystemZTargetLowering *getTargetLowering() const { return &TLInfo; }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const SystemZTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 0122e99f8a77..a210074484e7 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
using namespace llvm;
@@ -25,10 +26,13 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+SystemZTargetMachine::~SystemZTargetMachine() {}
+
namespace {
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
@@ -42,14 +46,13 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- addPass(createPartiallyInlineLibCallsPass());
}
bool SystemZPassConfig::addInstSelector() {
@@ -57,14 +60,13 @@ bool SystemZPassConfig::addInstSelector() {
return false;
}
-bool SystemZPassConfig::addPreSched2() {
+void SystemZPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None &&
getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
addPass(&IfConverterID);
- return true;
}
-bool SystemZPassConfig::addPreEmitPass() {
+void SystemZPassConfig::addPreEmitPass() {
// We eliminate comparisons here rather than earlier because some
// transformations can change the set of available CC values and we
// generally want those transformations to have priority. This is
@@ -89,11 +91,10 @@ bool SystemZPassConfig::addPreEmitPass() {
// between the comparison and the branch, but it isn't clear whether
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
+ addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
- return true;
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index ded07e912443..9fae5e43e754 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZTARGETMACHINE_H
-#define SYSTEMZTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
#include "SystemZSubtarget.h"
#include "llvm/Target/TargetMachine.h"
@@ -23,6 +23,7 @@ namespace llvm {
class TargetFrameLowering;
class SystemZTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
SystemZSubtarget Subtarget;
public:
@@ -30,32 +31,17 @@ public:
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~SystemZTargetMachine() override;
// Override TargetMachine.
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const SystemZInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const SystemZSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const SystemZRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const SystemZTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
-
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // end namespace llvm
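The new TLOF member and the newly declared destructor belong together: once the TargetLoweringObjectFile is held by std::unique_ptr over a forward-declared type, the destructor must be defined out of line where the type is complete, which is why ~SystemZTargetMachine() is declared in the header and given an empty body in the .cpp. A reduced header-side sketch of the ownership pattern (names illustrative):

#include <memory>

class TargetLoweringObjectFile; // forward declaration, as in the header

class TargetMachineSketch {
  std::unique_ptr<TargetLoweringObjectFile> TLOF;

public:
  // Declared here, defined in the .cpp where the pointee type is
  // complete; otherwise unique_ptr's deleter cannot be instantiated.
  ~TargetMachineSketch();

  TargetLoweringObjectFile *getObjFileLowering() const { return TLOF.get(); }
};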
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index d277f82eb869..4b51b3f7eea5 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -49,7 +49,7 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
// The DataLayoutPass must now be in sync with the module. Unfortunately we
// cannot enforce that from the C api.
- unwrap(PM)->add(new DataLayoutPass(*unwrap(TD)));
+ unwrap(PM)->add(new DataLayoutPass());
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp
deleted file mode 100644
index aafedf8749b1..000000000000
--- a/lib/Target/TargetJITInfo.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetJITInfo.h"
-
-using namespace llvm;
-
-void TargetJITInfo::anchor() { }
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 616ff9067300..c0abdbd9279b 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -28,8 +28,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"_IO_putc",
"_ZdaPv",
"_ZdaPvRKSt9nothrow_t",
+ "_ZdaPvj",
+ "_ZdaPvm",
"_ZdlPv",
"_ZdlPvRKSt9nothrow_t",
+ "_ZdlPvj",
+ "_ZdlPvm",
"_Znaj",
"_ZnajRKSt9nothrow_t",
"_Znam",
@@ -47,6 +51,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__isoc99_scanf",
"__isoc99_sscanf",
"__memcpy_chk",
+ "__memmove_chk",
+ "__memset_chk",
"__sincospi_stret",
"__sincospif_stret",
"__sinpi",
@@ -54,7 +60,11 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__sqrt_finite",
"__sqrtf_finite",
"__sqrtl_finite",
+ "__stpcpy_chk",
+ "__stpncpy_chk",
+ "__strcpy_chk",
"__strdup",
+ "__strncpy_chk",
"__strndup",
"__strtok_r",
"abs",
@@ -348,7 +358,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
static bool hasSinCosPiStret(const Triple &T) {
// Only Darwin variants have _stret versions of combined trig functions.
- if (!T.isMacOSX() && T.getOS() != Triple::IOS)
+ if (!T.isOSDarwin())
return false;
// The ABI is rather complicated on x86, so don't do anything special there.
@@ -358,7 +368,7 @@ static bool hasSinCosPiStret(const Triple &T) {
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
return false;
- if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0))
+ if (T.isiOS() && T.isOSVersionLT(7, 0))
return false;
return true;
@@ -379,9 +389,10 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
}
#endif // !NDEBUG
- // There are no library implementations of mempcy and memset for r600 and
+  // There are no library implementations of memcpy and memset for AMD GPUs and
// these can be difficult to lower in the backend.
- if (T.getArch() == Triple::r600) {
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
TLI.setUnavailable(LibFunc::memcpy);
TLI.setUnavailable(LibFunc::memset);
TLI.setUnavailable(LibFunc::memset_pattern16);
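The new StandardNames entries (the sized operator delete manglings _ZdaPvj/_ZdaPvm/_ZdlPvj/_ZdlPvm and the fortified __*_chk string functions) are slotted in alphabetically because the table is kept sorted; the #ifndef NDEBUG block whose #endif is visible above verifies that ordering, which is what lets lookups binary-search the names. A sketch of such a lookup over a short hypothetical slice of the table:

#include <algorithm>
#include <cstring>
#include <iterator>

// A short, alphabetically ordered slice; the real table has hundreds
// of entries.
static const char *StandardNames[] = {"_ZdaPv",        "_ZdaPvj",
                                      "_ZdaPvm",       "__memcpy_chk",
                                      "__memmove_chk", "__memset_chk",
                                      "abs"};

static bool isStandardName(const char *Name) {
  return std::binary_search(std::begin(StandardNames), std::end(StandardNames),
                            Name, [](const char *A, const char *B) {
                              return std::strcmp(A, B) < 0;
                            });
}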
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 2569e922641d..f34347382dfc 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -30,6 +30,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -42,7 +43,7 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
- DL = TM.getDataLayout();
+ DL = TM.getSubtargetImpl()->getDataLayout();
InitMCObjectFileInfo(TM.getTargetTriple(),
TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
@@ -199,7 +200,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
+ switch (TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
@@ -268,31 +270,6 @@ SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
return SelectSectionForGlobal(GV, Kind, Mang, TM);
}
-bool TargetLoweringObjectFile::isSectionAtomizableBySymbols(
- const MCSection &Section) const {
- return false;
-}
-
-// Lame default implementation. Calculate the section name for global.
-const MCSection *
-TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const{
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
-
- if (Kind.isText())
- return getTextSection();
-
- if (Kind.isBSS() && BSSSection != nullptr)
- return BSSSection;
-
- if (Kind.isReadOnly() && ReadOnlySection != nullptr)
- return ReadOnlySection;
-
- return getDataSection();
-}
-
/// getSectionForConstant - Given a mergable constant with the
/// specified size and relocation information, return a section that it
/// should be placed in.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 95c8cb66f402..b3ff0010c434 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -21,11 +21,13 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//---------------------------------------------------------------------------
@@ -47,17 +49,13 @@ TargetMachine::~TargetMachine() {
}
/// \brief Reset the target options based on the function's attributes.
-void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
- const Function *F = MF->getFunction();
- TargetOptions &TO = MF->getTarget().Options;
-
-#define RESET_OPTION(X, Y) \
- do { \
- if (F->hasFnAttribute(Y)) \
- TO.X = \
- (F->getAttributes(). \
- getAttribute(AttributeSet::FunctionIndex, \
- Y).getValueAsString() == "true"); \
+void TargetMachine::resetTargetOptions(const Function &F) const {
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F.hasFnAttribute(Y)) \
+ Options.X = (F.getAttributes() \
+ .getAttribute(AttributeSet::FunctionIndex, Y) \
+ .getValueAsString() == "true"); \
} while (0)
RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
@@ -68,7 +66,7 @@ void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
RESET_OPTION(UseSoftFloat, "use-soft-float");
RESET_OPTION(DisableTailCalls, "disable-tail-calls");
- TO.MCOptions.SanitizeAddress = F->hasFnAttribute(Attribute::SanitizeAddress);
+ Options.MCOptions.SanitizeAddress = F.hasFnAttribute(Attribute::SanitizeAddress);
}
/// getRelocationModel - Returns the code generation relocation model. The
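The rewritten RESET_OPTION takes the IR Function by reference and writes through the TargetMachine's own Options member instead of reaching back through the MachineFunction; the shape is otherwise unchanged: each string-valued function attribute, when present, overrides one TargetOptions field. A standalone model with stub types (FunctionStub and OptionsStub are illustrative, not the LLVM API):

#include <map>
#include <string>

struct FunctionStub {
  std::map<std::string, std::string> Attrs;
  bool hasFnAttribute(const std::string &K) const { return Attrs.count(K) != 0; }
  std::string getAttr(const std::string &K) const { return Attrs.at(K); }
};

struct OptionsStub {
  bool NoFramePointerElim = false;
  bool UseSoftFloat = false;
  bool DisableTailCalls = false;
};

// Same shape as the macro above: attribute present -> override the field.
#define RESET_OPTION(X, Y)                                                     \
  do {                                                                         \
    if (F.hasFnAttribute(Y))                                                   \
      Options.X = (F.getAttr(Y) == "true");                                    \
  } while (0)

static void resetOptions(const FunctionStub &F, OptionsStub &Options) {
  RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
  RESET_OPTION(UseSoftFloat, "use-soft-float");
  RESET_OPTION(DisableTailCalls, "disable-tail-calls");
}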
@@ -172,6 +170,19 @@ void TargetMachine::setDataSections(bool V) {
Options.DataSections = V;
}
+static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
+ const MCSection &Section) {
+ if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ return true;
+
+ // If it is not dead stripped, it is safe to use private labels.
+ const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return true;
+
+ return false;
+}
+
void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
const GlobalValue *GV, Mangler &Mang,
bool MayAlwaysUsePrivate) const {
@@ -183,9 +194,9 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
}
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this);
const TargetLoweringObjectFile &TLOF =
- getTargetLowering()->getObjFileLowering();
+ getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
const MCSection *TheSection = TLOF.SectionForGlobal(GV, GVKind, Mang, *this);
- bool CannotUsePrivateLabel = TLOF.isSectionAtomizableBySymbols(*TheSection);
+ bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel);
}
@@ -193,6 +204,6 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, Mang);
const TargetLoweringObjectFile &TLOF =
- getTargetLowering()->getObjFileLowering();
+ getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
return TLOF.getContext().GetOrCreateSymbol(NameStr.str());
}
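
A minimal sketch (not part of the patch) of how the new canUsePrivateLabel()
helper changes label selection. TM, GV, and Mang are assumed to be set up by
the caller; includes follow this tree's layout.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Mangler.h"
    #include "llvm/Target/TargetMachine.h"
    #include <string>
    using namespace llvm;

    // Hypothetical helper: for a Mach-O section that the linker may atomize by
    // symbols and that is not marked S_ATTR_NO_DEAD_STRIP, canUsePrivateLabel()
    // is false, so getNameWithPrefix() yields a regular "_foo" label instead of
    // a private "l_foo" one that dead stripping could fold away.
    std::string mangledName(const TargetMachine &TM, const GlobalValue *GV,
                            Mangler &Mang) {
      SmallString<60> Name;
      TM.getNameWithPrefix(Name, GV, Mang);
      return Name.str().str();
    }
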
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 20923c97ec88..b3e07df7b5ba 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
@@ -172,7 +173,7 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
}
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getDataLayout());
+ return wrap(unwrap(T)->getSubtargetImpl()->getDataLayout());
}
void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
@@ -189,7 +190,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
std::string error;
- const DataLayout* td = TM->getDataLayout();
+ const DataLayout *td = TM->getSubtargetImpl()->getDataLayout();
if (!td) {
error = "No DataLayout in TargetMachine";
@@ -197,7 +198,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
return true;
}
Mod->setDataLayout(td);
- pass.add(new DataLayoutPass(Mod));
+ pass.add(new DataLayoutPass());
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -222,10 +223,10 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
- std::string error;
- raw_fd_ostream dest(Filename, error, sys::fs::F_None);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
+ std::error_code EC;
+ raw_fd_ostream dest(Filename, EC, sys::fs::F_None);
+ if (EC) {
+ *ErrorMessage = strdup(EC.message().c_str());
return true;
}
formatted_raw_ostream destf(dest);
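
A minimal sketch, not part of the patch, of the new raw_fd_ostream error
handling that the hunk above adopts: a std::error_code out-parameter replaces
the old std::string one.

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>
    using namespace llvm;

    // Opens Path for writing and reports failures via std::error_code.
    bool writeGreeting(const char *Path) {
      std::error_code EC;
      raw_fd_ostream OS(Path, EC, sys::fs::F_None);
      if (EC) { // open failed; EC carries the OS error message
        errs() << Path << ": " << EC.message() << "\n";
        return false;
      }
      OS << "hello\n"; // the stream is usable once EC tests clear
      return true;
    }
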
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 386a813b057f..10597a84bca5 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -39,7 +39,7 @@ bool TargetSubtargetInfo::useMachineScheduler() const {
return enableMachineScheduler();
}
-bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const {
+bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
}
@@ -53,7 +53,7 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
}
bool TargetSubtargetInfo::enablePostMachineScheduler() const {
- return getSchedModel()->PostRAScheduler;
+ return getSchedModel().PostRAScheduler;
}
bool TargetSubtargetInfo::useAA() const {
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index b022a41b192f..2c1926edc26b 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,4 +1,7 @@
add_llvm_library(LLVMX86AsmParser
X86AsmInstrumentation.cpp
X86AsmParser.cpp
+
+ LINK_LIBS
+ LLVMX86CodeGen
)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 9f94d5d38864..284bfd052ccf 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86AsmParser
parent = X86
-required_libraries = MC MCParser Support X86Desc X86Info
+required_libraries = MC MCParser Support X86CodeGen X86Desc X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index a365f62190d2..543af8e4e77d 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,9 +10,12 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86AsmInstrumentation.h"
#include "X86Operand.h"
+#include "X86RegisterInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -23,6 +26,73 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
+// The following comment describes how assembly instrumentation works.
+// Currently we have only AddressSanitizer instrumentation, but we're
+// planning to implement MemorySanitizer for inline assembly too. If
+// you're not familiar with the AddressSanitizer algorithm, please read
+// https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm.
+//
+// When inline assembly is parsed by an instance of X86AsmParser, all
+// instructions are emitted via the EmitInstruction method. That's the
+// place where X86AsmInstrumentation analyzes an instruction and
+// decides whether the instruction should be emitted as is or whether
+// instrumentation is required. The latter case happens when an
+// instruction reads from or writes to memory. Currently the instruction
+// opcode is checked explicitly, and if an instruction has a memory
+// operand (for instance, movq (%rsi, %rcx, 8), %rax) it should be
+// instrumented. There also exist instructions that modify memory but
+// don't have an explicit memory operand, for instance, movs.
+//
+// Let's first consider 8-byte memory accesses where an instruction has
+// an explicit memory operand. In this case we need two registers -
+// AddressReg to compute the address of the memory cells which are
+// accessed and ShadowReg to compute the corresponding shadow address.
+// So we need to spill both registers before the instrumentation code
+// and restore them after it. Thus, in general, the instrumentation code
+// will look like this:
+// PUSHF # Store flags, otherwise they will be overwritten
+// PUSH AddressReg # spill AddressReg
+// PUSH ShadowReg # spill ShadowReg
+// LEA MemOp, AddressReg # compute address of the memory operand
+// MOV AddressReg, ShadowReg
+// SHR ShadowReg, 3
+// # ShadowOffset(AddressReg >> 3) contains address of a shadow
+// # corresponding to MemOp.
+// CMP ShadowOffset(ShadowReg), 0 # test shadow value
+// JZ .Done # when shadow equals zero, everything is fine
+// MOV AddressReg, RDI
+// # Call __asan_report function with AddressReg as an argument
+// CALL __asan_report
+// .Done:
+// POP ShadowReg # Restore ShadowReg
+// POP AddressReg # Restore AddressReg
+// POPF # Restore flags
+//
+// Memory accesses of other sizes (1-, 2-, 4- and 16-byte) are handled
+// in a similar manner, but small memory accesses (less than 8 bytes)
+// require an additional ScratchReg, which is used to hold the shadow
+// value.
+//
+// If, for example, we're instrumenting an instruction like movs, only
+// the contents of RDI, RDI + AccessSize * RCX, RSI and RSI +
+// AccessSize * RCX are checked. In this case there's no need to spill
+// and restore AddressReg, ShadowReg or the flags four times; they're
+// saved on the stack just once, before instrumenting these four
+// addresses, and restored at the end of the instrumentation.
+//
+// Several things complicate this simple algorithm:
+// * An instrumented memory operand can have RSP as a base or an index
+//   register. So we need to add a constant offset before computing
+//   the memory address, since flags, AddressReg, ShadowReg, etc. were
+//   already stored on the stack and RSP was modified.
+// * Debug info (usually DWARF) should be adjusted, because sometimes
+//   RSP is used as a frame register. So we need to select some
+//   register as a frame register and temporarily override the current
+//   CFA register.
namespace llvm {
namespace {
@@ -32,10 +102,23 @@ static cl::opt<bool> ClAsanInstrumentAssembly(
cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
cl::init(false));
-bool IsStackReg(unsigned Reg) {
- return Reg == X86::RSP || Reg == X86::ESP || Reg == X86::SP;
+const int64_t MinAllowedDisplacement = std::numeric_limits<int32_t>::min();
+const int64_t MaxAllowedDisplacement = std::numeric_limits<int32_t>::max();
+
+int64_t ApplyDisplacementBounds(int64_t Displacement) {
+ return std::max(std::min(MaxAllowedDisplacement, Displacement),
+ MinAllowedDisplacement);
+}
+
+void CheckDisplacementBounds(int64_t Displacement) {
+ assert(Displacement >= MinAllowedDisplacement &&
+ Displacement <= MaxAllowedDisplacement);
}
+bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
+
+bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+
std::string FuncName(unsigned AccessSize, bool IsWrite) {
return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
utostr(AccessSize);
@@ -43,60 +126,264 @@ std::string FuncName(unsigned AccessSize, bool IsWrite) {
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
- X86AddressSanitizer(const MCSubtargetInfo &STI) : STI(STI) {}
+ struct RegisterContext {
+ private:
+ enum RegOffset {
+ REG_OFFSET_ADDRESS = 0,
+ REG_OFFSET_SHADOW,
+ REG_OFFSET_SCRATCH
+ };
+
+ public:
+ RegisterContext(unsigned AddressReg, unsigned ShadowReg,
+ unsigned ScratchReg) {
+ BusyRegs.push_back(convReg(AddressReg, MVT::i64));
+ BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
+ BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+ }
+
+ unsigned AddressReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+ }
+
+ unsigned ShadowReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+ }
+
+ unsigned ScratchReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+ }
+
+ void AddBusyReg(unsigned Reg) {
+ if (Reg != X86::NoRegister)
+ BusyRegs.push_back(convReg(Reg, MVT::i64));
+ }
+
+ void AddBusyRegs(const X86Operand &Op) {
+ AddBusyReg(Op.getMemBaseReg());
+ AddBusyReg(Op.getMemIndexReg());
+ }
+
+ unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+ static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
+ X86::RCX, X86::RDX, X86::RDI,
+ X86::RSI };
+ for (unsigned Reg : Candidates) {
+ if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
+ return convReg(Reg, VT);
+ }
+ return X86::NoRegister;
+ }
+
+ private:
+ unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
+ return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+ }
+
+ std::vector<unsigned> BusyRegs;
+ };
+
+ X86AddressSanitizer(const MCSubtargetInfo &STI)
+ : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
+
virtual ~X86AddressSanitizer() {}
// X86AsmInstrumentation implementation:
- virtual void InstrumentInstruction(
- const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) override {
+ virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) override {
+ InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
+ if (RepPrefix)
+ EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
+
InstrumentMOV(Inst, Operands, Ctx, MII, Out);
- }
- // Should be implemented differently in x86_32 and x86_64 subclasses.
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
+ RepPrefix = (Inst.getOpcode() == X86::REP_PREFIX);
+ if (!RepPrefix)
+ EmitInstruction(Out, Inst);
+ }
- void InstrumentMemOperand(MCParsedAsmOperand &Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx, MCStreamer &Out);
+  // Adjusts the stack and saves all registers used in instrumentation.
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+  // Restores all registers used in instrumentation and adjusts the stack.
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx, MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx, MCStreamer &Out) = 0;
+
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+ void InstrumentMemOperand(X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx,
+ MCStreamer &Out);
+ void InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, unsigned CntReg,
+ unsigned AccessSize, MCContext &Ctx, MCStreamer &Out);
+
+ void InstrumentMOVS(const MCInst &Inst, OperandVector &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
- void EmitInstruction(MCStreamer &Out, const MCInst &Inst) {
- Out.EmitInstruction(Inst, STI);
- }
+protected:
void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
-protected:
- const MCSubtargetInfo &STI;
+ void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
+ MCStreamer &Out) {
+ assert(VT == MVT::i32 || VT == MVT::i64);
+ MCInst Inst;
+ Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
+ Inst.addOperand(MCOperand::CreateReg(getX86SubSuperRegister(Reg, VT)));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+ unsigned Reg, MCContext &Ctx, MCStreamer &Out);
+
+  // Creates a new memory operand with Displacement added to the original
+  // displacement. Residue will contain the leftover amount that did not fit
+  // when the total displacement exceeds the 32-bit limit.
+ std::unique_ptr<X86Operand> AddDisplacement(X86Operand &Op,
+ int64_t Displacement,
+ MCContext &Ctx, int64_t *Residue);
+
+ bool is64BitMode() const {
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+ bool is32BitMode() const {
+ return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
+ }
+ bool is16BitMode() const {
+ return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
+ }
+
+ unsigned getPointerWidth() {
+ if (is16BitMode()) return 16;
+ if (is32BitMode()) return 32;
+ if (is64BitMode()) return 64;
+ llvm_unreachable("invalid mode");
+ }
+
+  // True when the previous instruction was actually a REP prefix.
+ bool RepPrefix;
+
+ // Offset from the original SP register.
+ int64_t OrigSPOffset;
};
void X86AddressSanitizer::InstrumentMemOperand(
- MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
assert(Op.isMem() && "Op should be a memory operand.");
assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
"AccessSize should be a power of two, less or equal than 16.");
+ // FIXME: take into account load/store alignment.
+ if (IsSmallMemAccess(AccessSize))
+ InstrumentMemOperandSmall(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
+ else
+ InstrumentMemOperandLarge(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
+}
+
+void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
+ unsigned CntReg,
+ unsigned AccessSize,
+ MCContext &Ctx, MCStreamer &Out) {
+ // FIXME: check whole ranges [DstReg .. DstReg + AccessSize * (CntReg - 1)]
+ // and [SrcReg .. SrcReg + AccessSize * (CntReg - 1)].
+ RegisterContext RegCtx(X86::RDX /* AddressReg */, X86::RAX /* ShadowReg */,
+ IsSmallMemAccess(AccessSize)
+ ? X86::RBX
+ : X86::NoRegister /* ScratchReg */);
+ RegCtx.AddBusyReg(DstReg);
+ RegCtx.AddBusyReg(SrcReg);
+ RegCtx.AddBusyReg(CntReg);
+
+ InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
+
+ // Test (%SrcReg)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
+ Out);
+ }
- X86Operand &MemOp = static_cast<X86Operand &>(Op);
- // FIXME: get rid of this limitation.
- if (IsStackReg(MemOp.getMemBaseReg()) || IsStackReg(MemOp.getMemIndexReg()))
+ // Test -1(%SrcReg, %CntReg, AccessSize)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
+ SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
+ Out);
+ }
+
+ // Test (%DstReg)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
+ }
+
+ // Test -1(%DstReg, %CntReg, AccessSize)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(),
+ SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
+ }
+
+ InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
+}
+
+void X86AddressSanitizer::InstrumentMOVS(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII,
+ MCStreamer &Out) {
+ // Access size in bytes.
+ unsigned AccessSize = 0;
+
+ switch (Inst.getOpcode()) {
+ case X86::MOVSB:
+ AccessSize = 1;
+ break;
+ case X86::MOVSW:
+ AccessSize = 2;
+ break;
+ case X86::MOVSL:
+ AccessSize = 4;
+ break;
+ case X86::MOVSQ:
+ AccessSize = 8;
+ break;
+ default:
return;
+ }
- // FIXME: take into account load/store alignment.
- if (AccessSize < 8)
- InstrumentMemOperandSmallImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
- else
- InstrumentMemOperandLargeImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ InstrumentMOVSImpl(AccessSize, Ctx, Out);
}
-void X86AddressSanitizer::InstrumentMOV(
- const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) {
+void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
+ OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) {
// Access size in bytes.
unsigned AccessSize = 0;
@@ -132,41 +419,201 @@ void X86AddressSanitizer::InstrumentMOV(
}
const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
+
for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
assert(Operands[Ix]);
MCParsedAsmOperand &Op = *Operands[Ix];
- if (Op.isMem())
- InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out);
+ if (Op.isMem()) {
+ X86Operand &MemOp = static_cast<X86Operand &>(Op);
+ RegisterContext RegCtx(
+ X86::RDI /* AddressReg */, X86::RAX /* ShadowReg */,
+ IsSmallMemAccess(AccessSize) ? X86::RCX
+ : X86::NoRegister /* ScratchReg */);
+ RegCtx.AddBusyRegs(MemOp);
+ InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
+ InstrumentMemOperand(MemOp, AccessSize, IsWrite, RegCtx, Ctx, Out);
+ InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
+ }
+ }
+}
+
+void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
+ MVT::SimpleValueType VT,
+ unsigned Reg, MCContext &Ctx,
+ MCStreamer &Out) {
+ int64_t Displacement = 0;
+ if (IsStackReg(Op.getMemBaseReg()))
+ Displacement -= OrigSPOffset;
+ if (IsStackReg(Op.getMemIndexReg()))
+ Displacement -= OrigSPOffset * Op.getMemScale();
+
+ assert(Displacement >= 0);
+
+ // Emit Op as is.
+ if (Displacement == 0) {
+ EmitLEA(Op, VT, Reg, Out);
+ return;
+ }
+
+ int64_t Residue;
+ std::unique_ptr<X86Operand> NewOp =
+ AddDisplacement(Op, Displacement, Ctx, &Residue);
+ EmitLEA(*NewOp, VT, Reg, Out);
+
+ while (Residue != 0) {
+ const MCConstantExpr *Disp =
+ MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx);
+ std::unique_ptr<X86Operand> DispOp =
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
+ SMLoc());
+ EmitLEA(*DispOp, VT, Reg, Out);
+ Residue -= Disp->getValue();
}
}
+std::unique_ptr<X86Operand>
+X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
+ MCContext &Ctx, int64_t *Residue) {
+ assert(Displacement >= 0);
+
+ if (Displacement == 0 ||
+ (Op.getMemDisp() && Op.getMemDisp()->getKind() != MCExpr::Constant)) {
+ *Residue = Displacement;
+ return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(),
+ Op.getMemDisp(), Op.getMemBaseReg(),
+ Op.getMemIndexReg(), Op.getMemScale(),
+ SMLoc(), SMLoc());
+ }
+
+ int64_t OrigDisplacement =
+ static_cast<const MCConstantExpr *>(Op.getMemDisp())->getValue();
+ CheckDisplacementBounds(OrigDisplacement);
+ Displacement += OrigDisplacement;
+
+ int64_t NewDisplacement = ApplyDisplacementBounds(Displacement);
+ CheckDisplacementBounds(NewDisplacement);
+
+ *Residue = Displacement - NewDisplacement;
+ const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx);
+ return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp,
+ Op.getMemBaseReg(), Op.getMemIndexReg(),
+ Op.getMemScale(), SMLoc(), SMLoc());
+}
+
class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x20000000;
X86AddressSanitizer32(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
+
virtual ~X86AddressSanitizer32() {}
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
+ unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
+ if (FrameReg == X86::NoRegister)
+ return FrameReg;
+ return getX86SubSuperRegister(FrameReg, MVT::i32);
+ }
+
+ void SpillReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(Reg));
+ OrigSPOffset -= 4;
+ }
+
+ void RestoreReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(Reg));
+ OrigSPOffset += 4;
+ }
+
+ void StoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+ OrigSPOffset -= 4;
+ }
+
+ void RestoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ OrigSPOffset += 4;
+ }
+
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (MRI && FrameReg != X86::NoRegister) {
+ SpillReg(Out, LocalFrameReg);
+ if (FrameReg == X86::ESP) {
+ Out.EmitCFIAdjustCfaOffset(4 /* byte size of the LocalFrameReg */);
+ Out.EmitCFIRelOffset(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
+ }
+ EmitInstruction(
+ Out,
+ MCInstBuilder(X86::MOV32rr).addReg(LocalFrameReg).addReg(FrameReg));
+ Out.EmitCFIRememberState();
+ Out.EmitCFIDefCfaRegister(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
+ }
+
+ SpillReg(Out, RegCtx.AddressReg(MVT::i32));
+ SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
+ if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+ StoreFlags(Out);
+ }
- private:
- void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
- bool IsWrite, unsigned AddressReg) {
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ RestoreFlags(Out);
+ if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
+ RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
+ RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
+ RestoreReg(Out, LocalFrameReg);
+ Out.EmitCFIRestoreState();
+ if (FrameReg == X86::ESP)
+ Out.EmitCFIAdjustCfaOffset(-4 /* byte size of the LocalFrameReg */);
+ }
+ }
+
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
+
+private:
+ void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out, const RegisterContext &RegCtx) {
EmitInstruction(Out, MCInstBuilder(X86::CLD));
EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
- EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::ESP)
- .addReg(X86::ESP).addImm(-16));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(AddressReg));
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
+ .addReg(X86::ESP)
+ .addReg(X86::ESP)
+ .addImm(-16));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
-
- const std::string& Fn = FuncName(AccessSize, IsWrite);
+ const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
@@ -174,136 +621,140 @@ public:
}
};
-void X86AddressSanitizer32::InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EDX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+void X86AddressSanitizer32::InstrumentMemOperandSmall(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
+ assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
+ .addReg(ShadowRegI32)
+ .addReg(ShadowRegI32)
+ .addImm(3));
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(X86::CL));
+ Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
+ SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- EmitInstruction(Out,
- MCInstBuilder(X86::TEST8rr).addReg(X86::CL).addReg(X86::CL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::EDX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::EDX)
- .addReg(X86::EDX).addImm(7));
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(7));
switch (AccessSize) {
+ default: llvm_unreachable("Incorrect access size");
case 1:
break;
case 2: {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EDX));
-
const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::EDX, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
+ SMLoc(), SMLoc()));
+ EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
break;
}
case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::EDX)
- .addReg(X86::EDX).addImm(3));
- break;
- default:
- assert(false && "Incorrect access size");
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(3));
break;
}
EmitInstruction(
- Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::ECX).addReg(X86::CL));
- EmitInstruction(
- Out, MCInstBuilder(X86::CMP32rr).addReg(X86::EDX).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+ Out,
+ MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
+ EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
+ ShadowRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
-
- EmitInstruction(Out, MCInstBuilder(X86::POPF32));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EDX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
}
-void X86AddressSanitizer32::InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+void X86AddressSanitizer32::InstrumentMemOperandLarge(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
+ .addReg(ShadowRegI32)
+ .addReg(ShadowRegI32)
+ .addImm(3));
{
MCInst Inst;
switch (AccessSize) {
- case 8:
- Inst.setOpcode(X86::CMP8mi);
- break;
- case 16:
- Inst.setOpcode(X86::CMP16mi);
- break;
- default:
- assert(false && "Incorrect access size");
- break;
+ default: llvm_unreachable("Incorrect access size");
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
}
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
+ SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
}
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
+}
- EmitInstruction(Out, MCInstBuilder(X86::POPF32));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
+ MCContext &Ctx,
+ MCStreamer &Out) {
+ StoreFlags(Out);
+
+  // No need to test when ECX equals zero.
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
+
+  // Instrument the first and last elements in the src and dst ranges.
+ InstrumentMOVSBase(X86::EDI /* DstReg */, X86::ESI /* SrcReg */,
+ X86::ECX /* CntReg */, AccessSize, Ctx, Out);
+
+ EmitLabel(Out, DoneSym);
+ RestoreFlags(Out);
}
class X86AddressSanitizer64 : public X86AddressSanitizer {
@@ -312,37 +763,127 @@ public:
X86AddressSanitizer64(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
+
virtual ~X86AddressSanitizer64() {}
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
+ unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
+ if (FrameReg == X86::NoRegister)
+ return FrameReg;
+ return getX86SubSuperRegister(FrameReg, MVT::i64);
+ }
+
+ void SpillReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(Reg));
+ OrigSPOffset -= 8;
+ }
+
+ void RestoreReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(Reg));
+ OrigSPOffset += 8;
+ }
+
+ void StoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
+ OrigSPOffset -= 8;
+ }
+
+ void RestoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
+ OrigSPOffset += 8;
+ }
+
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (MRI && FrameReg != X86::NoRegister) {
+ SpillReg(Out, X86::RBP);
+ if (FrameReg == X86::RSP) {
+ Out.EmitCFIAdjustCfaOffset(8 /* byte size of the LocalFrameReg */);
+ Out.EmitCFIRelOffset(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
+ }
+ EmitInstruction(
+ Out,
+ MCInstBuilder(X86::MOV64rr).addReg(LocalFrameReg).addReg(FrameReg));
+ Out.EmitCFIRememberState();
+ Out.EmitCFIDefCfaRegister(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
+ }
+
+ EmitAdjustRSP(Ctx, Out, -128);
+ SpillReg(Out, RegCtx.ShadowReg(MVT::i64));
+ SpillReg(Out, RegCtx.AddressReg(MVT::i64));
+ if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(MVT::i64));
+ StoreFlags(Out);
+ }
+
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ RestoreFlags(Out);
+ if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(MVT::i64));
+ RestoreReg(Out, RegCtx.AddressReg(MVT::i64));
+ RestoreReg(Out, RegCtx.ShadowReg(MVT::i64));
+ EmitAdjustRSP(Ctx, Out, 128);
+
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
+ RestoreReg(Out, LocalFrameReg);
+ Out.EmitCFIRestoreState();
+ if (FrameReg == X86::RSP)
+ Out.EmitCFIAdjustCfaOffset(-8 /* byte size of the LocalFrameReg */);
+ }
+ }
+
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RSP));
-
const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
+ SMLoc(), SMLoc()));
+ EmitLEA(*Op, MVT::i64, X86::RSP, Out);
+ OrigSPOffset += Offset;
}
- void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
- bool IsWrite) {
+ void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out, const RegisterContext &RegCtx) {
EmitInstruction(Out, MCInstBuilder(X86::CLD));
EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
- EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::RSP)
- .addReg(X86::RSP).addImm(-16));
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
+ .addReg(X86::RSP)
+ .addReg(X86::RSP)
+ .addImm(-16));
- const std::string& Fn = FuncName(AccessSize, IsWrite);
+ if (RegCtx.AddressReg(MVT::i64) != X86::RDI) {
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
+ RegCtx.AddressReg(MVT::i64)));
+ }
+ const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
@@ -350,119 +891,111 @@ private:
}
};
-void X86AddressSanitizer64::InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitAdjustRSP(Ctx, Out, -128);
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RCX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RDI));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RAX).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
- .addReg(X86::RAX).addImm(3));
+void X86AddressSanitizer64::InstrumentMemOperandSmall(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+
+ assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+
+ ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
+ AddressRegI64));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
+ .addReg(ShadowRegI64)
+ .addReg(ShadowRegI64)
+ .addImm(3));
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(X86::AL));
+ Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
+ SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- EmitInstruction(Out,
- MCInstBuilder(X86::TEST8rr).addReg(X86::AL).addReg(X86::AL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EDI));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(7));
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(7));
switch (AccessSize) {
+ default: llvm_unreachable("Incorrect access size");
case 1:
break;
case 2: {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::ECX));
-
const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
+ SMLoc(), SMLoc()));
+ EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
break;
}
case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
- break;
- default:
- assert(false && "Incorrect access size");
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(3));
break;
}
EmitInstruction(
- Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::EAX).addReg(X86::AL));
- EmitInstruction(
- Out, MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+ Out,
+ MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
+ EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
+ ShadowRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
-
- EmitInstruction(Out, MCInstBuilder(X86::POPF64));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RCX));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
- EmitAdjustRSP(Ctx, Out, 128);
}
-void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitAdjustRSP(Ctx, Out, -128);
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
+void X86AddressSanitizer64::InstrumentMemOperandLarge(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
- .addReg(X86::RAX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
+ AddressRegI64));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
+ .addReg(ShadowRegI64)
+ .addReg(ShadowRegI64)
+ .addImm(3));
{
MCInst Inst;
switch (AccessSize) {
+ default: llvm_unreachable("Incorrect access size");
case 8:
Inst.setOpcode(X86::CMP8mi);
break;
case 16:
Inst.setOpcode(X86::CMP16mi);
break;
- default:
- assert(false && "Incorrect access size");
- break;
}
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
+ SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
@@ -470,24 +1003,68 @@ void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
+}
- EmitInstruction(Out, MCInstBuilder(X86::POPF64));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
- EmitAdjustRSP(Ctx, Out, 128);
+void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
+ MCContext &Ctx,
+ MCStreamer &Out) {
+ StoreFlags(Out);
+
+  // No need to test when RCX equals zero.
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
+
+  // Instrument the first and last elements in the src and dst ranges.
+ InstrumentMOVSBase(X86::RDI /* DstReg */, X86::RSI /* SrcReg */,
+ X86::RCX /* CntReg */, AccessSize, Ctx, Out);
+
+ EmitLabel(Out, DoneSym);
+ RestoreFlags(Out);
}
} // End anonymous namespace
-X86AsmInstrumentation::X86AsmInstrumentation() {}
+X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI)
+ : STI(STI), InitialFrameReg(0) {}
+
X86AsmInstrumentation::~X86AsmInstrumentation() {}
-void X86AsmInstrumentation::InstrumentInstruction(
+void X86AsmInstrumentation::InstrumentAndEmitInstruction(
const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) {}
+ const MCInstrInfo &MII, MCStreamer &Out) {
+ EmitInstruction(Out, Inst);
+}
+
+void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
+ const MCInst &Inst) {
+ Out.EmitInstruction(Inst, STI);
+}
+
+unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
+ MCStreamer &Out) {
+ if (!Out.getNumFrameInfos()) // No active dwarf frame
+ return X86::NoRegister;
+ const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back();
+ if (Frame.End) // Active dwarf frame is closed
+ return X86::NoRegister;
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ if (!MRI) // No register info
+ return X86::NoRegister;
+
+ if (InitialFrameReg) {
+    // FrameReg is set explicitly; we're instrumenting a MachineFunction.
+ return InitialFrameReg;
+ }
+
+ return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */);
+}
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
@@ -501,7 +1078,7 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
if ((STI.getFeatureBits() & X86::Mode64Bit) != 0)
return new X86AddressSanitizer64(STI);
}
- return new X86AsmInstrumentation();
+ return new X86AsmInstrumentation(STI);
}
} // End llvm namespace
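
For orientation: the sequence emitted by InstrumentMemOperandSmall above is the
usual AddressSanitizer slow-path check. A C++ sketch of that logic, not part of
the patch — ShadowOffset stands for the class's kShadowOffset (0x20000000 in
the 32-bit class shown here) and Report for the __asan_report_{load,store}N
tail call:

    #include <cstdint>

    // Mirrors the emitted MOV8rm / TEST8rr / AND32ri / CMP32rr / JL sequence
    // for an AccessSize-byte access (AccessSize < 8) at address Addr.
    void checkSmallAccess(uintptr_t Addr, unsigned AccessSize,
                          intptr_t ShadowOffset, void (*Report)(uintptr_t)) {
      int8_t Shadow = *reinterpret_cast<int8_t *>((Addr >> 3) + ShadowOffset);
      if (Shadow == 0)
        return; // the whole 8-byte granule is addressable
      // Only the first Shadow bytes of the granule are addressable.
      intptr_t LastByte = (Addr & 7) + AccessSize - 1;
      if (LastByte >= Shadow)
        Report(Addr); // __asan_report_* does not return
    }
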
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 1bc3c0930153..19ebcc44f61e 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ASM_INSTRUMENTATION_H
-#define X86_ASM_INSTRUMENTATION_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
#include "llvm/ADT/SmallVector.h"
@@ -34,11 +34,15 @@ class X86AsmInstrumentation {
public:
virtual ~X86AsmInstrumentation();
- // Instruments Inst. Should be called just before the original
- // instruction is sent to Out.
- virtual void InstrumentInstruction(
+  // Sets the frame register corresponding to the current frame.
+ void SetInitialFrameRegister(unsigned RegNo) {
+ InitialFrameReg = RegNo;
+ }
+
+  // Tries to instrument and emit the instruction.
+ virtual void InstrumentAndEmitInstruction(
const MCInst &Inst,
- SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand> > &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
@@ -46,9 +50,17 @@ protected:
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
const MCContext &Ctx, const MCSubtargetInfo &STI);
- X86AsmInstrumentation();
+ X86AsmInstrumentation(const MCSubtargetInfo &STI);
+
+ unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
+
+ void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
+
+ const MCSubtargetInfo &STI;
+
+ unsigned InitialFrameReg;
};
} // End llvm namespace
-#endif // X86_ASM_INSTRUMENTATION_H
+#endif
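
The net effect of this interface change: the parser no longer emits an
instruction and separately asks for instrumentation; it hands every instruction
to InstrumentAndEmitInstruction, which emits either the instrumented sequence
or the instruction as is. A hypothetical call site, mirroring what X86AsmParser
does in the next file (MCOptions, Ctx, STI, FrameReg, Inst, Operands, MII and
Out are assumed to exist):

    std::unique_ptr<X86AsmInstrumentation> AI(
        CreateX86AsmInstrumentation(MCOptions, Ctx, STI));
    AI->SetInitialFrameRegister(FrameReg); // e.g. taken from a MachineFunction
    // Old flow: InstrumentInstruction(...) followed by Out.EmitInstruction().
    // New flow: a single call that instruments and emits.
    AI->InstrumentAndEmitInstruction(Inst, Operands, Ctx, MII, Out);
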
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a11a238fc976..d743aa6ef333 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <memory>
using namespace llvm;
@@ -55,12 +56,12 @@ static const char OpPrecedence[] = {
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
private:
SMLoc consumeToken() {
+ MCAsmParser &Parser = getParser();
SMLoc Result = Parser.getTok().getLoc();
Parser.Lex();
return Result;
@@ -85,7 +86,7 @@ private:
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
-
+
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Poped an empty stack!");
@@ -99,7 +100,7 @@ private:
"Unexpected operand!");
PostfixStack.push_back(std::make_pair(Op, Val));
}
-
+
void popOperator() { InfixOperatorStack.pop_back(); }
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
@@ -107,7 +108,7 @@ private:
InfixOperatorStack.push_back(Op);
return;
}
-
+
// Push the new operator if it has a higher precedence than the operator
// on the top of the stack or the operator on the top of the stack is a
// left parentheses.
@@ -117,7 +118,7 @@ private:
InfixOperatorStack.push_back(Op);
return;
}
-
+
// The operator on the top of the stack has higher precedence than the
// new operator.
unsigned ParenCount = 0;
@@ -125,17 +126,17 @@ private:
// Nothing to process.
if (InfixOperatorStack.empty())
break;
-
+
Idx = InfixOperatorStack.size() - 1;
StackOp = InfixOperatorStack[Idx];
if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
break;
-
+
// If we have an even parentheses count and we see a left parentheses,
// then stop processing.
if (!ParenCount && StackOp == IC_LPAREN)
break;
-
+
if (StackOp == IC_RPAREN) {
++ParenCount;
InfixOperatorStack.pop_back();
@@ -157,10 +158,10 @@ private:
if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
-
+
if (PostfixStack.empty())
return 0;
-
+
SmallVector<ICToken, 16> OperandStack;
for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
ICToken Op = PostfixStack[i];
@@ -262,7 +263,7 @@ private:
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
AddImmPrefix(addimmprefix) { Info.clear(); }
-
+
unsigned getBaseReg() { return BaseReg; }
unsigned getIndexReg() { return IndexReg; }
unsigned getScale() { return Scale; }
@@ -630,13 +631,10 @@ private:
}
};
- MCAsmParser &getParser() const { return Parser; }
-
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
+ MCAsmParser &Parser = getParser();
if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -644,8 +642,9 @@ private:
bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
- Parser.eatToEndOfStatement();
- return Error(L, Msg, Ranges, MatchingInlineAsm);
+ MCAsmParser &Parser = getParser();
+ Parser.eatToEndOfStatement();
+ return Error(L, Msg, Ranges, MatchingInlineAsm);
}
std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
@@ -685,6 +684,7 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
+ bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
@@ -693,10 +693,26 @@ private:
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
+ void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
+ MCStreamer &Out, bool MatchingInlineAsm);
+
+ bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool OmitRegisterFromClobberLists(unsigned RegNo) override;
/// doSrcDstMatch - Returns true if operands are matching in their
/// word size (%si and %di, %esi and %edi, etc.). Order depends on
@@ -730,6 +746,13 @@ private:
(X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
}
+ unsigned getPointerWidth() {
+ if (is16BitMode()) return 16;
+ if (is32BitMode()) return 32;
+ if (is64BitMode()) return 64;
+ llvm_unreachable("invalid mode");
+ }
+
bool isParsingIntelSyntax() {
return getParser().getAssemblerDialect();
}
@@ -743,11 +766,9 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &mii,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
- InstInfo(nullptr) {
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
+ const MCInstrInfo &mii, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -757,6 +778,8 @@ public:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ void SetFrameRegister(unsigned RegNo) override;
+
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -832,6 +855,7 @@ bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
RegNo = 0;
const AsmToken &PercentTok = Parser.getTok();
StartLoc = PercentTok.getLoc();
@@ -939,20 +963,26 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
+void X86AsmParser::SetFrameRegister(unsigned RegNo) {
+ Instrumentation->SetInitialFrameRegister(RegNo);
+}
+
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
- return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
- /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
+ return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
+ /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
+ Loc, Loc, 0);
}
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
- return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
- /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
+ return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
+ /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
+ Loc, Loc, 0);
}
std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
@@ -981,15 +1011,20 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info) {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
- unsigned RegNo =
- is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
- SMLoc(), Identifier, Info.OpDecl);
+ // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
+ // some other label reference.
+ if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
+ // Insert an explicit size if the user didn't have one.
+ if (!Size) {
+ Size = getPointerWidth();
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
+ /*Len=*/0, Size));
+ }
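+    // Editorial example (not part of the original patch): MSVC-style inline
+    // asm such as "call fn" reaches this point with Disp holding a symbol
+    // reference to "fn" and no explicit size, so the rewrite above injects a
+    // pointer-width size directive before matching.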
+
+ // Create an absolute memory reference in order to match against
+ // instructions taking a PC relative operand.
+ return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
+ Identifier, Info.OpDecl);
}
// We either have a direct symbol reference, or an offset from a symbol. The
@@ -1011,8 +1046,9 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
// if we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
BaseReg = BaseReg ? BaseReg : 1;
- return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
- End, Size, Identifier, Info.OpDecl);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
+ IndexReg, Scale, Start, End, Size, Identifier,
+ Info.OpDecl);
}
static void
@@ -1064,7 +1100,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
(*I).Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
- // Skip everything before the symbol.
+ // Skip everything before the symbol.
if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
assert(Len > 0 && "Expected a positive length.");
AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
@@ -1078,6 +1114,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
}
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
bool Done = false;
@@ -1088,7 +1125,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
// identifier. Don't try to parse it as a register.
if (Tok.getString().startswith("."))
break;
-
+
// If we're parsing an immediate expression, we don't expect a '['.
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
@@ -1154,7 +1191,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCSymbol *Sym =
getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *Val =
+ const MCExpr *Val =
MCSymbolRefExpr::Create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
@@ -1199,6 +1236,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp, unsigned Size) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
if (getLexer().isNot(AsmToken::LBrac))
@@ -1238,7 +1276,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
const MCExpr *NewDisp;
if (ParseIntelDotOperator(Disp, NewDisp))
return nullptr;
-
+
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
@@ -1251,17 +1289,17 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// handle [-42]
if (!BaseReg && !IndexReg) {
if (!SegReg)
- return X86Operand::CreateMem(Disp, Start, End, Size);
- else
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ Start, End, Size);
}
StringRef ErrMsg;
if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
Error(StartInBrac, ErrMsg);
return nullptr;
}
- return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
- End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
+ IndexReg, Scale, Start, End, Size);
}
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
@@ -1274,13 +1312,16 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
+ MCAsmParser &Parser = getParser();
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
- SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
+ void *Result =
+ SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
@@ -1292,9 +1333,22 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
if (End.getPointer() == EndPtr) break;
}
+ Identifier = LineBuf;
+
+ // If the identifier lookup was unsuccessful, assume that we are dealing with
+ // a label.
+ if (!Result) {
+ StringRef InternalName =
+ SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
+ Loc, false);
+ assert(InternalName.size() && "We should have an internal name here.");
+ // Push a rewrite for replacing the identifier name with the internal name.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
+ Identifier.size(),
+ InternalName));
+ }
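+
+  // Editorial example (not part of the original patch): in MSVC inline asm
+  // "jmp L1" where L1 is an asm-local label, the lookup above returns null,
+  // so L1 is registered via LookupInlineAsmLabel and the AOK_Label rewrite
+  // swaps the user-visible name for the compiler's internal label name.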
// Create the symbol reference.
- Identifier = LineBuf;
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
@@ -1305,6 +1359,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
unsigned Size) {
+ MCAsmParser &Parser = getParser();
assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
const AsmToken &Tok = Parser.getTok(); // Eat colon.
if (Tok.isNot(AsmToken::Colon))
@@ -1325,9 +1380,9 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
// be followed by a bracketed expression. If it isn't we know we have our
// final segment override.
const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
- return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
- /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
- Size);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
+ /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
+ Start, ImmDispToken.getEndLoc(), Size);
}
}
@@ -1340,7 +1395,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
if (getParser().parsePrimaryExpr(Val, End))
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
- return X86Operand::CreateMem(Val, Start, End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
@@ -1356,6 +1411,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
SMLoc Start,
unsigned Size) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -1369,7 +1425,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
if (getParser().parsePrimaryExpr(Val, End))
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
- return X86Operand::CreateMem(Val, Start, End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
@@ -1407,14 +1463,15 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
// BaseReg is non-zero to avoid assertions. In the context of inline asm,
// we're pointing to a local variable in memory, so the base register is
// really the frame or stack pointer.
- return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
- /*Scale=*/1, Start, End, Size, Identifier,
- Info.OpDecl);
+ return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
+ /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
+ Start, End, Size, Identifier, Info.OpDecl);
}
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
const MCExpr *&NewDisp) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
int64_t OrigDispVal, DotDispVal;
@@ -1459,6 +1516,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather then the content of a variable.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
@@ -1496,6 +1554,7 @@ enum IntelOperatorKind {
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
@@ -1529,6 +1588,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
}
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -1549,7 +1609,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (Size) {
Parser.Lex(); // Eat operand size (e.g., byte, word).
if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
- return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+ return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Parser.Lex(); // Eat ptr.
}
Start = Tok.getLoc();
@@ -1580,7 +1640,8 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// to the MCExpr with the directional local symbol and this is a
// memory operand not an immediate operand.
if (SM.getSym())
- return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
+ return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
+ Size);
const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
return X86Operand::CreateImm(ImmExpr, Start, End);
@@ -1611,6 +1672,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
}
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -1631,6 +1693,9 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
+ if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
+ return ErrorOperand(Start, "invalid segment register");
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -1648,6 +1713,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
+ MCAsmParser &Parser = getParser();
if(STI.getFeatureBits() & X86::FeatureAVX512) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
@@ -1719,6 +1785,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
SMLoc MemStart) {
+ MCAsmParser &Parser = getParser();
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
@@ -1733,8 +1800,9 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
if (getLexer().isNot(AsmToken::LParen)) {
// Unless we have a segment register, treat this as an immediate.
if (SegReg == 0)
- return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ MemStart, ExprEnd);
}
// Eat the '('.
@@ -1760,8 +1828,10 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
if (getLexer().isNot(AsmToken::LParen)) {
// Unless we have a segment register, treat this as an immediate.
if (SegReg == 0)
- return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
+ ExprEnd);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ MemStart, ExprEnd);
}
// Eat the '('.
@@ -1876,12 +1946,15 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
return nullptr;
}
- return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
- MemStart, MemEnd);
+ if (SegReg || BaseReg || IndexReg)
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
+ IndexReg, Scale, MemStart, MemEnd);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
InstInfo = &Info;
StringRef PatchedName = Name;
@@ -2200,6 +2273,22 @@ static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
}
+bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return true;
+ case X86::INT:
+ X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
+ assert(Op.isImm() && "expected immediate");
+ int64_t Res;
+ if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res < 0 || Res > 255) {
+ Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
+ return false;
+ }
+ return true;
+ }
+ llvm_unreachable("handle the instruction appropriately");
+}
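+// Editorial example (not part of the original patch): with this check wired
+// into matching, "int 0x80" still assembles, while "int 256" or a negative
+// vector is rejected with the range diagnostic above.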
+
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
@@ -2279,51 +2368,79 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
- Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
- Out);
- Out.EmitInstruction(Inst, STI);
+ Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
+ MII, Out);
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
- assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- ArrayRef<SMRange> EmptyRanges = None;
+ if (isParsingIntelSyntax())
+ return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+ MatchingInlineAsm);
+ return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+ MatchingInlineAsm);
+}
- // First, handle aliases that expand to multiple instructions.
+void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
+ OperandVector &Operands, MCStreamer &Out,
+ bool MatchingInlineAsm) {
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
- if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
- Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
- Op.getToken() == "finit" || Op.getToken() == "fsave" ||
- Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
+ const char *Repl = StringSwitch<const char *>(Op.getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(nullptr);
+ if (Repl) {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
-
- const char *Repl = StringSwitch<const char *>(Op.getToken())
- .Case("finit", "fninit")
- .Case("fsave", "fnsave")
- .Case("fstcw", "fnstcw")
- .Case("fstcww", "fnstcw")
- .Case("fstenv", "fnstenv")
- .Case("fstsw", "fnstsw")
- .Case("fstsww", "fnstsw")
- .Case("fclex", "fnclex")
- .Default(nullptr);
- assert(Repl && "Unknown wait-prefixed instruction");
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
+}
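+// Editorial note (not part of the original patch): e.g. "fstsw %ax" now
+// emits a standalone WAIT and is rewritten to "fnstsw %ax" before matching,
+// so the matcher only ever sees the no-wait mnemonics.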
+
+bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(ErrorInfo && "Unknown missing feature!");
+ ArrayRef<SMRange> EmptyRanges = None;
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "instruction requires:";
+ uint64_t Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask)
+ OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
+ Mask <<= 1;
+ }
+ return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+}
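+// Editorial example (feature names assumed, not from the original patch): if
+// ErrorInfo carries the bits for AVX and AVX512F, the loop above prints
+// "instruction requires: AVX AVX512F", one name per set feature bit.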
+
+bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(!Operands.empty() && "Unexpected empty operand list!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = None;
+
+ // First, handle aliases that expand to multiple instructions.
+ MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
bool WasOriginallyInvalidOperand = false;
MCInst Inst;
@@ -2332,8 +2449,11 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchInstructionImpl(Operands, Inst,
ErrorInfo, MatchingInlineAsm,
isParsingIntelSyntax())) {
- default: break;
+ default: llvm_unreachable("Unexpected match result!");
case Match_Success:
+ if (!validateInstruction(Inst, Operands))
+ return true;
+
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
@@ -2346,21 +2466,8 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
- case Match_MissingFeature: {
- assert(ErrorInfo && "Unknown missing feature!");
- // Special case the error message for the very common case where only
- // a single subtarget feature is missing.
- std::string Msg = "instruction requires:";
- unsigned Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
- if (ErrorInfo & Mask) {
- Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfo & Mask);
- }
- Mask <<= 1;
- }
- return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
- }
+ case Match_MissingFeature:
+ return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -2389,34 +2496,18 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
// Check for the various suffix matches.
- Tmp[Base.size()] = Suffixes[0];
- unsigned ErrorInfoIgnore;
- unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
- unsigned Match1, Match2, Match3, Match4;
-
- Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match1 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match2 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match3 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match4 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
+ uint64_t ErrorInfoIgnore;
+ uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+ unsigned Match[4];
+
+ for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
+ Tmp.back() = Suffixes[I];
+ Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match[I] == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ }
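+
+  // Editorial example (not part of the original patch): AT&T "inc (%ebx)"
+  // fails the initial match, then the loop above matches incb, incw, and
+  // incl in 32-bit mode, and the ambiguity is reported further below with
+  // the collected suffix characters.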
// Restore the old token.
Op.setTokenValue(Base);
@@ -2425,8 +2516,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
unsigned NumSuccessfulMatches =
- (Match1 == Match_Success) + (Match2 == Match_Success) +
- (Match3 == Match_Success) + (Match4 == Match_Success);
+ std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
@@ -2442,10 +2532,9 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
- if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
- if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
- if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
- if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+ for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
+ if (Match[I] == Match_Success)
+ MatchChars[NumMatches++] = Suffixes[I];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
@@ -2466,8 +2555,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
- if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
- (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
if (!WasOriginallyInvalidOperand) {
ArrayRef<SMRange> Ranges =
MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
@@ -2476,7 +2564,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
// Recover location info for the operand if we know which was the problem.
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction",
EmptyRanges, MatchingInlineAsm);
@@ -2495,27 +2583,19 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If one instruction matched with a missing feature, report this as a
// missing feature.
- if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
- (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- std::string Msg = "instruction requires:";
- unsigned Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
- if (ErrorInfoMissingFeature & Mask) {
- Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
- }
- Mask <<= 1;
- }
- return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_MissingFeature) == 1) {
+ ErrorInfo = ErrorInfoMissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
- if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
- (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
- Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- MatchingInlineAsm);
- return true;
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_InvalidOperand) == 1) {
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
@@ -2524,25 +2604,176 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
+bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(!Operands.empty() && "Unexpected empty operand list!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+ StringRef Mnemonic = Op.getToken();
+ ArrayRef<SMRange> EmptyRanges = None;
+
+ // First, handle aliases that expand to multiple instructions.
+ MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
+
+ MCInst Inst;
+
+ // Find one unsized memory operand, if present.
+ X86Operand *UnsizedMemOp = nullptr;
+ for (const auto &Op : Operands) {
+ X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+ if (X86Op->isMemUnsized())
+ UnsizedMemOp = X86Op;
+ }
+
+ // Allow some instructions to have implicitly pointer-sized operands. This is
+ // compatible with gas.
+ if (UnsizedMemOp) {
+ static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
+ for (const char *Instr : PtrSizedInstrs) {
+ if (Mnemonic == Instr) {
+ UnsizedMemOp->Mem.Size = getPointerWidth();
+ break;
+ }
+ }
+ }
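+  // Editorial example (not part of the original patch): gas accepts
+  // "call [rax]" with no size directive, so the operand is widened to the
+  // pointer width above; "call qword ptr [rax]" arrives already sized and
+  // is unaffected.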
+
+ // If an unsized memory operand is present, try to match with each memory
+ // operand size. In Intel assembly, the size is not part of the instruction
+ // mnemonic.
+ SmallVector<unsigned, 8> Match;
+ uint64_t ErrorInfoMissingFeature = 0;
+ if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
+ static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
+ for (unsigned Size : MopSizes) {
+ UnsizedMemOp->Mem.Size = Size;
+ uint64_t ErrorInfoIgnore;
+ unsigned LastOpcode = Inst.getOpcode();
+ unsigned M =
+ MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
+ if (Match.empty() || LastOpcode != Inst.getOpcode())
+ Match.push_back(M);
+
+ // If this returned as a missing feature failure, remember that.
+ if (Match.back() == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ }
+
+ // Restore the size of the unsized memory operand if we modified it.
+ if (UnsizedMemOp)
+ UnsizedMemOp->Mem.Size = 0;
+ }
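+  // Editorial example (not part of the original patch): Intel
+  // "mov [eax], 1" matches at sizes 8, 16, and 32 with distinct opcodes, so
+  // multiple results are recorded and the ambiguity is diagnosed below,
+  // while "mov byte ptr [eax], 1" bypasses the retry loop entirely.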
+
+ // If we haven't matched anything yet, this is not a basic integer or FPU
+ // operation. There shouldn't be any ambiguity in our mnemonic table, so try
+ // matching with the unsized operand.
+ if (Match.empty()) {
+ Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm,
+ isParsingIntelSyntax()));
+ // If this returned as a missing feature failure, remember that.
+ if (Match.back() == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfo;
+ }
+
+ // Restore the size of the unsized memory operand if we modified it.
+ if (UnsizedMemOp)
+ UnsizedMemOp->Mem.Size = 0;
+
+ // If it's a bad mnemonic, all results will be the same.
+ if (Match.back() == Match_MnemonicFail) {
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
+ return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
+ Ranges, MatchingInlineAsm);
+ }
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ std::count(std::begin(Match), std::end(Match), Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ if (!validateInstruction(Inst, Operands))
+ return true;
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the individual
+ // transformations can chain off each other.
+ if (!MatchingInlineAsm)
+ while (processInstruction(Inst, Operands))
+ ;
+ Inst.setLoc(IDLoc);
+ if (!MatchingInlineAsm)
+ EmitInstruction(Inst, Operands, Out);
+ Opcode = Inst.getOpcode();
+ return false;
+ } else if (NumSuccessfulMatches > 1) {
+ assert(UnsizedMemOp &&
+ "multiple matches only possible with unsized memory operands");
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
+ return Error(UnsizedMemOp->getStartLoc(),
+ "ambiguous operand size for instruction '" + Mnemonic + "\'",
+ Ranges, MatchingInlineAsm);
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_MissingFeature) == 1) {
+ ErrorInfo = ErrorInfoMissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ MatchingInlineAsm);
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_InvalidOperand) == 1) {
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
+ MatchingInlineAsm);
+}
+
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
+ MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (Parser.getTok().getString() == "prefix")
+ Parser.Lex();
+ else if (Parser.getTok().getString() == "noprefix")
+ return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
+ "supported: registers must have a "
+ "'%' prefix in .att_syntax");
+ }
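+    // Editorial note (not part of the original patch): without the '%'
+    // prefix, a register name like "eax" would be lexed as an ordinary
+    // symbol, so AT&T mode cannot support "noprefix".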
getParser().setAssemblerDialect(0);
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // FIXME: Handle noprefix
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
+ else if (Parser.getTok().getString() == "prefix")
+ return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
+ "supported: registers must not have "
+ "a '%' prefix in .intel_syntax");
}
return false;
}
@@ -2552,6 +2783,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2579,6 +2811,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (IDVal == ".code16") {
Parser.Lex();
if (!is16BitMode()) {
diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index ef1565f585e9..72aeeaac1639 100644
--- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ASM_PARSER_COMMON_H
-#define X86_ASM_PARSER_COMMON_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMPARSERCOMMON_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMPARSERCOMMON_H
namespace llvm {
@@ -24,10 +24,6 @@ inline bool isImmSExti32i8Value(uint64_t Value) {
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-inline bool isImmZExtu32u8Value(uint64_t Value) {
- return (Value <= 0x00000000000000FFULL);
-}
-
inline bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
@@ -40,4 +36,4 @@ inline bool isImmSExti64i32Value(uint64_t Value) {
} // End of namespace llvm
-#endif // X86_ASM_PARSER_COMMON_H
+#endif
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 1bbfc1192447..6675d4dc045c 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_OPERAND_H
-#define X86_OPERAND_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
#include "X86AsmParserCommon.h"
#include "llvm/MC/MCExpr.h"
@@ -53,6 +53,7 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned IndexReg;
unsigned Scale;
unsigned Size;
+ unsigned ModeSize;
};
union {
@@ -120,6 +121,10 @@ struct X86Operand : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.Scale;
}
+ unsigned getMemModeSize() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.ModeSize;
+ }
bool isToken() const override {return Kind == Token; }
@@ -153,20 +158,6 @@ struct X86Operand : public MCParsedAsmOperand {
// extension.
return isImmSExti32i8Value(CE->getValue());
}
- bool isImmZExtu32u8() const {
- if (!isImm())
- return false;
-
- // If this isn't a constant expr, just assume it fits and let relaxation
- // handle it.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
- return true;
-
- // Otherwise, check the value is in a range that makes sense for this
- // extension.
- return isImmZExtu32u8Value(CE->getValue());
- }
bool isImmSExti64i8() const {
if (!isImm())
return false;
@@ -205,6 +196,9 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const override { return Kind == Memory; }
+ bool isMemUnsized() const {
+ return Kind == Memory && Mem.Size == 0;
+ }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
@@ -260,6 +254,14 @@ struct X86Operand : public MCParsedAsmOperand {
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isAbsMem16() const {
+ return isAbsMem() && Mem.ModeSize == 16;
+ }
+
+ bool isAbsMem32() const {
+ return isAbsMem() && Mem.ModeSize != 16;
+ }
+
bool isSrcIdx() const {
return !getMemIndexReg() && getMemScale() == 1 &&
(getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
@@ -299,21 +301,43 @@ struct X86Operand : public MCParsedAsmOperand {
return isMem64() && isDstIdx();
}
- bool isMemOffs8() const {
- return Kind == Memory && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
+ bool isMemOffs() const {
+ return Kind == Memory && !getMemBaseReg() && !getMemIndexReg() &&
+ getMemScale() == 1;
+ }
+
+ bool isMemOffs16_8() const {
+ return isMemOffs() && Mem.ModeSize == 16 && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMemOffs16_16() const {
+ return isMemOffs() && Mem.ModeSize == 16 && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMemOffs16_32() const {
+ return isMemOffs() && Mem.ModeSize == 16 && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMemOffs32_8() const {
+ return isMemOffs() && Mem.ModeSize == 32 && (!Mem.Size || Mem.Size == 8);
}
- bool isMemOffs16() const {
- return Kind == Memory && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
+ bool isMemOffs32_16() const {
+ return isMemOffs() && Mem.ModeSize == 32 && (!Mem.Size || Mem.Size == 16);
}
- bool isMemOffs32() const {
- return Kind == Memory && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
+ bool isMemOffs32_32() const {
+ return isMemOffs() && Mem.ModeSize == 32 && (!Mem.Size || Mem.Size == 32);
}
- bool isMemOffs64() const {
- return Kind == Memory && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
+ bool isMemOffs32_64() const {
+ return isMemOffs() && Mem.ModeSize == 32 && (!Mem.Size || Mem.Size == 64);
+ }
+ bool isMemOffs64_8() const {
+ return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMemOffs64_16() const {
+ return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMemOffs64_32() const {
+ return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMemOffs64_64() const {
+ return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 64);
}
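+  // Editorial note (not part of the original patch): splitting isMemOffs by
+  // ModeSize lets the matcher select the MOV-with-offset encoding for the
+  // current code size, e.g. "mov al, [0x1234]" assembled in 16-bit code is
+  // matched through isMemOffs16_8.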
bool isReg() const override { return Kind == Register; }
@@ -441,8 +465,9 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static std::unique_ptr<X86Operand>
- CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0,
- StringRef SymName = StringRef(), void *OpDecl = nullptr) {
+ CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0, StringRef SymName = StringRef(),
+ void *OpDecl = nullptr) {
auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -450,6 +475,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
+ Res->Mem.ModeSize = ModeSize;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
@@ -458,9 +484,9 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create a generalized memory operand.
static std::unique_ptr<X86Operand>
- CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
- unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, StringRef SymName = StringRef(),
+ CreateMem(unsigned ModeSize, unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc,
+ SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(),
void *OpDecl = nullptr) {
// We should never just have a displacement; that should be parsed as an
// absolute memory operand.
@@ -476,6 +502,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
+ Res->Mem.ModeSize = ModeSize;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
@@ -485,4 +512,4 @@ struct X86Operand : public MCParsedAsmOperand {
} // End of namespace llvm
-#endif // X86_OPERAND
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index a09767e1eaff..1083fad80e8a 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,15 +14,12 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
- X86AtomicExpandPass.cpp
- X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
- X86JITInfo.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
X86PadShortFunction.cpp
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
index cac7adff4922..e003fc9f996e 100644
--- a/lib/Target/X86/Disassembler/LLVMBuild.txt
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Disassembler
parent = X86
-required_libraries = MC Support X86Info
+required_libraries = MCDisassembler Support X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 521bd21b81c6..1c5618288e75 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -52,8 +51,8 @@ const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));
-namespace llvm {
-
+namespace llvm {
+
// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
@@ -97,16 +96,26 @@ X86GenericDisassembler::X86GenericDisassembler(
}
}
-/// regionReader - a callback function that wraps the readByte method from
-/// MemoryObject.
+struct Region {
+ ArrayRef<uint8_t> Bytes;
+ uint64_t Base;
+ Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
+};
+
+/// A callback function that reads a single byte out of a Region.
///
-/// @param arg - The generic callback parameter. In this case, this should
-/// be a pointer to a MemoryObject.
-/// @param byte - A pointer to the byte to be read.
-/// @param address - The address to be read.
-static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
- const MemoryObject* region = static_cast<const MemoryObject*>(arg);
- return region->readByte(address, byte);
+/// @param Arg - The generic callback parameter. In this case, this should
+/// be a pointer to a Region.
+/// @param Byte - A pointer to the byte to be read.
+/// @param Address - The address to be read.
+static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
+ auto *R = static_cast<const Region *>(Arg);
+ ArrayRef<uint8_t> Bytes = R->Bytes;
+ unsigned Index = Address - R->Base;
+ if (Bytes.size() <= Index)
+ return -1;
+ *Byte = Bytes[Index];
+ return 0;
}
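+// Editorial note (not part of the original patch): returning -1 for an
+// out-of-range Address makes the decoder treat the instruction as truncated
+// instead of reading past the buffer, replacing the bounds handling that
+// MemoryObject::readByte used to provide.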
/// logger - a callback function that wraps the operator<< method from
@@ -118,47 +127,38 @@ static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
static void logger(void* arg, const char* log) {
if (!arg)
return;
-
+
raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
vStream << log << "\n";
-}
-
+}
+
//
// Public interface for the disassembler
//
-MCDisassembler::DecodeStatus
-X86GenericDisassembler::getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
- CommentStream = &cStream;
-
- InternalInstruction internalInstr;
-
- dlog_t loggerFn = logger;
- if (&vStream == &nulls())
- loggerFn = nullptr; // Disable logging completely if it's going to nulls().
-
- int ret = decodeInstruction(&internalInstr,
- regionReader,
- (const void*)&region,
- loggerFn,
- (void*)&vStream,
- (const void*)MII.get(),
- address,
- fMode);
-
- if (ret) {
- size = internalInstr.readerCursor - address;
+MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
+ MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream) const {
+ CommentStream = &CStream;
+
+ InternalInstruction InternalInstr;
+
+ dlog_t LoggerFn = logger;
+ if (&VStream == &nulls())
+ LoggerFn = nullptr; // Disable logging completely if it's going to nulls().
+
+ Region R(Bytes, Address);
+
+ int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R,
+ LoggerFn, (void *)&VStream,
+ (const void *)MII.get(), Address, fMode);
+
+ if (Ret) {
+ Size = InternalInstr.readerCursor - Address;
return Fail;
- }
- else {
- size = internalInstr.length;
- return (!translateInstruction(instr, internalInstr, this)) ?
- Success : Fail;
+ } else {
+ Size = InternalInstr.length;
+ return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail;
}
}
@@ -184,7 +184,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
}
/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
-/// immediate Value in the MCInst.
+/// immediate Value in the MCInst.
///
/// @param Value - The immediate Value, has had any PC adjustment made by
/// the caller.
@@ -196,7 +196,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
/// called then that function is called to get any symbolic information for the
/// immediate in the instruction using the Address, Offset and Width. If that
-/// returns non-zero then the symbolic information it returns is used to create
+/// returns non-zero then the symbolic information it returns is used to create
/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is true then a symbol look up for immediate Value
/// is done and if a symbol is found an MCExpr is created with that, else
@@ -204,8 +204,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// if it adds an operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
- uint64_t Width, MCInst &MI,
- const MCDisassembler *Dis) {
+ uint64_t Width, MCInst &MI,
+ const MCDisassembler *Dis) {
return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
Offset, Width);
}
@@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
/// These can often be addresses in a literal pool. The Address of the
/// instruction and its immediate Value are used to determine the address
/// being referenced in the literal pool entry. The SymbolLookUp call back will
-/// return a pointer to a literal 'C' string if the referenced address is an
+/// return a pointer to a literal 'C' string if the referenced address is an
/// address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
const void *Decoder) {
@@ -287,7 +287,7 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
const OperandSpecifier &operand,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
// Sign-extend the immediate if necessary.
OperandType type = (OperandType)operand.type;
@@ -350,6 +350,54 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case ENCODING_IO:
break;
}
+ } else if (type == TYPE_IMM3) {
+ // Check for immediates that printSSECC can't handle.
+ if (immediate >= 8) {
+ unsigned NewOpc;
+ switch (mcInst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break;
+ case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break;
+ case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break;
+ case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break;
+ case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break;
+ case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break;
+ case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break;
+ case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break;
+ }
+ // Switch opcode to the one that doesn't get special printing.
+ mcInst.setOpcode(NewOpc);
+ }
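+    // Editorial example (not part of the original patch): an SSE compare
+    // with immediate 8 has no condition-code alias like "cmpeqps", so the
+    // opcode is switched to its _alt form above and the raw immediate is
+    // printed instead.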
+ } else if (type == TYPE_IMM5) {
+ // Check for immediates that printAVXCC can't handle.
+ if (immediate >= 32) {
+ unsigned NewOpc;
+ switch (mcInst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
+ case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
+ case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
+ case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
+ case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
+ case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
+ case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
+ case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
+ case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
+ case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
+ case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
+ case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
+ case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
+ case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
+ case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
+ case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
+ case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
+ case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
+ case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
+ case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ }
+ // Switch opcode to the one that doesn't get special printing.
+ mcInst.setOpcode(NewOpc);
+ }
}
switch (type) {
@@ -407,7 +455,7 @@ static bool translateRMRegister(MCInst &mcInst,
debug("A R/M register operand may not have a SIB byte");
return true;
}
-
+
switch (insn.eaBase) {
default:
debug("Unexpected EA base register");
@@ -427,7 +475,7 @@ static bool translateRMRegister(MCInst &mcInst,
ALL_REGS
#undef ENTRY
}
-
+
return false;
}
@@ -440,26 +488,26 @@ static bool translateRMRegister(MCInst &mcInst,
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
// Addresses in an MCInst are represented as five operands:
- // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
- // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
// scale amount
// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
- // the index (which is multiplied by the
+ // the index (which is multiplied by the
// scale amount)
// 4. displacement (immediate) 0, or the displacement if there is one
// 5. segmentreg (register) x86_registerNONE for now, but could be set
// if we have segment overrides
-
+
MCOperand baseReg;
MCOperand scaleAmount;
MCOperand indexReg;
MCOperand displacement;
MCOperand segmentReg;
uint64_t pcrel = 0;
-
+
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
if (insn.sibBase != SIB_BASE_NONE) {
switch (insn.sibBase) {
@@ -512,7 +560,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
- insn.sibIndex = (SIBIndex)(IndexBase +
+ insn.sibIndex = (SIBIndex)(IndexBase +
(insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
}
@@ -534,7 +582,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
} else {
indexReg = MCOperand::CreateReg(0);
}
-
+
scaleAmount = MCOperand::CreateImm(insn.sibScale);
} else {
switch (insn.eaBase) {
@@ -553,7 +601,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
}
else
baseReg = MCOperand::CreateReg(0);
-
+
indexReg = MCOperand::CreateReg(0);
break;
case EA_BASE_BX_SI:
@@ -584,7 +632,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
// placeholders to keep the compiler happy.
#define ENTRY(x) \
case EA_BASE_##x: \
- baseReg = MCOperand::CreateReg(X86::x); break;
+ baseReg = MCOperand::CreateReg(X86::x); break;
ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
@@ -595,14 +643,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
return true;
}
}
-
+
scaleAmount = MCOperand::CreateImm(1);
}
-
+
displacement = MCOperand::CreateImm(insn.displacement);
segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
-
+
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
@@ -623,7 +671,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn, const MCDisassembler *Dis) {
+ InternalInstruction &insn, const MCDisassembler *Dis) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@@ -633,8 +681,6 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_R32:
case TYPE_R64:
case TYPE_Rv:
- case TYPE_MM:
- case TYPE_MM32:
case TYPE_MM64:
case TYPE_XMM:
case TYPE_XMM32:
@@ -660,9 +706,6 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M32FP:
case TYPE_M64FP:
case TYPE_M80FP:
- case TYPE_M16INT:
- case TYPE_M32INT:
- case TYPE_M64INT:
case TYPE_M1616:
case TYPE_M1632:
case TYPE_M1664:
@@ -670,7 +713,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
return translateRMMemory(mcInst, insn, Dis);
}
}
-
+
/// translateFPRegister - Translates a stack position on the FPU stack to its
/// LLVM form, and appends it to an MCInst.
///
@@ -698,7 +741,7 @@ static bool translateMaskRegister(MCInst &mcInst,
return false;
}
-/// translateOperand - Translates an operand stored in an internal instruction
+/// translateOperand - Translates an operand stored in an internal instruction
/// to LLVM's format and appends it to an MCInst.
///
/// @param mcInst - The MCInst to append to.
@@ -707,7 +750,7 @@ static bool translateMaskRegister(MCInst &mcInst,
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
@@ -761,7 +804,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
insn, Dis);
}
}
-
+
/// translateInstruction - Translates an internal instruction and all its
/// operands to an MCInst.
///
@@ -770,12 +813,12 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
if (!insn.spec) {
debug("Instruction has no specification");
return true;
}
-
+
mcInst.setOpcode(insn.instructionID);
// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
// prefix bytes should be disassembled as xrelease and xacquire then set the
@@ -786,9 +829,9 @@ static bool translateInstruction(MCInst &mcInst,
else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
mcInst.setOpcode(X86::XACQUIRE_PREFIX);
}
-
+
insn.numImmediatesTranslated = 0;
-
+
for (const auto &Op : insn.operands) {
if (Op.encoding != ENCODING_NONE) {
if (translateOperand(mcInst, Op, insn, Dis)) {
@@ -796,7 +839,7 @@ static bool translateInstruction(MCInst &mcInst,
}
}
}
-
+
return false;
}
@@ -807,9 +850,9 @@ static MCDisassembler *createX86Disassembler(const Target &T,
return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
}
-extern "C" void LLVMInitializeX86Disassembler() {
+extern "C" void LLVMInitializeX86Disassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
createX86Disassembler);
TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
createX86Disassembler);
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 4dc7c29078fc..d7f426b2641d 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -71,8 +71,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLER_H
-#define X86DISASSEMBLER_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
#include "X86DisassemblerDecoderCommon.h"
#include "llvm/MC/MCDisassembler.h"
@@ -87,21 +87,17 @@ class raw_ostream;
namespace X86Disassembler {
-/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
-/// All each platform class should have to do is subclass the constructor, and
-/// provide a different disassemblerMode value.
+/// Generic disassembler for all X86 platforms. All that each platform class
+/// should have to do is subclass the constructor and provide a different
+/// disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MII;
public:
- /// Constructor - Initializes the disassembler.
- ///
X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
std::unique_ptr<const MCInstrInfo> MII);
public:
-
- /// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &vStream,
raw_ostream &cStream) const override;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index ab3d1f774bc7..619a0d4dd65e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1,4 +1,4 @@
-//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
+//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include <stdarg.h> /* for va_*() */
-#include <stdio.h> /* for vsnprintf() */
-#include <stdlib.h> /* for exit() */
-#include <string.h> /* for memset() */
+#include <cstdarg> /* for va_*() */
+#include <cstdio> /* for vsnprintf() */
+#include <cstdlib> /* for exit() */
+#include <cstring> /* for memset() */
#include "X86DisassemblerDecoder.h"
@@ -472,8 +472,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
insn->vectorExtensionType = TYPE_EVEX;
- }
- else {
+ } else {
unconsumeByte(insn); /* unconsume byte1 */
unconsumeByte(insn); /* unconsume byte */
insn->necessaryPrefixLocation = insn->readerCursor - 2;
@@ -504,8 +503,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
}
- }
- else if (byte == 0xc4) {
+ } else if (byte == 0xc4) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -516,8 +514,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vectorExtensionType = TYPE_VEX_3B;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
- }
- else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -541,8 +538,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]);
}
- }
- else if (byte == 0xc5) {
+ } else if (byte == 0xc5) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -552,8 +548,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vectorExtensionType = TYPE_VEX_2B;
- }
- else {
+ } else {
unconsumeByte(insn);
}
@@ -566,8 +561,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
| (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
}
- switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
- {
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
default:
break;
case VEX_PREFIX_66:
@@ -579,8 +573,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0],
insn->vectorExtensionPrefix[1]);
}
- }
- else if (byte == 0x8f) {
+ } else if (byte == 0x8f) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -591,8 +584,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
insn->vectorExtensionType = TYPE_XOP;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
- }
- else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -612,8 +604,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
| (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
}
- switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
- {
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
default:
break;
case VEX_PREFIX_66:
@@ -625,8 +616,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]);
}
- }
- else {
+ } else {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
@@ -698,8 +688,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = ONEBYTE;
- if (insn->vectorExtensionType == TYPE_EVEX)
- {
+ if (insn->vectorExtensionType == TYPE_EVEX) {
switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
@@ -715,8 +704,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
- }
- else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
@@ -732,12 +720,10 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
- }
- else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
- }
- else if (insn->vectorExtensionType == TYPE_XOP) {
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
@@ -866,6 +852,22 @@ static bool is16BitEquivalent(const char* orig, const char* equiv) {
}
/*
+ * is64Bit - Determines whether this instruction is a 64-bit instruction.
+ *
+ * @param name - The name of the instruction to check
+ */
+static bool is64Bit(const char* name) {
+ off_t i;
+
+ for (i = 0;; ++i) {
+ if (name[i] == '\0')
+ return false;
+ if (name[i] == '6' && name[i+1] == '4')
+ return true;
+ }
+}
+
+/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
@@ -911,8 +913,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_EVEXL;
if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXL2;
- }
- else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -927,8 +928,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
attrMask |= ATTR_VEXL;
- }
- else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -943,8 +943,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
attrMask |= ATTR_VEXL;
- }
- else if (insn->vectorExtensionType == TYPE_XOP) {
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -959,12 +958,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
attrMask |= ATTR_VEXL;
- }
- else {
+ } else {
return -1;
}
- }
- else {
+ } else {
if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
@@ -978,29 +975,75 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
- if (getIDWithAttrMask(&instructionID, insn, attrMask))
- return -1;
-
/*
* JCXZ/JECXZ need special handling for 16-bit mode because the meaning
* of the AdSize prefix is inverted w.r.t. 32-bit mode.
*/
- if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
- const struct InstructionSpecifier *spec;
- spec = specifierForUID(instructionID);
+ if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
+ insn->opcode == 0xE3)
+ attrMask ^= ATTR_ADSIZE;
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if (insn->mode != MODE_64BIT &&
+ insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
/*
- * Check for Ii8PCRel instructions. We could alternatively do a
- * string-compare on the names, but this is probably cheaper.
+ * The tables can't distinguish between cases where the W-bit is used to
+ * select register size and cases where it's a required part of the opcode.
*/
- if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
- attrMask ^= ATTR_ADSIZE;
- if (getIDWithAttrMask(&instructionID, insn, attrMask))
- return -1;
+ if ((insn->vectorExtensionType == TYPE_EVEX &&
+ wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_VEX_3B &&
+ wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_XOP &&
+ wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
+
+ uint16_t instructionIDWithREXW;
+ if (getIDWithAttrMask(&instructionIDWithREXW,
+ insn, attrMask | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(instructionID);
+ return 0;
+ }
+
+ const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg);
+ // If the REX.W-matched form is not a 64-bit instruction, switch to it.
+ if (!is64Bit(SpecName)) {
+ insn->instructionID = instructionIDWithREXW;
+ insn->spec = specifierForUID(instructionIDWithREXW);
+ return 0;
+ }
}
}
- /* The following clauses compensate for limitations of the tables. */
+ /*
+ * Absolute moves need special handling.
+ * -In 16-bit mode, the meaning of the AdSize and OpSize prefixes is
+ *  inverted w.r.t. 32-bit mode.
+ * -In 32-bit mode, we need to ensure the ADSIZE prefix is observed in
+ *  any position.
+ */
+ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
+ /* Make sure we observed the prefixes in any position. */
+ if (insn->prefixPresent[0x67])
+ attrMask |= ATTR_ADSIZE;
+ if (insn->prefixPresent[0x66])
+ attrMask |= ATTR_OPSIZE;
+
+ /* In 16-bit, invert the attributes. */
+ if (insn->mode == MODE_16BIT)
+ attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(instructionID);
+ return 0;
+ }
if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
!(attrMask & ATTR_OPSIZE)) {
@@ -1417,22 +1460,14 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_VK16: \
return prefix##_K0 + index; \
case TYPE_MM64: \
- case TYPE_MM32: \
- case TYPE_MM: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_MM0 + index; \
+ return prefix##_MM0 + (index & 0x7); \
case TYPE_SEGMENTREG: \
if (index > 5) \
*valid = 0; \
return prefix##_ES + index; \
case TYPE_DEBUGREG: \
- if (index > 7) \
- *valid = 0; \
return prefix##_DR0 + index; \
case TYPE_CONTROLREG: \
- if (index > 8) \
- *valid = 0; \
return prefix##_CR0 + index; \
} \
}
@@ -1709,12 +1744,6 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (Op.type == TYPE_IMM3 &&
- insn->immediates[insn->numImmediatesConsumed - 1] > 7)
- return -1;
- if (Op.type == TYPE_IMM5 &&
- insn->immediates[insn->numImmediatesConsumed - 1] > 31)
- return -1;
if (Op.type == TYPE_XMM128 ||
Op.type == TYPE_XMM256)
sawRegImm = 1;
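
The absolute-move clause added to getID above is pure mask arithmetic; here is
a standalone sketch of it, with the ATTR_* bits reduced to local stand-ins for
the decoder's real attribute flags:

  namespace {
  enum : unsigned { ATTR_ADSIZE = 1u << 0, ATTR_OPSIZE = 1u << 1 };

  // Mirror of the 0xA0-0xA3 fixup: observe 0x66/0x67 in any position, then
  // flip both size attributes in 16-bit mode, where the prefixes mean the
  // opposite of what they mean in 32-bit mode.
  unsigned absMoveAttrMask(bool Has67, bool Has66, bool Is16Bit) {
    unsigned AttrMask = 0;
    if (Has67) AttrMask |= ATTR_ADSIZE;
    if (Has66) AttrMask |= ATTR_OPSIZE;
    if (Is16Bit) AttrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
    return AttrMask;
  }
  } // namespace

Mechanically, absMoveAttrMask(true, false, false) yields ATTR_ADSIZE, while
the same lone 0x67 prefix in 16-bit mode yields ATTR_OPSIZE after the flip.
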
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 8c45402ab5e1..a79a923ac525 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLERDECODER_H
-#define X86DISASSEMBLERDECODER_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
#include "X86DisassemblerDecoderCommon.h"
#include "llvm/ADT/ArrayRef.h"
@@ -341,7 +341,15 @@ namespace X86Disassembler {
ENTRY(DR4) \
ENTRY(DR5) \
ENTRY(DR6) \
- ENTRY(DR7)
+ ENTRY(DR7) \
+ ENTRY(DR8) \
+ ENTRY(DR9) \
+ ENTRY(DR10) \
+ ENTRY(DR11) \
+ ENTRY(DR12) \
+ ENTRY(DR13) \
+ ENTRY(DR14) \
+ ENTRY(DR15)
#define REGS_CONTROL \
ENTRY(CR0) \
@@ -352,7 +360,14 @@ namespace X86Disassembler {
ENTRY(CR5) \
ENTRY(CR6) \
ENTRY(CR7) \
- ENTRY(CR8)
+ ENTRY(CR8) \
+ ENTRY(CR9) \
+ ENTRY(CR10) \
+ ENTRY(CR11) \
+ ENTRY(CR12) \
+ ENTRY(CR13) \
+ ENTRY(CR14) \
+ ENTRY(CR15)
#define ALL_EA_BASES \
EA_BASES_16BIT \
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 13a7b557b440..1f8f9da5250e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLERDECODERCOMMON_H
-#define X86DISASSEMBLERDECODERCOMMON_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
#include "llvm/Support/DataTypes.h"
@@ -82,6 +82,7 @@ enum attributeBits {
"operands change width") \
ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \
"operands change width") \
+ ENUM_ENTRY(IC_OPSIZE_ADSIZE, 4, "requires ADSIZE and OPSIZE prefixes") \
ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
@@ -90,20 +91,24 @@ enum attributeBits {
"operands change width") \
ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
- ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ ENUM_ENTRY(IC_64BIT_REXW, 5, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_REXW_ADSIZE, 6, "requires a REX.W prefix and 0x67 " \
+ "prefix") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \
- ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ ENUM_ENTRY(IC_64BIT_OPSIZE_ADSIZE, 4, "Just as meaningful as IC_OPSIZE/" \
+ "IC_ADSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 6, "XD instructions are SSE; REX.W is " \
"secondary") \
- ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XS, 6, "Just as meaningful as IC_64BIT_XD") \
ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
- ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 7, "OPSIZE could mean a different " \
"opcode") \
- ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 7, "Just as meaningful as " \
"IC_64BIT_REXW_XS") \
- ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 8, "The Dynamic Duo! Prefer over all " \
"else because this changes most " \
"operands' meaning") \
ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
@@ -416,10 +421,6 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
- ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
- ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
- ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
- ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
ENUM_ENTRY(TYPE_SRCIDX8, "1-byte memory at source index") \
ENUM_ENTRY(TYPE_SRCIDX16, "2-byte memory at source index") \
ENUM_ENTRY(TYPE_SRCIDX32, "4-byte memory at source index") \
@@ -438,14 +439,8 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
ENUM_ENTRY(TYPE_M64FP, "64-bit") \
ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
- ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
- "floating-point instructions") \
- ENUM_ENTRY(TYPE_M32INT, "4-byte") \
- ENUM_ENTRY(TYPE_M64INT, "8-byte") \
ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
- ENUM_ENTRY(TYPE_MM, "MMX register operand") \
- ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
- ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \
ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
@@ -500,7 +495,7 @@ enum ModifierType {
};
#undef ENUM_ENTRY
-static const unsigned X86_MAX_OPERANDS = 5;
+static const unsigned X86_MAX_OPERANDS = 6;
/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
/// are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
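
The renumbering above keeps the second ENUM_ENTRY column a strict rank:
IC_OPSIZE_ADSIZE slots in at 4 and everything that previously outranked
IC_ADSIZE moves up by one. Treating that column as a conflict-resolution
priority (an assumption drawn from the consistent renumbering, not quoted from
the table generator), the selection rule amounts to:

  // Hypothetical distillation of the rank column: when two instruction
  // contexts both match an opcode, the higher-ranked context wins the slot.
  struct Context { const char *Name; int Rank; };

  static const Context &pick(const Context &A, const Context &B) {
    return A.Rank >= B.Rank ? A : B;
  }
  // pick({"IC_64BIT_REXW", 5}, {"IC_OPSIZE_ADSIZE", 4}).Name
  //   == "IC_64BIT_REXW"
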
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index b45b1185bd67..db6948518c5a 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -45,50 +45,36 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ HasCustomInstComment =
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+
if (TSFlags & X86II::LOCK)
OS << "\tlock\n";
+ // Output CALLpcrel32 as "callq" in 64-bit mode.
+ // In Intel syntax it is always emitted as "call".
+ //
+ // TODO: This hack should probably be redesigned via InstAlias in
+ // InstrInfo.td as soon as the Requires clause is supported properly
+ // for InstAlias.
+ if (MI->getOpcode() == X86::CALLpcrel32 &&
+ (getAvailableFeatures() & X86::Mode64Bit) != 0) {
+ OS << "\tcallq\t";
+ printPCRelImm(MI, 0, OS);
+ }
// Try to print any aliases first.
- if (!printAliasInstr(MI, OS))
+ else if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
-
- // If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
- EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
-}
-
-void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
- switch (Imm) {
- default: llvm_unreachable("Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
- case 8: O << "eq_uq"; break;
- case 9: O << "nge"; break;
- case 0xa: O << "ngt"; break;
- case 0xb: O << "false"; break;
- case 0xc: O << "neq_oq"; break;
- case 0xd: O << "ge"; break;
- case 0xe: O << "gt"; break;
- case 0xf: O << "true"; break;
- }
}
-void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
switch (Imm) {
- default: llvm_unreachable("Invalid avxcc argument!");
+ default: llvm_unreachable("Invalid ssecc/avxcc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
@@ -124,6 +110,20 @@ void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
}
}
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ assert((Imm & 0x7) == Imm); // Ensure valid immediate.
+ printSSEAVXCC(Imm, O);
+}
+
+void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ assert((Imm & 0x1f) == Imm); // Ensure valid immediate.
+ printSSEAVXCC(Imm, O);
+}
+
void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
@@ -151,8 +151,7 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
int64_t Address;
if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
- }
- else {
+ } else {
// Otherwise, just print the expression.
O << *Op.getExpr();
}
@@ -170,7 +169,11 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
<< '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
- if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ // If there are no instruction-specific comments, add a comment clarifying
+ // the hex value of the immediate operand when it isn't in the range
+ // [-256,255].
+ if (CommentStream && !HasCustomInstComment &&
+ (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
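
The CALLpcrel32 special case above boils down to one predicate over the
subtarget feature bits; a header-free sketch with the opcode and feature mask
passed in (in the printer itself they are X86::CALLpcrel32 and
X86::Mode64Bit):

  #include <cstdint>

  // True when the pc-relative call with a 32-bit immediate should be spelled
  // "callq": AT&T syntax in 64-bit mode only. Intel syntax keeps "call".
  static bool printsAsCallq(unsigned Opcode, unsigned CallPCRel32Opcode,
                            uint64_t Features, uint64_t Mode64BitMask) {
    return Opcode == CallPCRel32Opcode && (Features & Mode64BitMask) != 0;
  }
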
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 531183b02edf..649dc84a0cfc 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ATT_INST_PRINTER_H
-#define X86_ATT_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -23,8 +24,11 @@ class MCOperand;
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
+ : MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+ }
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
@@ -49,10 +53,14 @@ public:
void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
-
+
void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
@@ -129,8 +137,11 @@ public:
void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemOffset(MI, OpNo, O);
}
+
+private:
+ bool HasCustomInstComment;
};
-
+
}
#endif
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index baf6507f8244..a4c0ca882c71 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -28,20 +28,127 @@ using namespace llvm;
/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
/// newline terminated strings to the specified string if desired. This
/// information is shown in disassembly dumps when verbose assembly is enabled.
-void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
SmallVector<int, 8> ShuffleMask;
const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
switch (MI->getOpcode()) {
+ default:
+ // Not an instruction for which we can decode comments.
+ return false;
+
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPDrmi:
+ case X86::VBLENDPDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v2f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPSrmi:
+ case X86::VBLENDPSrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPSYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PBLENDWrmi:
+ case X86::VPBLENDWrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPBLENDWYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDWYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::INSERTPSrm:
+ case X86::VINSERTPSrm:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
- if(MI->getOperand(3).isImm())
- DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeINSERTPSMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
break;
case X86::MOVLHPSrr:
@@ -60,6 +167,80 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::MOVSLDUPrr:
+ case X86::VMOVSLDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSLDUPrm:
+ case X86::VMOVSLDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSHDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSHDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSLDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSLDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::MOVSHDUPrr:
+ case X86::VMOVSHDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSHDUPrm:
+ case X86::VMOVSHDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::PSLLDQri:
+ case X86::VPSLLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSLLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSRLDQri:
+ case X86::VPSRLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSRLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
@@ -208,6 +389,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
+ case X86::VPUNPCKHDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i32, ShuffleMask);
+ break;
case X86::PUNPCKHQDQrr:
case X86::VPUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -226,6 +415,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
+ case X86::VPUNPCKHQDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i64, ShuffleMask);
+ break;
case X86::PUNPCKLBWrr:
case X86::VPUNPCKLBWrr:
@@ -281,6 +478,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
+ case X86::VPUNPCKLDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i32, ShuffleMask);
+ break;
case X86::PUNPCKLQDQrr:
case X86::VPUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -299,6 +504,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
+ case X86::VPUNPCKLQDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i64, ShuffleMask);
+ break;
case X86::SHUFPDrri:
case X86::VSHUFPDrri:
@@ -368,6 +581,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPDZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDZrm:
+ DecodeUNPCKLMask(MVT::v8f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKLPSrr:
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -386,6 +607,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPSZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSZrm:
+ DecodeUNPCKLMask(MVT::v16f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKHPDrr:
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -404,6 +633,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPDZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDZrm:
+ DecodeUNPCKHMask(MVT::v8f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKHPSrr:
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -422,6 +659,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPSZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSZrm:
+ DecodeUNPCKHMask(MVT::v16f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::VPERMILPSri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
@@ -489,54 +734,59 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
}
+ // The only comments we decode are shuffles, so give up if we were unable to
+ // decode a shuffle mask.
+ if (ShuffleMask.empty())
+ return false;
- // If this was a shuffle operation, print the shuffle mask.
- if (!ShuffleMask.empty()) {
- if (!DestName) DestName = Src1Name;
- OS << (DestName ? DestName : "mem") << " = ";
+ if (!DestName) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
- // If the two sources are the same, canonicalize the input elements to be
- // from the first src so that we get larger element spans.
- if (Src1Name == Src2Name) {
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
- if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= (int)e) // From second mask.
- ShuffleMask[i] -= e;
- }
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= (int)e) // From second mask.
+ ShuffleMask[i] -= e;
}
+ }
- // The shuffle mask specifies which elements of the src1/src2 fill in the
- // destination, with a few sentinel values. Loop through and print them
- // out.
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
- if (i != 0)
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
OS << ',';
- if (ShuffleMask[i] == SM_SentinelZero) {
- OS << "zero";
- continue;
- }
-
- // Otherwise, it must come from src1 or src2. Print the span of elements
- // that comes from this src.
- bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
- const char *SrcName = isSrc1 ? Src1Name : Src2Name;
- OS << (SrcName ? SrcName : "mem") << '[';
- bool IsFirst = true;
- while (i != e &&
- (int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
- if (!IsFirst)
- OS << ',';
- else
- IsFirst = false;
+ else
+ IsFirst = false;
+ if (ShuffleMask[i] == SM_SentinelUndef)
+ OS << "u";
+ else
OS << ShuffleMask[i] % ShuffleMask.size();
- ++i;
- }
- OS << ']';
- --i; // For loop increments element #.
+ ++i;
}
- //MI->print(OS, 0);
- OS << "\n";
+ OS << ']';
+ --i; // For loop increments element #.
}
+ //MI->print(OS, 0);
+ OS << "\n";
+ // We successfully added a comment to this instruction.
+ return true;
}
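
All of the new BLEND cases above funnel through DecodeBLENDMask (from
X86ShuffleDecode); a standalone sketch of the mask it produces, where indices
greater than or equal to NumElts select the second source, matching the
comment strings printed above:

  #include <vector>

  // Bit i of the immediate selects element i from src2 (encoded as index
  // i + NumElts); a clear bit keeps element i from src1.
  static std::vector<int> decodeBlendMask(unsigned NumElts, unsigned Imm) {
    std::vector<int> Mask;
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back((Imm >> i) & 1 ? int(i + NumElts) : int(i));
    return Mask;
  }
  // decodeBlendMask(2, 2) == {0, 3}: for "blendpd $2, %xmm1, %xmm0" the
  // printed comment is "xmm0 = xmm0[0],xmm1[1]".
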
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
index 13fdf9af8c98..687581b10aec 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_INST_COMMENTS_H
-#define X86_INST_COMMENTS_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
namespace llvm {
class MCInst;
class raw_ostream;
- void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned));
}
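
A minimal sketch of the intended call pattern for the changed return type;
the annotate helper and its fallback string are illustrative only:

  #include "X86InstComments.h"
  #include "llvm/MC/MCInst.h"
  #include "llvm/Support/raw_ostream.h"

  // Printers can now learn whether a comment was produced and supply their
  // own annotation otherwise, as the AT&T printer does to suppress its
  // generic immediate comment.
  static void annotate(const llvm::MCInst &MI, llvm::raw_ostream &Comments,
                       const char *(*getRegName)(unsigned)) {
    if (!llvm::EmitAnyX86InstComments(&MI, Comments, getRegName))
      Comments << "; no shuffle comment\n"; // illustrative fallback
  }
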
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 1c8466bf2945..449445df7d27 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -50,33 +50,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
-void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm() & 0xf;
- switch (Imm) {
- default: llvm_unreachable("Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
- case 8: O << "eq_uq"; break;
- case 9: O << "nge"; break;
- case 0xa: O << "ngt"; break;
- case 0xb: O << "false"; break;
- case 0xc: O << "neq_oq"; break;
- case 0xd: O << "ge"; break;
- case 0xe: O << "gt"; break;
- case 0xf: O << "true"; break;
- }
-}
-
-void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
+static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
switch (Imm) {
default: llvm_unreachable("Invalid avxcc argument!");
case 0: O << "eq"; break;
@@ -114,6 +88,20 @@ void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
}
}
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ assert((Imm & 0x7) == Imm); // Ensure valid immediate.
+ printSSEAVXCC(Imm, O);
+}
+
+void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ assert((Imm & 0x1f) == Imm); // Ensure valid immediate.
+ printSSEAVXCC(Imm, O);
+}
+
void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
@@ -168,21 +156,21 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
-
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+X86::AddrSegmentReg, O);
O << ':';
}
-
+
O << '[';
-
+
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOperand(MI, Op+X86::AddrBaseReg, O);
NeedPlus = true;
}
-
+
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
@@ -209,7 +197,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << formatImm(DispVal);
}
}
-
+
O << ']';
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 4d9b48155430..641423d56d03 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_INTEL_INST_PRINTER_H
-#define X86_INTEL_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,11 +44,15 @@ public:
void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "opaque ptr ";
printMemReference(MI, OpNo, O);
}
-
+
void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "byte ptr ";
printMemReference(MI, OpNo, O);
@@ -152,7 +156,7 @@ public:
printMemOffset(MI, OpNo, O);
}
};
-
+
}
#endif
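
printMemReference, whose body appears in the .cpp hunk above, assembles the
Intel-syntax form seg:[base + scale*index +/- disp]. A self-contained
string-level sketch of that layout, with registers passed as plain strings
rather than resolved from MCOperands:

  #include <sstream>
  #include <string>

  static std::string intelMemRef(const std::string &Seg,
                                 const std::string &Base, unsigned Scale,
                                 const std::string &Index, int64_t Disp) {
    std::ostringstream OS;
    if (!Seg.empty()) OS << Seg << ':';
    OS << '[';
    bool NeedPlus = false;
    if (!Base.empty()) { OS << Base; NeedPlus = true; }
    if (!Index.empty()) {
      if (NeedPlus) OS << " + ";
      if (Scale != 1) OS << Scale << '*';
      OS << Index;
      NeedPlus = true;
    }
    // Print the displacement if it is nonzero, or if it is the only term.
    if (Disp || !NeedPlus) {
      if (NeedPlus) OS << (Disp < 0 ? " - " : " + ");
      OS << (Disp < 0 && NeedPlus ? -Disp : Disp);
    }
    OS << ']';
    return OS.str();
  }
  // intelMemRef("", "rax", 4, "rbx", 16) -> "[rax + 4*rbx + 16]"
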
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 146d1112014b..b9fdc9c483fd 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Desc
parent = X86
-required_libraries = MC Object Support X86AsmPrinter X86Info
+required_libraries = MC MCDisassembler Object Support X86AsmPrinter X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 23bca0d53d3d..719b761084f9 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -11,7 +11,6 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -437,10 +436,30 @@ class DarwinX86AsmBackend : public X86AsmBackend {
bool Is64Bit;
unsigned OffsetSize; ///< Offset of a "push" instruction.
- unsigned PushInstrSize; ///< Size of a "push" instruction.
unsigned MoveInstrSize; ///< Size of a "move" instruction.
- unsigned StackDivide; ///< Amount to adjust stack stize by.
+ unsigned StackDivide; ///< Amount to adjust stack size by.
protected:
+ /// \brief Size of a "push" instruction for the given register.
+ unsigned PushInstrSize(unsigned Reg) const {
+ switch (Reg) {
+ case X86::EBX:
+ case X86::ECX:
+ case X86::EDX:
+ case X86::EDI:
+ case X86::ESI:
+ case X86::EBP:
+ case X86::RBX:
+ case X86::RBP:
+ return 1;
+ case X86::R12:
+ case X86::R13:
+ case X86::R14:
+ case X86::R15:
+ return 2;
+ }
+ return 1;
+ }
+
/// \brief Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
uint32_t
@@ -493,7 +512,7 @@ protected:
// Defines a new offset for the CFA. E.g.
//
// With frame:
- //
+ //
// pushq %rbp
// L0:
// .cfi_def_cfa_offset 16
@@ -530,7 +549,7 @@ protected:
unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
SavedRegs[SavedRegIdx++] = Reg;
StackAdjust += OffsetSize;
- InstrOffset += PushInstrSize;
+ InstrOffset += PushInstrSize(Reg);
break;
}
}
@@ -663,7 +682,7 @@ private:
// 4 3
// 5 3
//
- for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ for (unsigned i = 0; i < RegCount; ++i) {
int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
@@ -724,7 +743,6 @@ public:
OffsetSize = Is64Bit ? 8 : 4;
MoveInstrSize = Is64Bit ? 3 : 2;
StackDivide = Is64Bit ? 8 : 4;
- PushInstrSize = 1;
}
};
@@ -759,39 +777,6 @@ public:
MachO::CPU_TYPE_X86_64, Subtype);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Temporary labels in the string literals sections require symbols. The
- // issue is that the x86_64 relocation format does not allow symbol +
- // offset, and so the linker does not have enough information to resolve the
- // access to the appropriate atom unless an external relocation is used. For
- // non-cstring sections, we expect the compiler to use a non-temporary label
- // for anything that could have an addend pointing outside the symbol.
- //
- // See <rdar://problem/4765733>.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- return SMO.getType() == MachO::S_CSTRING_LITERALS;
- }
-
- bool isSectionAtomizable(const MCSection &Section) const override {
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- // Fixed sized data sections are uniqued, they cannot be diced into atoms.
- switch (SMO.getType()) {
- default:
- return true;
-
- case MachO::S_4BYTE_LITERALS:
- case MachO::S_8BYTE_LITERALS:
- case MachO::S_16BYTE_LITERALS:
- case MachO::S_LITERAL_POINTERS:
- case MachO::S_NON_LAZY_SYMBOL_POINTERS:
- case MachO::S_LAZY_SYMBOL_POINTERS:
- case MachO::S_MOD_INIT_FUNC_POINTERS:
- case MachO::S_MOD_TERM_FUNC_POINTERS:
- case MachO::S_INTERPOSING:
- return false;
- }
- }
-
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
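
The PushInstrSize change above reflects x86 encoding: push r64 is a single
0x50+rd opcode byte, but R8-R15 additionally need a REX.B prefix byte. A
sketch of how the unwind walk's InstrOffset accumulates across a prologue
under that rule, with register classification reduced to a bool:

  #include <initializer_list>

  // Sum the byte sizes of a run of pushes: 1 byte for the classic registers,
  // 2 for the REX-prefixed ones, matching PushInstrSize above for the
  // callee-saved registers compact unwind tracks.
  static unsigned prologuePushBytes(std::initializer_list<bool> NeedsREX) {
    unsigned InstrOffset = 0;
    for (bool High : NeedsREX)
      InstrOffset += High ? 2u : 1u;
    return InstrOffset;
  }
  // pushq %rbp; pushq %r14; pushq %rbx
  //   -> prologuePushBytes({false, true, false}) == 4
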
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 026e4c487d81..28be15beb675 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86BASEINFO_H
-#define X86BASEINFO_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86BASEINFO_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86BASEINFO_H
#include "X86MCTargetDesc.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -216,7 +216,7 @@ namespace X86II {
MO_SECREL
};
- enum {
+ enum : uint64_t {
//===------------------------------------------------------------------===//
// Instruction encodings. These are the standard/most common forms for X86
// instructions.
@@ -303,17 +303,18 @@ namespace X86II {
//// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C0 = 32, MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35,
MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39,
- MRM_CB = 40, MRM_D0 = 41, MRM_D1 = 42, MRM_D4 = 43,
- MRM_D5 = 44, MRM_D6 = 45, MRM_D8 = 46, MRM_D9 = 47,
- MRM_DA = 48, MRM_DB = 49, MRM_DC = 50, MRM_DD = 51,
- MRM_DE = 52, MRM_DF = 53, MRM_E0 = 54, MRM_E1 = 55,
- MRM_E2 = 56, MRM_E3 = 57, MRM_E4 = 58, MRM_E5 = 59,
- MRM_E8 = 60, MRM_E9 = 61, MRM_EA = 62, MRM_EB = 63,
- MRM_EC = 64, MRM_ED = 65, MRM_EE = 66, MRM_F0 = 67,
- MRM_F1 = 68, MRM_F2 = 69, MRM_F3 = 70, MRM_F4 = 71,
- MRM_F5 = 72, MRM_F6 = 73, MRM_F7 = 74, MRM_F8 = 75,
- MRM_F9 = 76, MRM_FA = 77, MRM_FB = 78, MRM_FC = 79,
- MRM_FD = 80, MRM_FE = 81, MRM_FF = 82,
+ MRM_CB = 40, MRM_CF = 41, MRM_D0 = 42, MRM_D1 = 43,
+ MRM_D4 = 44, MRM_D5 = 45, MRM_D6 = 46, MRM_D7 = 47,
+ MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50, MRM_DB = 51,
+ MRM_DC = 52, MRM_DD = 53, MRM_DE = 54, MRM_DF = 55,
+ MRM_E0 = 56, MRM_E1 = 57, MRM_E2 = 58, MRM_E3 = 59,
+ MRM_E4 = 60, MRM_E5 = 61, MRM_E8 = 62, MRM_E9 = 63,
+ MRM_EA = 64, MRM_EB = 65, MRM_EC = 66, MRM_ED = 67,
+ MRM_EE = 68, MRM_F0 = 69, MRM_F1 = 70, MRM_F2 = 71,
+ MRM_F3 = 72, MRM_F4 = 73, MRM_F5 = 74, MRM_F6 = 75,
+ MRM_F7 = 76, MRM_F8 = 77, MRM_F9 = 78, MRM_FA = 79,
+ MRM_FB = 80, MRM_FC = 81, MRM_FD = 82, MRM_FE = 83,
+ MRM_FF = 84,
FormMask = 127,
@@ -327,21 +328,28 @@ namespace X86II {
OpSizeShift = 7,
OpSizeMask = 0x3 << OpSizeShift,
- OpSize16 = 1,
- OpSize32 = 2,
+ OpSizeFixed = 0 << OpSizeShift,
+ OpSize16 = 1 << OpSizeShift,
+ OpSize32 = 2 << OpSizeShift,
- // AsSize - Set if this instruction requires an operand size prefix (0x67),
- // which most often indicates that the instruction address 16 bit address
- // instead of 32 bit address (or 32 bit address in 64 bit mode).
+ // AdSize - AdSizeX implies this instruction determines its need for the
+ // 0x67 prefix from a normal ModRM memory operand. The other types indicate
+ // that an operand is encoded with a specific width and a prefix is needed
+ // if it differs from the current mode.
AdSizeShift = OpSizeShift + 2,
- AdSize = 1 << AdSizeShift,
+ AdSizeMask = 0x3 << AdSizeShift,
+
+ AdSizeX = 1 << AdSizeShift,
+ AdSize16 = 1 << AdSizeShift,
+ AdSize32 = 2 << AdSizeShift,
+ AdSize64 = 3 << AdSizeShift,
//===------------------------------------------------------------------===//
// OpPrefix - There are several prefix bytes that are used as opcode
// extensions. These are 0x66, 0xF3, and 0xF2. If this field is 0 there is
// no prefix.
//
- OpPrefixShift = AdSizeShift + 1,
+ OpPrefixShift = AdSizeShift + 2,
OpPrefixMask = 0x7 << OpPrefixShift,
// PS, PD - Prefix code for packed single and double precision vector
@@ -454,51 +462,53 @@ namespace X86II {
EncodingMask = 0x3 << EncodingShift,
// VEX - encoding using 0xC4/0xC5
- VEX = 1,
+ VEX = 1 << EncodingShift,
/// XOP - Opcode prefix used by XOP instructions.
- XOP = 2,
+ XOP = 2 << EncodingShift,
// VEX_EVEX - Specifies that this instruction use EVEX form which provides
// syntax support up to 32 512-bit register operands and up to 7 16-bit
// mask operands as well as source operand data swizzling/memory operand
// conversion, eviction hint, and rounding mode.
- EVEX = 3,
+ EVEX = 3 << EncodingShift,
// Opcode
OpcodeShift = EncodingShift + 2,
- //===------------------------------------------------------------------===//
- /// VEX - The opcode prefix used by AVX instructions
- VEXShift = OpcodeShift + 8,
-
/// VEX_W - Has a opcode specific functionality, but is used in the same
/// way as REX_W is for regular SSE instructions.
- VEX_W = 1U << 0,
+ VEX_WShift = OpcodeShift + 8,
+ VEX_W = 1ULL << VEX_WShift,
/// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
/// address instructions in SSE are represented as 3 address ones in AVX
/// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4V = 1U << 1,
+ VEX_4VShift = VEX_WShift + 1,
+ VEX_4V = 1ULL << VEX_4VShift,
/// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
/// operand 3 with VEX.vvvv.
- VEX_4VOp3 = 1U << 2,
+ VEX_4VOp3Shift = VEX_4VShift + 1,
+ VEX_4VOp3 = 1ULL << VEX_4VOp3Shift,
/// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
/// must be encoded in the i8 immediate field. This usually happens in
/// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
+ VEX_I8IMMShift = VEX_4VOp3Shift + 1,
+ VEX_I8IMM = 1ULL << VEX_I8IMMShift,
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
/// instruction uses 256-bit wide registers. This is usually auto detected
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
- VEX_L = 1U << 4,
+ VEX_LShift = VEX_I8IMMShift + 1,
+ VEX_L = 1ULL << VEX_LShift,
// VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
// prefix. Usually used for scalar instructions. Needed by disassembler.
- VEX_LIG = 1U << 5,
+ VEX_LIGShift = VEX_LShift + 1,
+ VEX_LIG = 1ULL << VEX_LIGShift,
// TODO: we should combine VEX_L and VEX_LIG together to form a 2-bit field
// with following encoding:
@@ -509,20 +519,24 @@ namespace X86II {
// this will save 1 tsflag bit
// EVEX_K - Set if this instruction requires masking
- EVEX_K = 1U << 6,
+ EVEX_KShift = VEX_LIGShift + 1,
+ EVEX_K = 1ULL << EVEX_KShift,
// EVEX_Z - Set if this instruction has EVEX.Z field set.
- EVEX_Z = 1U << 7,
+ EVEX_ZShift = EVEX_KShift + 1,
+ EVEX_Z = 1ULL << EVEX_ZShift,
// EVEX_L2 - Set if this instruction has EVEX.L' field set.
- EVEX_L2 = 1U << 8,
+ EVEX_L2Shift = EVEX_ZShift + 1,
+ EVEX_L2 = 1ULL << EVEX_L2Shift,
// EVEX_B - Set if this instruction has EVEX.B field set.
- EVEX_B = 1U << 9,
+ EVEX_BShift = EVEX_L2Shift + 1,
+ EVEX_B = 1ULL << EVEX_BShift,
// The scaling factor for the AVX512's 8-bit compressed displacement.
- CD8_Scale_Shift = VEXShift + 10,
- CD8_Scale_Mask = 127,
+ CD8_Scale_Shift = EVEX_BShift + 1,
+ CD8_Scale_Mask = 127ULL << CD8_Scale_Shift,
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
@@ -531,16 +545,16 @@ namespace X86II {
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
Has3DNow0F0FOpcodeShift = CD8_Scale_Shift + 7,
- Has3DNow0F0FOpcode = 1U << (Has3DNow0F0FOpcodeShift - VEXShift),
+ Has3DNow0F0FOpcode = 1ULL << Has3DNow0F0FOpcodeShift,
/// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
/// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
MemOp4Shift = Has3DNow0F0FOpcodeShift + 1,
- MemOp4 = 1U << (MemOp4Shift - VEXShift),
+ MemOp4 = 1ULL << MemOp4Shift,
/// Explicitly specified rounding control
EVEX_RCShift = MemOp4Shift + 1,
- EVEX_RC = 1U << (EVEX_RCShift - VEXShift)
+ EVEX_RC = 1ULL << EVEX_RCShift
};
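// Editorial note (illustrative sketch, not part of the patch): since every
// flag above now carries its absolute bit position, testing a flag no longer
// needs the old VEXShift re-shift:
//
//   // Before: bool HasVEX_W = (TSFlags >> X86II::VEXShift) & X86II::VEX_W;
//   bool HasVEX_W = TSFlags & X86II::VEX_W; // constant is 1ULL << VEX_WShift
//
// The 1ULL widening matters: these shift positions land above bit 31, so a
// 32-bit 1U constant would overflow.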
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -642,10 +656,10 @@ namespace X86II {
/// counted as one operand.
///
inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
-
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+
switch (TSFlags & X86II::FormMask) {
default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
@@ -662,19 +676,10 @@ namespace X86II {
return -1;
case X86II::MRMDestMem:
return 0;
- case X86II::MRMSrcMem: {
- unsigned FirstMemOp = 1;
- if (HasVEX_4V)
- ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
- if (HasMemOp4)
- ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
- if (HasEVEX_K)
- ++FirstMemOp;// Skip the mask register
- // FIXME: Maybe lea should have its own form? This is a horrible hack.
- //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- return FirstMemOp;
- }
+ case X86II::MRMSrcMem:
+ // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
+ // mask register.
+ return 1 + HasVEX_4V + HasMemOp4 + HasEVEX_K;
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
@@ -685,32 +690,27 @@ namespace X86II {
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- unsigned FirstMemOp = 0;
- if (HasVEX_4V)
- ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
- if (HasEVEX_K)
- ++FirstMemOp;// Skip the mask register
- return FirstMemOp;
- }
+ case X86II::MRM6m: case X86II::MRM7m:
+ // Start from 0, skip registers encoded in VEX_VVVV or a mask register.
+ return 0 + HasVEX_4V + HasEVEX_K;
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
+ case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
+ case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
+ case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
+ case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
+ case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
+ case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
+ case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
+ case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
+ case X86II::MRM_FE: case X86II::MRM_FF:
return -1;
}
}
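// Worked example (editorial, illustrative only): a hypothetical MRMSrcMem
// instruction that encodes one source in VEX.VVVV and another in imm8 (the
// FMA4/XOP pattern) places its memory operand at
//   1 (base) + 1 (VEX_4V) + 1 (MemOp4) + 0 (no EVEX_K) == 3
uint64_t Flags = X86II::MRMSrcMem | X86II::VEX_4V | X86II::MemOp4; // hypothetical
assert(X86II::getMemoryOperandNo(Flags, /*Opcode=*/0) == 3);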
@@ -751,7 +751,7 @@ namespace X86II {
(RegNo > X86::ZMM15 && RegNo <= X86::ZMM31));
}
-
+
inline bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 3fdec874cbac..be6a8e4a43eb 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -77,7 +77,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_X86_64_GOTTPOFF;
- break;
+ break;
case MCSymbolRefExpr::VK_TLSGD:
Type = ELF::R_X86_64_TLSGD;
break;
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 09396b790df2..4899900dcef9 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_X86_X86FIXUPKINDS_H
-#define LLVM_X86_X86FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86FIXUPKINDS_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index b1411bc5040d..5795514beec9 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -102,21 +102,12 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
TextAlignFillValue = 0x90;
- // Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
-
// Debug Information
SupportsDebugInformation = true;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split
- // into two .words.
- if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
- T.getArch() == Triple::x86)
- Data64bitsDirective = nullptr;
-
// Always enable the integrated assembler by default.
// Clang also enabled it when the OS is Solaris but that is redundant here.
UseIntegratedAssembler = true;
@@ -133,19 +124,17 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
return MCBinaryExpr::CreateAdd(Res, Four, Context);
}
-const MCSection *X86ELFMCAsmInfo::
-getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
-}
-
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
PointerSize = 8;
- ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
+
+ // Use MSVC-compatible EH data.
+ ExceptionsType = ExceptionHandling::MSVC;
}
AssemblerDialect = AsmWriterFlavor;
@@ -163,8 +152,10 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
PointerSize = 8;
- ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
+ ExceptionsType = ExceptionHandling::ItaniumWinEH;
} else {
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index a7509b03809a..deaad2a5b8e8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86TARGETASMINFO_H
-#define X86TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
@@ -23,7 +23,8 @@ namespace llvm {
class Triple;
class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
- void anchor() override;
+ virtual void anchor();
+
public:
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
@@ -39,8 +40,6 @@ namespace llvm {
void anchor() override;
public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
- const MCSection *
- getNonexecutableStackSection(MCContext &Ctx) const override;
};
class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 075db11027d1..5d925243dd68 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -185,12 +185,11 @@ static bool isDisp8(int Value) {
/// isCDisp8 - Return true if this signed displacement fits in an 8-bit
/// compressed displacement field.
static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) {
- assert(((TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift == X86II::EVEX) &&
+ assert(((TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
"Compressed 8-bit displacement is only valid for EVEX inst.");
unsigned CD8_Scale =
- (TSFlags >> X86II::CD8_Scale_Shift) & X86II::CD8_Scale_Mask;
+ (TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
if (CD8_Scale == 0) {
CValue = Value;
return isDisp8(Value);
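// Editorial sketch of the compression rule isCDisp8 implements (assumes a
// non-zero scale and a plain int8 range check; not part of the patch):
static bool compressesToCDisp8(int Value, unsigned Scale, int &CValue) {
  if (Value % (int)Scale != 0)
    return false;              // must be a multiple of the memory granularity
  CValue = Value / (int)Scale; // the quotient is the byte actually emitted
  return CValue >= -128 && CValue <= 127;
}
// e.g. Scale == 32: a displacement of 640 compresses to 20 (one byte), while
// 650 does not compress because 650 % 32 != 0.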
@@ -373,9 +372,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasEVEX = (Encoding == X86II::EVEX);
+ bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
@@ -593,13 +590,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
raw_ostream &OS) const {
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC;
+ assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
+
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
+ bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -680,18 +678,18 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
bool EncodeRC = false;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ if (TSFlags & X86II::VEX_W)
VEX_W = 1;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ if (TSFlags & X86II::VEX_L)
VEX_L = 1;
- if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2))
+ if (TSFlags & X86II::EVEX_L2)
EVEX_L2 = 1;
- if (HasEVEX_K && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_Z))
+ if (HasEVEX_K && (TSFlags & X86II::EVEX_Z))
EVEX_z = 1;
- if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_B))
+ if ((TSFlags & X86II::EVEX_B))
EVEX_b = 1;
switch (TSFlags & X86II::OpPrefixMask) {
@@ -725,7 +723,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MemAddr, src1(VEX_4V), src2(ModR/M)
// MemAddr, src1(ModR/M), imm8
//
- if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
@@ -867,7 +865,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
}
EncodeRC = true;
- }
+ }
break;
case X86II::MRMDestReg:
// MRMDestReg instructions forms:
@@ -1109,10 +1107,14 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
// Emit the operand size opcode prefix as needed.
- unsigned char OpSize = (TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift;
- if (OpSize == (is16BitMode(STI) ? X86II::OpSize32 : X86II::OpSize16))
+ if ((TSFlags & X86II::OpSizeMask) == (is16BitMode(STI) ? X86II::OpSize32
+ : X86II::OpSize16))
EmitByte(0x66, CurByte, OS);
+ // Emit the LOCK opcode prefix.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD: // 66
EmitByte(0x66, CurByte, OS);
@@ -1170,27 +1172,22 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned CurByte = 0;
// Encoding type for this instruction.
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
// Does it use the VEX.VVVV field?
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
const unsigned MemOp4_I8IMMOperand = 2;
// Does it use the EVEX.aaa field?
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
- bool HasEVEX_RC = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC);
-
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+ bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
+
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
if (MemoryOperand != -1) MemoryOperand += CurOp;
- // Emit the lock opcode prefix as needed.
- if (TSFlags & X86II::LOCK)
- EmitByte(0xF0, CurByte, OS);
-
// Emit segment override opcode prefix as needed.
if (MemoryOperand >= 0)
EmitSegmentOverridePrefix(CurByte, MemoryOperand+X86::AddrSegmentReg,
@@ -1202,16 +1199,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// Emit the address size opcode prefix as needed.
bool need_address_override;
- // The AdSize prefix is only for 32-bit and 64-bit modes. Hm, perhaps we
- // should introduce an AdSize16 bit instead of having seven special cases?
- if ((!is16BitMode(STI) && TSFlags & X86II::AdSize) ||
- (is16BitMode(STI) && (MI.getOpcode() == X86::JECXZ_32 ||
- MI.getOpcode() == X86::MOV8o8a ||
- MI.getOpcode() == X86::MOV16o16a ||
- MI.getOpcode() == X86::MOV32o32a ||
- MI.getOpcode() == X86::MOV8ao8 ||
- MI.getOpcode() == X86::MOV16ao16 ||
- MI.getOpcode() == X86::MOV32ao32))) {
+ uint64_t AdSize = TSFlags & X86II::AdSizeMask;
+ if ((is16BitMode(STI) && AdSize == X86II::AdSize32) ||
+ (is32BitMode(STI) && AdSize == X86II::AdSize16) ||
+ (is64BitMode(STI) && AdSize == X86II::AdSize32)) {
need_address_override = true;
} else if (MemoryOperand < 0) {
need_address_override = false;
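// Editorial summary (illustrative): the rewritten check emits the 0x67
// address-size prefix exactly when the AdSize attribute disagrees with the
// current mode's native address size:
//   16-bit mode + AdSize32 -> 0x67
//   32-bit mode + AdSize16 -> 0x67
//   64-bit mode + AdSize32 -> 0x67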
@@ -1237,7 +1228,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ if (TSFlags & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
unsigned SrcRegNum = 0;
@@ -1437,20 +1428,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
+ case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
+ case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
+ case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
+ case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
+ case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
+ case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
+ case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
+ case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
+ case X86II::MRM_FE: case X86II::MRM_FF:
EmitByte(BaseOpcode, CurByte, OS);
unsigned char MRM;
@@ -1465,11 +1457,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C9: MRM = 0xC9; break;
case X86II::MRM_CA: MRM = 0xCA; break;
case X86II::MRM_CB: MRM = 0xCB; break;
+ case X86II::MRM_CF: MRM = 0xCF; break;
case X86II::MRM_D0: MRM = 0xD0; break;
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
case X86II::MRM_D6: MRM = 0xD6; break;
+ case X86II::MRM_D7: MRM = 0xD7; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
@@ -1518,7 +1512,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
while (CurOp != NumOps && NumOps - CurOp <= 2) {
// The last source register of a 4-operand instruction in AVX is encoded
// in bits[7:4] of an immediate byte.
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ if (TSFlags & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
: CurOp);
++CurOp;
@@ -1544,7 +1538,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
- if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ if (TSFlags & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
#ifndef NDEBUG
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 3bfad6c71b9b..5a9181df70e6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -351,11 +351,8 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &_OS, MCCodeEmitter *_Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
@@ -366,7 +363,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
case Triple::ELF:
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
}
}
@@ -377,7 +374,7 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MII, MRI);
+ return new X86ATTInstPrinter(MAI, MII, MRI, STI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index ebe74cfeaddc..d8320b97736d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86MCTARGETDESC_H
-#define X86MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
@@ -40,8 +40,8 @@ namespace DWARFFlavour {
enum {
X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
};
-}
-
+}
+
/// N86 namespace - Native X86 register numbers
///
namespace N86 {
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index ead333830090..7a83f4c64e6d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -47,23 +48,21 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
- void RecordX86_64Relocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
+ void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue);
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
public:
X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) override {
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override {
if (Writer->is64Bit())
RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
@@ -97,13 +96,10 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}
-void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+void X86MachObjectWriter::RecordX86_64Relocation(
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -117,6 +113,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbolData *RelSymbol = nullptr;
Value = Target.getConstant();
@@ -132,7 +129,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
Type = MachO::X86_64_RELOC_UNSIGNED;
- Index = 0;
// FIXME: I believe this is broken, I don't think the linker can understand
// it. I think it would require a local relocation, but I'm not sure if that
@@ -179,49 +175,44 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (A_Base == B_Base && A_Base)
report_fatal_error("unsupported relocation with identical base", false);
- // A subtraction expression where both symbols are undefined is a
+ // A subtraction expression where either symbol is undefined is a
// non-relocatable expression.
- if (A->isUndefined() && B->isUndefined())
- report_fatal_error("unsupported relocation with subtraction expression",
- false);
+ if (A->isUndefined() || B->isUndefined()) {
+ StringRef Name = A->isUndefined() ? A->getName() : B->getName();
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation with subtraction expression, symbol '" +
+ Name + "' can not be undefined in a subtraction expression");
+ }
Value += Writer->getSymbolAddress(&A_SD, Layout) -
(!A_Base ? 0 : Writer->getSymbolAddress(A_Base, Layout));
Value -= Writer->getSymbolAddress(&B_SD, Layout) -
(!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));
- if (A_Base) {
- Index = A_Base->getIndex();
- IsExtern = 1;
- }
- else {
+ if (!A_Base)
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
Type = MachO::X86_64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
-
- if (B_Base) {
- Index = B_Base->getIndex();
- IsExtern = 1;
- }
- else {
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
+
+ if (B_Base)
+ RelSymbol = B_Base;
+ else
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ if (Symbol->isTemporary() && Value) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
+ RelSymbol = Asm.getAtom(&SD);
// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully
@@ -231,23 +222,20 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
Fragment->getParent()->getSection());
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
- Base = nullptr;
+ RelSymbol = nullptr;
}
// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
// non-local symbol).
- if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
-
+ if (RelSymbol) {
// Add the local offset, if needed.
- if (Base != &SD)
- Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ if (RelSymbol != &SD)
+ Value +=
+ Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(RelSymbol);
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);
if (IsPCRel)
@@ -346,12 +334,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+ (IsExtern << 27) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
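// For reference (editorial note): the packing expressions above follow the
// standard Mach-O relocation_info layout:
//   bits  0-23  r_symbolnum  (Index, or 1-based section ordinal)
//   bit     24  r_pcrel      (IsPCRel)
//   bits 25-26  r_length     (Log2Size)
//   bit     27  r_extern     (now implied by passing a non-null RelSymbol)
//   bits 28-31  r_type       (Type)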
bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
@@ -423,7 +408,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@@ -445,7 +430,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
return true;
}
@@ -466,7 +451,6 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// Get the symbol data.
const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
- unsigned Index = SD_A->getIndex();
// We're only going to have a second symbol in pic mode and it'll be a
// subtraction from the picbase. For 32-bit pic the addend is the difference
@@ -489,12 +473,9 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = Value;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (1 << 27) | // r_extern
- (MachO::GENERIC_RELOC_TLV << 28)); // r_type
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
+ Writer->addRelocation(SD_A, Fragment->getParent(), MRE);
}
void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
@@ -545,8 +526,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbolData *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
@@ -567,12 +548,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// Check whether we need an external or internal relocation.
if (Writer->doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ RelSymbol = SD;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
@@ -590,12 +570,9 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 6727f5edd26b..5f1596c46825 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -8,18 +8,21 @@
//===----------------------------------------------------------------------===//
#include "X86MCTargetDesc.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class X86WinCOFFStreamer : public MCWinCOFFStreamer {
+ Win64EH::UnwindEmitter EHStreamer;
public:
X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
raw_ostream &OS)
: MCWinCOFFStreamer(C, AB, *CE, OS) { }
void EmitWinEHHandlerData() override;
+ void EmitWindowsUnwindTables() override;
void FinishImpl() override;
};
@@ -28,7 +31,13 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData() {
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
- MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+}
+
+void X86WinCOFFStreamer::EmitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
}
void X86WinCOFFStreamer::FinishImpl() {
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 52d3c01076de..19a183201755 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,17 +2,6 @@
// Random ideas for the X86 backend.
//===---------------------------------------------------------------------===//
-This should be one DIV/IDIV instruction, not a libcall:
-
-unsigned test(unsigned long long X, unsigned Y) {
- return X/Y;
-}
-
-This can be done trivially with a custom legalizer. What about overflow
-though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
-
-//===---------------------------------------------------------------------===//
-
Improvements to the multiply -> shift/add algorithm:
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html
It appears icc uses push for parameter passing. Need to investigate.
//===---------------------------------------------------------------------===//
-This:
-
-void foo(void);
-void bar(int x, int *P) {
- x >>= 2;
- if (x)
- foo();
- *P = x;
-}
-
-compiles into:
-
- movq %rsi, %rbx
- movl %edi, %r14d
- sarl $2, %r14d
- testl %r14d, %r14d
- je LBB0_2
-
-Instead of doing an explicit test, we can use the flags off the sar. This
-occurs in a bigger testcase like this, which is pretty common:
-
-#include <vector>
-int test1(std::vector<int> &X) {
- int Sum = 0;
- for (long i = 0, e = X.size(); i != e; ++i)
- X[i] = 0;
- return Sum;
-}
-
-//===---------------------------------------------------------------------===//
-
-Only use inc/neg/not instructions on processors where they are faster than
-add/sub/xor. They are slower on the P4 due to only updating some processor
-flags.
-
-//===---------------------------------------------------------------------===//
-
The instruction selector sometimes misses folding a load into a compare. The
pattern is written as (cmp reg, (load p)). Because the compare isn't
commutative, it is not matched with the load on both sides. The dag combiner
@@ -303,42 +255,6 @@ opposed to two cycles for the movl+lea variant.
//===---------------------------------------------------------------------===//
-__builtin_ffs codegen is messy.
-
-int ffs_(unsigned X) { return __builtin_ffs(X); }
-
-llvm produces:
-ffs_:
- movl 4(%esp), %ecx
- bsfl %ecx, %eax
- movl $32, %edx
- cmove %edx, %eax
- incl %eax
- xorl %edx, %edx
- testl %ecx, %ecx
- cmove %edx, %eax
- ret
-
-vs gcc:
-
-_ffs_:
- movl $-1, %edx
- bsfl 4(%esp), %eax
- cmove %edx, %eax
- addl $1, %eax
- ret
-
-Another example of __builtin_ffs (use predsimplify to eliminate a select):
-
-int foo (unsigned long j) {
- if (j)
- return __builtin_ffs (j) - 1;
- else
- return 0;
-}
-
-//===---------------------------------------------------------------------===//
-
It appears gcc places string data with linkonce linkage in
.section __TEXT,__const_coal,coalesced instead of
.section __DATA,__const_coal,coalesced.
@@ -466,85 +382,6 @@ We should inline lrintf and probably other libc functions.
//===---------------------------------------------------------------------===//
-Use the FLAGS values from arithmetic instructions more. For example, compile:
-
-int add_zf(int *x, int y, int a, int b) {
- if ((*x += y) == 0)
- return a;
- else
- return b;
-}
-
-to:
- addl %esi, (%rdi)
- movl %edx, %eax
- cmovne %ecx, %eax
- ret
-instead of:
-
-_add_zf:
- addl (%rdi), %esi
- movl %esi, (%rdi)
- testl %esi, %esi
- cmove %edx, %ecx
- movl %ecx, %eax
- ret
-
-As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll
-without a test instruction.
-
-//===---------------------------------------------------------------------===//
-
-These two functions have identical effects:
-
-unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
-unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
-
-We currently compile them to:
-
-_f:
- movl 4(%esp), %eax
- movl %eax, %ecx
- incl %ecx
- movl 8(%esp), %edx
- cmpl %edx, %ecx
- jne LBB1_2 #UnifiedReturnBlock
-LBB1_1: #cond_true
- addl $2, %eax
- ret
-LBB1_2: #UnifiedReturnBlock
- movl %ecx, %eax
- ret
-_f2:
- movl 4(%esp), %eax
- movl %eax, %ecx
- incl %ecx
- cmpl 8(%esp), %ecx
- sete %cl
- movzbl %cl, %ecx
- leal 1(%ecx,%eax), %eax
- ret
-
-both of which are inferior to GCC's:
-
-_f:
- movl 4(%esp), %edx
- leal 1(%edx), %eax
- addl $2, %edx
- cmpl 8(%esp), %eax
- cmove %edx, %eax
- ret
-_f2:
- movl 4(%esp), %eax
- addl $1, %eax
- xorl %edx, %edx
- cmpl 8(%esp), %eax
- sete %dl
- addl %edx, %eax
- ret
-
-//===---------------------------------------------------------------------===//
-
This code:
void test(int X) {
@@ -1398,20 +1235,6 @@ A similar code sequence works for division.
//===---------------------------------------------------------------------===//
-These should compile to the same code, but the later codegen's to useless
-instructions on X86. This may be a trivial dag combine (GCC PR7061):
-
-struct s1 { unsigned char a, b; };
-unsigned long f1(struct s1 x) {
- return x.a + x.b;
-}
-struct s2 { unsigned a: 8, b: 8; };
-unsigned long f2(struct s2 x) {
- return x.a + x.b;
-}
-
-//===---------------------------------------------------------------------===//
-
We currently compile this:
define i32 @func1(i32 %v1, i32 %v2) nounwind {
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 1ea8798e316e..fceb083b5f2d 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -13,7 +13,7 @@ using namespace llvm;
Target llvm::TheX86_32Target, llvm::TheX86_64Target;
-extern "C" void LLVMInitializeX86TargetInfo() {
+extern "C" void LLVMInitializeX86TargetInfo() {
RegisterTarget<Triple::x86, /*HasJIT=*/true>
X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
index fdb886f53a08..de0a30fa19c8 100644
--- a/lib/Target/X86/Utils/LLVMBuild.txt
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Utils
parent = X86
-required_libraries = Support
+required_libraries = Core Support
add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 5f2441c017f5..e4c58d42c5dc 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
+#include "llvm/IR/Constants.h"
#include "llvm/CodeGen/MachineValueType.h"
//===----------------------------------------------------------------------===//
@@ -62,6 +63,51 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i);
+ ShuffleMask.push_back(2 * i);
+ }
+}
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i + 1);
+ ShuffleMask.push_back(2 * i + 1);
+ }
+}
+
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ int M = SM_SentinelZero;
+ if (i >= Imm) M = i - Imm + l;
+ ShuffleMask.push_back(M);
+ }
+}
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ unsigned Base = i + Imm;
+ int M = Base + l;
+ if (Base >= NumLaneElts) M = SM_SentinelZero;
+ ShuffleMask.push_back(M);
+ }
+}
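// Usage sketch (editorial, not part of the patch): a 16-byte right shift by
// three bytes pulls indices 3..15 down and zero-fills the top of the lane.
SmallVector<int, 16> Mask;
DecodePSRLDQMask(MVT::v16i8, /*Imm=*/3, Mask);
// Mask == {3, 4, ..., 15, SM_SentinelZero, SM_SentinelZero, SM_SentinelZero}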
+
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
@@ -207,6 +253,90 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
}
}
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ // It is not an error for the PSHUFB mask to not be a vector of i8 because the
+ // constant pool uniques constants by their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+ if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ return;
+
+ // This is a straightforward byte vector.
+ if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
+ int NumElements = MaskTy->getVectorNumElements();
+ ShuffleMask.reserve(NumElements);
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ }
+ // TODO: Handle funny-looking vectors too.
+}
+
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ if (M == (uint64_t)SM_SentinelUndef) {
+ ShuffleMask.push_back(M);
+ continue;
+ }
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // half of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (M & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (M & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
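// Usage sketch for the raw-mask overload (editorial; values are hypothetical):
uint64_t Raw[4] = {0x00, 0x80, 0x8F, 0x04};
SmallVector<int, 4> Mask;
DecodePSHUFBMask(makeArrayRef(Raw, 4), Mask);
// Mask == {0, SM_SentinelZero, SM_SentinelZero, 4}: a set bit 7 zeroes the
// element; otherwise only the low 4 bits select within the 16-byte half.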
+
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ int ElementBits = VT.getScalarSizeInBits();
+ int NumElements = VT.getVectorNumElements();
+ for (int i = 0; i < NumElements; ++i) {
+ // If there are more than 8 elements in the vector, then any immediate blend
+ // mask applies to each 128-bit lane. There can never be more than
+ // 8 elements in a 128-bit lane with an immediate blend.
+ int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+ assert(Bit < 8 &&
+ "Immediate blends only operate over 8 elements at a time!");
+ ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+ }
+}
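// Worked example (editorial): for v8i16 with Imm == 0xA5 (0b10100101), the
// set bits pick from the second source vector:
SmallVector<int, 8> Mask;
DecodeBLENDMask(MVT::v8i16, /*Imm=*/0xA5, Mask);
// Mask == {8, 1, 10, 3, 4, 13, 6, 15}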
+
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
@@ -215,4 +345,44 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
+ assert(MaskTy->getVectorElementType()->isIntegerTy() &&
+ "Expected integer constant mask elements!");
+ int ElementBits = MaskTy->getScalarSizeInBits();
+ int NumElements = MaskTy->getVectorNumElements();
+ assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ "Unexpected number of vector elements.");
+ ShuffleMask.reserve(NumElements);
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ assert((unsigned)NumElements == C->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
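// Editorial note: each mask element selects only within its own 128-bit lane,
// so a hypothetical <8 x i32> constant <0, 3, 2, 1, 1, 0, 3, 2> decodes to
//   {0, 3, 2, 1, 5, 4, 7, 6}   // Index = (i / 4) * 4 + (Element & 0x3)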
+
} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 9e75b6b945c5..6ba3c64f8ec3 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -12,21 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
+#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
namespace llvm {
+class Constant;
class MVT;
-enum {
- SM_SentinelZero = -1
-};
+enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
@@ -36,6 +36,14 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
@@ -59,6 +67,16 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// different datatypes and vector widths.
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a PSHUFB mask from an IR-level vector constant.
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a PSHUFB mask from a raw array of constants such as from
+/// BUILD_VECTOR.
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a BLEND immediate mask into a shuffle mask.
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
@@ -67,6 +85,9 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index d5522ed95eb4..8bd5817e528f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_X86_H
-#define TARGET_X86_H
+#ifndef LLVM_LIB_TARGET_X86_X86_H
+#define LLVM_LIB_TARGET_X86_X86_H
#include "llvm/Support/CodeGen.h"
@@ -21,13 +21,8 @@ namespace llvm {
class FunctionPass;
class ImmutablePass;
-class JITCodeEmitter;
class X86TargetMachine;
-/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
-/// be handled natively in terms of a loop using cmpxchg.
-FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
-
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
@@ -54,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
-/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified MCE object.
-FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE);
-
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
/// allocated chunk of memory.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index cd32a0f24234..ab3319afe93f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -79,9 +79,16 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
+// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that
+// explicit. Also, it seems this would be the default state for most chips
+// going forward, so it would probably be better to negate the logic and
+// match the 32-byte "slow mem" feature below.
def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
+def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -157,15 +164,22 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
+ "Support SGX instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
+ "Support SMAP instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
- "HasSlowDivide", "true",
- "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+ "HasSlowDivide32", "true",
+ "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+ "HasSlowDivide64", "true",
+ "Use 16-bit divide for positive values less than 65536">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
@@ -178,6 +192,10 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
+def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
+ "Use RSQRT* to optimize square root calculations">;
+def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
+ "true", "Use RCP* to optimize division calculations">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -222,78 +240,171 @@ def : ProcessorModel<"core2", SandyBridgeModel,
def : ProcessorModel<"penryn", SandyBridgeModel,
[FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
-// Atom.
-def : ProcessorModel<"atom", AtomModel,
- [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide,
- FeatureCallRegIndirect,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions]>;
-
-// Atom Silvermont.
-def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
- FeatureSSE42, FeatureCMPXCHG16B,
- FeatureMOVBE, FeaturePOPCNT,
- FeaturePCLMUL, FeatureAES,
- FeatureCallRegIndirect,
- FeaturePRFCHW,
- FeatureSlowLEA, FeatureSlowIncDec,
- FeatureSlowBTMem, FeatureFastUAMem]>;
+// Atom CPUs.
+class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
+ ProcIntelAtom,
+ FeatureSSSE3,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureSlowBTMem,
+ FeatureLeaForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions
+ ]>;
+def : BonnellProc<"bonnell">;
+def : BonnellProc<"atom">; // Pin the generic name to the baseline.
+
+class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
+ ProcIntelSLM,
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem,
+ FeatureFastUAMem
+ ]>;
+def : SilvermontProc<"silvermont">;
+def : SilvermontProc<"slm">; // Legacy alias.
+
// "Arrandale" along with corei3 and corei5
-def : ProcessorModel<"corei7", SandyBridgeModel,
- [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures>
+ : ProcessorModel<Name, SandyBridgeModel, !listconcat([
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureFastUAMem,
+ FeaturePOPCNT
+ ],
+ AdditionalFeatures)>;
+def : NehalemProc<"nehalem", []>;
+def : NehalemProc<"corei7", [FeatureAES]>;
-def : ProcessorModel<"nehalem", SandyBridgeModel,
- [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureFastUAMem, FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : ProcessorModel<"westmere", SandyBridgeModel,
- [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
- FeaturePCLMUL]>;
-// Sandy Bridge
+class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureFastUAMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL
+ ]>;
+def : WestmereProc<"westmere">;
+
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-def : ProcessorModel<"corei7-avx", SandyBridgeModel,
- [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
-// Ivy Bridge
-def : ProcessorModel<"core-avx-i", SandyBridgeModel,
- [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase]>;
-
-// Haswell
-def : ProcessorModel<"core-avx2", HaswellModel,
- [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
- FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
- FeatureHLE]>;
-
-// KNL
+class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureAVX,
+ FeatureCMPXCHG16B,
+ FeatureFastUAMem,
+ FeatureSlowUAMem32,
+ FeatureVectorUAMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL
+ ]>;
+def : SandyBridgeProc<"sandybridge">;
+def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
+
+class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureAVX,
+ FeatureCMPXCHG16B,
+ FeatureFastUAMem,
+ FeatureSlowUAMem32,
+ FeatureVectorUAMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase
+ ]>;
+def : IvyBridgeProc<"ivybridge">;
+def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
+
+class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureAVX2,
+ FeatureCMPXCHG16B,
+ FeatureFastUAMem,
+ FeatureVectorUAMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec
+ ]>;
+def : HaswellProc<"haswell">;
+def : HaswellProc<"core-avx2">; // Legacy alias.
+
+class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureAVX2,
+ FeatureCMPXCHG16B,
+ FeatureFastUAMem,
+ FeatureVectorUAMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSMAP,
+ FeatureSlowIncDec
+ ]>;
+def : BroadwellProc<"broadwell">;
+
// FIXME: define KNL model
-def : ProcessorModel<"knl", HaswellModel,
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureVectorUAMem]>;
+def : KnightsLandingProc<"knl">;
-// SKX
// FIXME: define SKX model
-def : ProcessorModel<"skx", HaswellModel,
+class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureCDI,
FeatureDQI, FeatureBWI, FeatureVLX,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec]>;
+ FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+def : SkylakeProc<"skylake">;
+def : SkylakeProc<"skx">; // Legacy alias.
+
+
+// AMD CPUs.
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -302,7 +413,7 @@ def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
@@ -326,39 +437,52 @@ def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
+def : Proc<"barcelona", [FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowBTMem,
+ FeatureSlowSHLD]>;
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>;
+
// Jaguar
-def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
- FeatureBMI, FeatureF16C, FeatureMOVBE,
- FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>;
+def : ProcessorModel<"btver2", BtVer2Model,
+ [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
+ FeatureBMI, FeatureF16C, FeatureMOVBE,
+ FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
+ FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>;
+
+// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
+
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>;
+ FeatureAVX, FeatureSSE4A, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowSHLD]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureTBM,
- FeatureFMA, FeatureSlowSHLD]>;
+ FeatureAVX, FeatureSSE4A, FeatureF16C,
+ FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureTBM,
- FeatureFMA, FeatureFSGSBase]>;
+ FeatureAVX, FeatureSSE4A, FeatureF16C,
+ FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD,
+ FeatureFSGSBase]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
- FeatureTBM, FeatureFMA, FeatureFSGSBase]>;
+ FeatureTBM, FeatureFMA, FeatureSSE4A,
+ FeatureFSGSBase]>;
def : Proc<"geode", [Feature3DNowA]>;
@@ -371,7 +495,7 @@ def : Proc<"c3-2", [FeatureSSE1]>;
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
-//
+//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 57c7a62bd5c1..14a0f38df668 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -47,6 +47,8 @@ using namespace llvm;
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SMShadowTracker.startFunction(MF);
+
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
@@ -503,7 +505,7 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetMacho())
+ if (Subtarget->isTargetMachO())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
if (Subtarget->isTargetCOFF()) {
@@ -556,14 +558,16 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
- SectionKind Kind = CPE.getSectionKind(TM.getDataLayout());
+ SectionKind Kind =
+ CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
const Constant *C = CPE.Val.ConstVal;
- const MCSectionCOFF *S = cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(Kind, C));
- if (MCSymbol *Sym = S->getCOMDATSymbol()) {
- if (Sym->isUndefined())
- OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
- return Sym;
+ if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
+ getObjFileLowering().getSectionForConstant(Kind, C))) {
+ if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ if (Sym->isUndefined())
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ return Sym;
+ }
}
}
}
@@ -599,10 +603,10 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMacho()) {
+ if (Subtarget->isTargetMachO()) {
// All darwin targets use mach-o.
MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
// Output stubs for dynamically-linked functions.
MachineModuleInfoMachO::SymbolListTy Stubs;
@@ -725,7 +729,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (const auto &Stub : Stubs) {
OutStreamer.EmitLabel(Stub.first);
@@ -734,6 +738,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
Stubs.clear();
}
+
+ SM.serializeToStackMapSection();
}
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index b1bbe8e41cc0..748b948d212f 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -7,14 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86ASMPRINTER_H
-#define X86ASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
+#define LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
#include "X86Subtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Target/TargetMachine.h"
+// Implemented in X86MCInstLower.cpp
+namespace {
+ class X86MCInstLower;
+}
+
namespace llvm {
class MCStreamer;
class MCSymbol;
@@ -25,9 +30,63 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
+ // This utility class tracks the length of a stackmap instruction's 'shadow'.
+ // It is used by the X86AsmPrinter to ensure that the stackmap shadow
+ // invariants (i.e. no other stackmaps, patchpoints, or control flow within
+ // the shadow) are met, while outputting a minimal number of NOPs for padding.
+ //
+ // To minimise the number of NOPs used, the shadow tracker counts the number
+ // of instruction bytes output since the last stackmap. Only if there are too
+ // few instruction bytes to cover the shadow are NOPs used for padding.
+ class StackMapShadowTracker {
+ public:
+ StackMapShadowTracker(TargetMachine &TM);
+ ~StackMapShadowTracker();
+ void startFunction(MachineFunction &MF);
+ void count(MCInst &Inst, const MCSubtargetInfo &STI);
+
+ // Called to signal the start of a shadow of RequiredSize bytes.
+ void reset(unsigned RequiredSize) {
+ RequiredShadowSize = RequiredSize;
+ CurrentShadowSize = 0;
+ InShadow = true;
+ }
+
+ // Called before every stackmap/patchpoint, and at the end of basic blocks,
+ // to emit any necessary padding-NOPs.
+ void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI);
+ private:
+ TargetMachine &TM;
+ std::unique_ptr<MCCodeEmitter> CodeEmitter;
+ bool InShadow;
+
+ // RequiredShadowSize holds the length of the shadow specified in the most
+ // recently encountered STACKMAP instruction.
+ // CurrentShadowSize counts the number of bytes encoded since the most
+ // recently encountered STACKMAP, stopping when that number is greater than
+ // or equal to RequiredShadowSize.
+ unsigned RequiredShadowSize, CurrentShadowSize;
+ };
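
The header only declares the tracker; per this patch its definitions live in
X86MCInstLower.cpp. A plausible sketch of how the two halves cooperate,
assuming an EmitNops(MCStreamer &, unsigned, bool, const MCSubtargetInfo &)
helper in that file (a hedged reconstruction, not the patch's verbatim code):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCFixup.h"
    #include "llvm/Support/raw_ostream.h"

    void X86AsmPrinter::StackMapShadowTracker::count(
        MCInst &Inst, const MCSubtargetInfo &STI) {
      if (!InShadow)
        return;
      // Encode the instruction to a scratch buffer purely to learn its size.
      SmallString<256> Code;
      SmallVector<MCFixup, 4> Fixups;
      raw_svector_ostream VecOS(Code);
      CodeEmitter->EncodeInstruction(Inst, VecOS, Fixups, STI);
      VecOS.flush();
      CurrentShadowSize += Code.size();
      if (CurrentShadowSize >= RequiredShadowSize)
        InShadow = false; // The shadow is already covered; stop counting.
    }

    void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
        MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
      if (InShadow && CurrentShadowSize < RequiredShadowSize) {
        InShadow = false;
        // Top up with NOPs so the shadow reaches its required length.
        // Simplified: the real code would check the subtarget for 64-bit mode.
        EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                 /*Is64Bit=*/true, STI);
      }
    }
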
+
+ StackMapShadowTracker SMShadowTracker;
+
+ // All instructions emitted by the X86AsmPrinter should use this helper
+ // method.
+ //
+ // This helper function invokes the SMShadowTracker on each instruction before
+ // outputting it to OutStreamer. This allows the shadow tracker to minimise
+ // the number of NOPs used for stackmap padding.
+ void EmitAndCountInstruction(MCInst &Inst);
+
+ void InsertStackMapShadows(MachineFunction &MF);
+ void LowerSTACKMAP(const MachineInstr &MI);
+ void LowerPATCHPOINT(const MachineInstr &MI);
+
+ void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
+
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), SM(*this) {
+ : AsmPrinter(TM, Streamer), SM(*this), SMShadowTracker(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@@ -43,6 +102,10 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void EmitInstruction(const MachineInstr *MI) override;
+ void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override {
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ }
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
@@ -53,6 +116,12 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
/// \brief Return the symbol for the specified constant pool entry.
MCSymbol *GetCPISymbol(unsigned CPID) const override;
+ bool doInitialization(Module &M) override {
+ SMShadowTracker.reset(0);
+ SM.reset();
+ return AsmPrinter::doInitialization(M);
+ }
+
bool runOnMachineFunction(MachineFunction &F) override;
};
diff --git a/lib/Target/X86/X86AtomicExpandPass.cpp b/lib/Target/X86/X86AtomicExpandPass.cpp
deleted file mode 100644
index 3dcadb16760b..000000000000
--- a/lib/Target/X86/X86AtomicExpandPass.cpp
+++ /dev/null
@@ -1,283 +0,0 @@
-//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions --0---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass (at IR level) to replace atomic instructions which
-// cannot be implemented as a single instruction with cmpxchg-based loops.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86TargetMachine.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-atomic-expand"
-
-namespace {
- class X86AtomicExpandPass : public FunctionPass {
- const X86TargetMachine *TM;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit X86AtomicExpandPass(const X86TargetMachine *TM)
- : FunctionPass(ID), TM(TM) {}
-
- bool runOnFunction(Function &F) override;
- bool expandAtomicInsts(Function &F);
-
- bool needsCmpXchgNb(Type *MemType);
-
- /// There are four kinds of atomic operations. Two never need expanding:
- /// cmpxchg is what we expand the others *to*, and loads are easily handled
- /// by ISelLowering. Atomicrmw and store can need expanding in some
- /// circumstances.
- bool shouldExpand(Instruction *Inst);
-
- /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
- /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
- bool shouldExpandStore(StoreInst *SI);
-
- /// Only some atomicrmw instructions need expanding -- some operations
- /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
- /// limited support but can't return the previous value; some (e.g. add)
- /// have complete support in the instruction set.
- ///
- /// Also, naturally, 128-bit operations always need to be expanded.
- bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
-
- bool expandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicStore(StoreInst *SI);
- };
-}
-
-char X86AtomicExpandPass::ID = 0;
-
-FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
- return new X86AtomicExpandPass(TM);
-}
-
-bool X86AtomicExpandPass::runOnFunction(Function &F) {
- SmallVector<Instruction *, 1> AtomicInsts;
-
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (BasicBlock &BB : F)
- for (Instruction &Inst : BB) {
- if (isa<AtomicRMWInst>(&Inst) ||
- (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
- AtomicInsts.push_back(&Inst);
- }
-
- bool MadeChange = false;
- for (Instruction *Inst : AtomicInsts) {
- if (!shouldExpand(Inst))
- continue;
-
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- MadeChange |= expandAtomicRMW(AI);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- MadeChange |= expandAtomicStore(SI);
-
- assert(MadeChange && "Atomic inst not expanded when it should be?");
- Inst->eraseFromParent();
- }
-
- return MadeChange;
-}
-
-/// Returns true if the operand type is 1 step up from the native width, and
-/// the corresponding cmpxchg8b or cmpxchg16b instruction is available
-/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
- const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
- unsigned OpWidth = MemType->getPrimitiveSizeInBits();
-
- if (OpWidth == 64)
- return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
- if (OpWidth == 128)
- return Subtarget.hasCmpxchg16b();
-
- return false;
-}
-
-bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
- const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
- unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
-
- if (needsCmpXchgNb(AI->getType()))
- return true;
-
- if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
- return false;
-
- AtomicRMWInst::BinOp Op = AI->getOperation();
- switch (Op) {
- default:
- llvm_unreachable("Unknown atomic operation");
- case AtomicRMWInst::Xchg:
- case AtomicRMWInst::Add:
- case AtomicRMWInst::Sub:
- // It's better to use xadd, xsub or xchg for these in all cases.
- return false;
- case AtomicRMWInst::Or:
- case AtomicRMWInst::And:
- case AtomicRMWInst::Xor:
- // If the atomicrmw's result isn't actually used, we can just add a "lock"
- // prefix to a normal instruction for these operations.
- return !AI->use_empty();
- case AtomicRMWInst::Nand:
- case AtomicRMWInst::Max:
- case AtomicRMWInst::Min:
- case AtomicRMWInst::UMax:
- case AtomicRMWInst::UMin:
- // These always require a non-trivial set of data operations on x86. We must
- // use a cmpxchg loop.
- return true;
- }
-}
-
-bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
- if (needsCmpXchgNb(SI->getValueOperand()->getType()))
- return true;
-
- return false;
-}
-
-bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- return shouldExpandAtomicRMW(AI);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return shouldExpandStore(SI);
- return false;
-}
-
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
- Value *Loaded, Value *Inc) {
- Value *NewVal;
- switch (Op) {
- case AtomicRMWInst::Xchg:
- return Inc;
- case AtomicRMWInst::Add:
- return Builder.CreateAdd(Loaded, Inc, "new");
- case AtomicRMWInst::Sub:
- return Builder.CreateSub(Loaded, Inc, "new");
- case AtomicRMWInst::And:
- return Builder.CreateAnd(Loaded, Inc, "new");
- case AtomicRMWInst::Nand:
- return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
- case AtomicRMWInst::Or:
- return Builder.CreateOr(Loaded, Inc, "new");
- case AtomicRMWInst::Xor:
- return Builder.CreateXor(Loaded, Inc, "new");
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- default:
- break;
- }
- llvm_unreachable("Unknown atomic op");
-}
-
-bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
- AtomicOrdering Order =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // %init_loaded = load atomic iN* %addr
- // br label %loop
- // loop:
- // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
- // %new = some_op iN %loaded, %incr
- // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
- // %new_loaded = extractvalue { iN, i1 } %pair, 0
- // %success = extractvalue { iN, i1 } %pair, 1
- // br i1 %success, label %atomicrmw.end, label %loop
- // atomicrmw.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we want a load. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
- Loaded->addIncoming(InitLoaded, BB);
-
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
- Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, Order,
- AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
- Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
- Loaded->addIncoming(NewLoaded, LoopBB);
-
- Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
- Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
- AI->replaceAllUsesWith(NewLoaded);
-
- return true;
-}
-
-bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
- // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
- // this in terms of the usual expansion to "atomicrmw xchg".
- IRBuilder<> Builder(SI);
- AtomicOrdering Order =
- SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), Order);
-
- // Now we have an appropriate swap instruction, lower it as usual.
- if (shouldExpandAtomicRMW(AI)) {
- expandAtomicRMW(AI);
- AI->eraseFromParent();
- return true;
- }
-
- return AI;
-}
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index e76f9fda2db9..0eb2494f1d63 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -12,14 +12,27 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86CALLINGCONV_H
-#define X86CALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
+#define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
namespace llvm {
+inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // Similar to CCPassIndirect, with the addition of inreg.
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::Indirect;
+ ArgFlags.setInReg();
+ return false; // Continue the search, but now for i32.
+}
+
+
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 86c01bd64647..75a2ec004685 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -14,7 +14,9 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+ : CCIf<!strconcat("static_cast<const X86Subtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Conventions
@@ -59,20 +61,20 @@ def RetCC_X86Common : CallingConv<[
// MM0, it doesn't support these vector types.
CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
- // Long double types are always returned in ST0 (even with SSE).
- CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+ // Long double types are always returned in FP0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>
]>;
// X86-32 C return-value convention.
def RetCC_X86_32_C : CallingConv<[
- // The X86-32 calling convention returns FP values in ST0, unless marked
+ // The X86-32 calling convention returns FP values in FP0, unless marked
// with "inreg" (used here to distinguish one kind of reg from another,
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
- CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCIfType<[f32,f64], CCAssignToReg<[FP0, FP1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -122,6 +124,24 @@ def RetCC_X86_32_HiPE : CallingConv<[
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
+// X86-32 vectorcall return-value convention.
+def RetCC_X86_32_VectorCall : CallingConv<[
+ // Vector types are returned in XMM0, XMM1, XMM2 and XMM3.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit vectors (both FP and integer)
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // 512-bit vectors (both FP and integer)
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
+ // Return integers in the standard way.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
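For illustration (not part of the patch), a function compiled with the
vectorcall convention (MSVC's __vectorcall keyword, also accepted by clang
with MS extensions) returns its vector result in XMM0 under the rules above
rather than on the x87 stack:

    #include <xmmintrin.h>

    // Hypothetical example: the result comes back in XMM0 per
    // RetCC_X86_32_VectorCall.
    __m128 __vectorcall scale(__m128 v, float s) {
      return _mm_mul_ps(v, _mm_set1_ps(s));
    }
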
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -177,6 +197,7 @@ def RetCC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
@@ -224,6 +245,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
CCIfNest<CCAssignToReg<[R10]>>,
// The first 6 integer arguments are passed in integer registers.
@@ -327,6 +349,25 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+def CC_X86_Win64_VectorCall : CallingConv<[
+ // The first 6 floating-point and vector arguments of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_Win64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -460,6 +501,30 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_VectorCall : CallingConv<[
+ // The first 6 floating-point and vector arguments of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Otherwise, pass it indirectly.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+ v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
+ v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCCustom<"CC_X86_32_VectorCallIndirect">>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_32_FastCall>
+]>;
+
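A hypothetical declaration to illustrate the argument side (again, not from
the patch): the first six vector parameters occupy XMM0-XMM5, and a seventh
no longer fits, so the CCCustom rule above passes it indirectly:

    #include <xmmintrin.h>

    // Hypothetical: a..f land in XMM0-XMM5; g is passed by address (an
    // inreg i32 pointer) via CC_X86_32_VectorCallIndirect.
    __m128 __vectorcall dot7(__m128 a, __m128 b, __m128 c, __m128 d,
                             __m128 e, __m128 f, __m128 g);
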
def CC_X86_32_ThisCall_Common : CallingConv<[
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -573,6 +638,7 @@ def CC_Intel_OCL_BI : CallingConv<[
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
@@ -590,6 +656,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
deleted file mode 100644
index a3ae7ee315a3..000000000000
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ /dev/null
@@ -1,1498 +0,0 @@
-//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the X86 machine instructions into
-// relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86Relocations.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-emitter"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
- template<class CodeEmitter>
- class Emitter : public MachineFunctionPass {
- const X86InstrInfo *II;
- const DataLayout *TD;
- X86TargetMachine &TM;
- CodeEmitter &MCE;
- MachineModuleInfo *MMI;
- intptr_t PICBaseOffset;
- bool Is64BitMode;
- bool IsPIC;
- public:
- static char ID;
- explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm),
- MCE(mce), PICBaseOffset(0), Is64BitMode(false),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "X86 Machine Code Emitter";
- }
-
- void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const;
-
- void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const;
-
- void emitSegmentOverridePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI) const;
-
- void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool Indirect = false);
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
- intptr_t PCAdj = 0);
- void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
- intptr_t PCAdj = 0);
-
- void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
- intptr_t Adj = 0, bool IsPCRel = true);
-
- void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
- void emitRegModRMByte(unsigned RegOpcodeField);
- void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
- void emitConstant(uint64_t Val, unsigned Size);
-
- void emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField,
- intptr_t PCAdj = 0);
-
- unsigned getX86RegNum(unsigned RegNo) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- return TRI->getEncodingValue(RegNo) & 0x7;
- }
-
- unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
- unsigned OpNum) const;
- };
-
-template<class CodeEmitter>
- char Emitter<CodeEmitter>::ID = 0;
-} // end anonymous namespace.
-
-/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified JITCodeEmitter object.
-FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new Emitter<JITCodeEmitter>(TM, JCE);
-}
-
-template<class CodeEmitter>
-bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
-
- II = TM.getInstrInfo();
- TD = TM.getDataLayout();
- Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
- IsPIC = TM.getRelocationModel() == Reloc::PIC_;
-
- do {
- DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
- MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MCInstrDesc &Desc = I->getDesc();
- emitInstruction(*I, &Desc);
- // MOVPC32r is basically a call plus a pop instruction.
- if (Desc.getOpcode() == X86::MOVPC32r)
- emitInstruction(*I, &II->get(X86::POP32r));
- ++NumEmitted; // Keep track of the # of mi's emitted
- }
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-static unsigned determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const MCInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMSrcReg: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
-
-
-/// emitPCRelativeBlockAddress - This method keeps track of the information
-/// necessary to resolve the address of this block later and emits a dummy
-/// value.
-///
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
- // Remember where this reference was and where it is to so we can
- // deal with it later.
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- X86::reloc_pcrel_word, MBB));
- MCE.emitWordLE(0);
-}
-
-/// emitGlobalAddress - Emit the specified address to the code stream assuming
-/// this is part of a "take the address of a global" instruction.
-///
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
- unsigned Reloc,
- intptr_t Disp /* = 0 */,
- intptr_t PCAdj /* = 0 */,
- bool Indirect /* = false */) {
- intptr_t RelocCST = Disp;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MachineRelocation MR = Indirect
- ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV),
- RelocCST, false)
- : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), RelocCST, false);
- MCE.addRelocation(MR);
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(Disp);
- else
- MCE.emitWordLE((int32_t)Disp);
-}
-
-/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
- unsigned Reloc) {
- intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
-
- // X86 never needs stubs because instruction selection will always pick
- // an instruction sequence that is large enough to hold any address
- // to a symbol.
- // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
- bool NeedStub = false;
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, RelocCST,
- 0, NeedStub));
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(0);
- else
- MCE.emitWordLE(0);
-}
-
-/// emitConstPoolAddress - Arrange for the address of an constant pool
-/// to be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
- intptr_t Disp /* = 0 */,
- intptr_t PCAdj /* = 0 */) {
- intptr_t RelocCST = 0;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, RelocCST));
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(Disp);
- else
- MCE.emitWordLE((int32_t)Disp);
-}
-
-/// emitJumpTableAddress - Arrange for the address of a jump table to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
- intptr_t PCAdj /* = 0 */) {
- intptr_t RelocCST = 0;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTI, RelocCST));
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(0);
- else
- MCE.emitWordLE(0);
-}
-
-inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
- unsigned RM) {
- assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
- return RM | (RegOpcode << 3) | (Mod << 6);
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
- unsigned RegOpcodeFld){
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
- unsigned Index,
- unsigned Base) {
- // SIB byte is in the same format as the ModRMByte...
- MCE.emitByte(ModRMByte(SS, Index, Base));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
- // Output the constant in little endian byte order...
- for (unsigned i = 0; i != Size; ++i) {
- MCE.emitByte(Val & 255);
- Val >>= 8;
- }
-}
-
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
-static bool isDisp8(int Value) {
- return Value == (signed char)Value;
-}
-
-static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
- const TargetMachine &TM) {
- // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
- // mechanism as 32-bit mode.
- if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
- !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
- return false;
-
- // Return true if this is a reference to a stub containing the address of the
- // global, not the global itself.
- return isGlobalStubReference(GVOp.getTargetFlags());
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
- int DispVal,
- intptr_t Adj /* = 0 */,
- bool IsPCRel /* = true */) {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
- if (!RelocOp) {
- emitConstant(DispVal, 4);
- return;
- }
-
- // Otherwise, this is something that requires a relocation. Emit it as such
- // now.
- unsigned RelocType = Is64BitMode ?
- (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (RelocOp->isGlobal()) {
- // In 64-bit static small code model, we could potentially emit absolute.
- // But it's probably not beneficial. If the MCE supports using RIP directly
- // do it, otherwise fallback to absolute (this is determined by IsPCRel).
- // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
- // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
- emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, Indirect);
- } else if (RelocOp->isSymbol()) {
- emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
- } else if (RelocOp->isCPI()) {
- emitConstPoolAddress(RelocOp->getIndex(), RelocType,
- RelocOp->getOffset(), Adj);
- } else {
- assert(RelocOp->isJTI() && "Unexpected machine operand!");
- emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
- }
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
- unsigned Op,unsigned RegOpcodeField,
- intptr_t PCAdj) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = nullptr;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isSymbol()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
- DispVal += Op3.getOffset();
- }
- } else if (Op3.isJTI()) {
- if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
- }
- } else {
- DispVal = Op3.getImm();
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &Scale = MI.getOperand(Op+1);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
-
- // Handle %rip relative addressing.
- if (BaseReg == X86::RIP ||
- (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
- assert(IndexReg.getReg() == 0 && Is64BitMode &&
- "Invalid rip-relative address");
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
- return;
- }
-
- // Indicate that the displacement will use an pcrel or absolute reference
- // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
- // while others, unless explicit asked to use RIP, use absolute references.
- bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
-
- // Is a SIB byte needed?
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
- // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
- // 2-7) and absolute references.
- unsigned BaseRegNo = -1U;
- if (BaseReg != 0 && BaseReg != X86::RIP)
- BaseRegNo = getX86RegNum(BaseReg);
-
- if (// The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP/R12, all of which
- // encode to an R/M value of 4, which indicates that a SIB byte is
- // present.
- BaseRegNo != N86::ESP &&
- // If there is no base register and we're in 64-bit mode, we need a SIB
- // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!Is64BitMode || BaseReg != 0)) {
- if (BaseReg == 0 || // [disp32] in X86-32 mode
- BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
- return;
- }
-
- // If the base is not EBP/ESP and there is no displacement, use simple
- // indirect register encoding, this handles addresses like [EAX]. The
- // encoding for [EBP] with no displacement means [disp32] so we handle it
- // by emitting a displacement of 0 below.
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
- return;
- }
-
- // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
- if (!DispForReloc && isDisp8(DispVal)) {
- MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
- emitConstant(DispVal, 1);
- return;
- }
-
- // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
- MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
- return;
- }
-
- // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
- bool ForceDisp32 = false;
- bool ForceDisp8 = false;
- if (BaseReg == 0) {
- // If there is no base register, we emit the special case SIB byte with
- // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
- ForceDisp32 = true;
- } else if (DispForReloc) {
- // Emit the normal disp32 encoding.
- MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
- ForceDisp32 = true;
- } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit no displacement ModR/M byte
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
- } else if (isDisp8(DispVal)) {
- // Emit the disp8 encoding...
- MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
- } else {
- // Emit the normal disp32 encoding...
- MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
- }
-
- // Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
- unsigned SS = SSTable[Scale.getImm()];
-
- if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
- // Manual 2A, table 2-7. The displacement has already been output.
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
- else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
- IndexRegNo = 4;
- emitSIBByte(SS, IndexRegNo, 5);
- } else {
- unsigned BaseRegNo = getX86RegNum(BaseReg);
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
- else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- emitSIBByte(SS, IndexRegNo, BaseRegNo);
- }
-
- // Do we need to output a displacement?
- if (ForceDisp8) {
- emitConstant(DispVal, 1);
- } else if (DispVal != 0 || ForceDisp32) {
- emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
- }
-}
-
-static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
- unsigned Opcode) {
- const MCInstrDesc *Desc = &II->get(Opcode);
- MI.setDesc(*Desc);
- return Desc;
-}
-
-/// Is16BitMemOperand - Return true if the specified instruction has
-/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
-static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-
-/// Is32BitMemOperand - Return true if the specified instruction has
-/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
-static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-
-/// Is64BitMemOperand - Return true if the specified instruction has
-/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
-#ifndef NDEBUG
-static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-#endif
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const {
- // Emit the operand size opcode prefix as needed.
- if (((TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift) == X86II::OpSize16)
- MCE.emitByte(0x66);
-
- switch (Desc->TSFlags & X86II::OpPrefixMask) {
- case X86II::PD: // 66
- MCE.emitByte(0x66);
- break;
- case X86II::XS: // F3
- MCE.emitByte(0xF3);
- break;
- case X86II::XD: // F2
- MCE.emitByte(0xF2);
- break;
- }
-
- // Handle REX prefix.
- if (Is64BitMode) {
- if (unsigned REX = determineREX(MI))
- MCE.emitByte(0x40 | REX);
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- switch (Desc->TSFlags & X86II::OpMapMask) {
- case X86II::TB: // Two-byte opcode map
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- MCE.emitByte(0x0F);
- break;
- }
-
- switch (Desc->TSFlags & X86II::OpMapMask) {
- case X86II::T8: // 0F 38
- MCE.emitByte(0x38);
- break;
- case X86II::TA: // 0F 3A
- MCE.emitByte(0x3A);
- break;
- }
-}
-
-// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
-// 0-7 and the difference between the 2 groups is given by the REX prefix.
-// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
-// in 1's complement form, example:
-//
-// ModRM field => XMM9 => 1
-// VEX.VVVV => XMM9 => ~9
-//
-// See table 4-35 of Intel AVX Programming Reference for details.
-template<class CodeEmitter>
-unsigned char
-Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
- unsigned OpNum) const {
- unsigned SrcReg = MI.getOperand(OpNum).getReg();
- unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
- if (X86II::isX86_64ExtendedReg(SrcReg))
- SrcRegNum |= 8;
-
- // The registers represented through VEX_VVVV should
- // be encoded in 1's complement form.
- return (~SrcRegNum) & 0xf;
-}
-
-/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI) const {
- if (MemOperand < 0)
- return; // No memory operand
-
- // Check for explicit segment override on memory operand.
- switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
- default: llvm_unreachable("Unknown segment register!");
- case 0: break;
- case X86::CS: MCE.emitByte(0x2E); break;
- case X86::SS: MCE.emitByte(0x36); break;
- case X86::DS: MCE.emitByte(0x3E); break;
- case X86::ES: MCE.emitByte(0x26); break;
- case X86::FS: MCE.emitByte(0x64); break;
- case X86::GS: MCE.emitByte(0x65); break;
- }
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const {
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
-
- // VEX_R: opcode extension equivalent to REX.R in
- // 1's complement (inverted) form
- //
- //  1: Same as REX.R=0 (must be 1 in 32-bit mode)
- //  0: Same as REX.R=1 (64-bit mode only)
- //
- unsigned char VEX_R = 0x1;
-
- // VEX_X: equivalent to REX.X, only used when a
- // register is used for index in the SIB byte.
- //
- //  1: Same as REX.X=0 (must be 1 in 32-bit mode)
- //  0: Same as REX.X=1 (64-bit mode only)
- unsigned char VEX_X = 0x1;
-
- // VEX_B:
- //
- //  1: Same as REX.B=0 (ignored in 32-bit mode)
- //  0: Same as REX.B=1 (64-bit mode only)
- //
- unsigned char VEX_B = 0x1;
-
- // VEX_W: opcode specific (used like REX.W, or as an opcode
- // extension, or ignored, depending on the opcode byte)
- unsigned char VEX_W = 0;
-
- // VEX_5M (VEX m-mmmmm field):
- //
- //  0b00001: implied 0F leading opcode byte
- //  0b00010: implied 0F 38 leading opcode bytes
- //  0b00011: implied 0F 3A leading opcode bytes
- //  0b01000: XOP map select - 08h instructions with imm byte
- //  0b01001: XOP map select - 09h instructions with no imm byte
- //  0b01010: XOP map select - 0Ah instructions with imm dword
- //  All other values are reserved for future use.
- unsigned char VEX_5M = 0;
-
- // VEX_4V (VEX vvvv field): a register specifier
- // (in 1's complement form) or 1111 if unused.
- unsigned char VEX_4V = 0xf;
-
- // VEX_L (Vector Length):
- //
- // 0: scalar or 128-bit vector
- // 1: 256-bit vector
- //
- unsigned char VEX_L = 0;
-
- // VEX_PP: opcode extension providing equivalent
- // functionality of a SIMD prefix
- //
- // 0b00: None
- // 0b01: 66
- // 0b10: F3
- // 0b11: F2
- //
- unsigned char VEX_PP = 0;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
- VEX_W = 1;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
- VEX_L = 1;
-
- switch (TSFlags & X86II::OpPrefixMask) {
- default: break; // VEX_PP already correct
- case X86II::PD: VEX_PP = 0x1; break; // 66
- case X86II::XS: VEX_PP = 0x2; break; // F3
- case X86II::XD: VEX_PP = 0x3; break; // F2
- }
-
- switch (TSFlags & X86II::OpMapMask) {
- default: llvm_unreachable("Invalid prefix!");
- case X86II::TB: VEX_5M = 0x1; break; // 0F
- case X86II::T8: VEX_5M = 0x2; break; // 0F 38
- case X86II::TA: VEX_5M = 0x3; break; // 0F 3A
- case X86II::XOP8: VEX_5M = 0x8; break;
- case X86II::XOP9: VEX_5M = 0x9; break;
- case X86II::XOPA: VEX_5M = 0xA; break;
- }
-
- // Classify VEX_B, VEX_4V, VEX_R, VEX_X
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
- assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
- // Special case for GATHER with 2 TIED_TO operands
- // Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- }
-
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
- case X86II::RawFrm:
- break;
- case X86II::MRMDestMem: {
- // MRMDestMem instruction forms:
- // MemAddr, src1(ModR/M)
- // MemAddr, src1(VEX_4V), src2(ModR/M)
- // MemAddr, src1(ModR/M), imm8
- //
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
-
- CurOp = X86::AddrNumOperands;
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- const MachineOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
- VEX_R = 0x0;
- break;
- }
- case X86II::MRMSrcMem:
- // MRMSrcMem instruction forms:
- // src1(ModR/M), MemAddr
- // src1(ModR/M), src2(VEX_4V), MemAddr
- // src1(ModR/M), MemAddr, imm8
- // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
- //
- // FMA4:
- // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- CurOp++;
-
- if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- CurOp++;
- }
-
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
-
- if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
- break;
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- // MRM[0-7]m instruction forms:
- // MemAddr
- // src1(VEX_4V), MemAddr
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
- break;
- }
- case X86II::MRMSrcReg:
- // MRMSrcReg instruction forms:
- // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M), src1(ModR/M)
- // dst(ModR/M), src1(ModR/M), imm8
- //
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- CurOp++;
-
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (HasMemOp4) // Skip second register source (encoded in I8IMM)
- CurOp++;
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- CurOp++;
- if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- break;
- case X86II::MRMDestReg:
- // MRMDestReg instruction forms:
- // dst(ModR/M), src(ModR/M)
- // dst(ModR/M), src(ModR/M), imm8
- // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- CurOp++;
-
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- break;
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- // MRM0r-MRM7r instruction forms:
- // dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- CurOp++;
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- break;
- }
-
- // Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
-
- // The VEX opcode prefix can be 2 or 3 bytes:
- //
- // 3 bytes:
- // +-----+ +--------------+ +-------------------+
- // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- // 2 bytes:
- // +-----+ +-------------------+
- // | C5h | | R | vvvv | L | pp |
- // +-----+ +-------------------+
- //
- // XOP uses a similar prefix:
- // +-----+ +--------------+ +-------------------+
- // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
-
- // Can this use the 2-byte VEX prefix?
- if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- MCE.emitByte(0xC5);
- MCE.emitByte(LastByte | (VEX_R << 7));
- return;
- }
-
- // 3 byte VEX prefix
- MCE.emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4);
- MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
- MCE.emitByte(LastByte | (VEX_W << 7));
-}
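To make the field packing concrete, a small sketch (illustrative, not from the patch) assembling the 2-byte prefix for vaddps %xmm3, %xmm2, %xmm1 (VEX.NDS.128.0F.WIG 58 /r):

#include <cassert>

int main() {
  unsigned char VEX_R = 1;             // xmm1 is not an extended register
  unsigned char VEX_4V = (~2u) & 0xf;  // vvvv encodes xmm2 as 0b1101
  unsigned char VEX_L = 0, VEX_PP = 0; // 128-bit operation, no SIMD prefix
  unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
  // B == X == 1, W == 0, m-mmmmm == 1, so the 2-byte form applies:
  unsigned char Prefix[2] = { 0xC5, (unsigned char)(LastByte | (VEX_R << 7)) };
  assert(Prefix[0] == 0xC5 && Prefix[1] == 0xE8); // full insn: C5 E8 58 CB
  return 0;
}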
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
- const MCInstrDesc *Desc) {
- DEBUG(dbgs() << MI);
-
- // If this is a pseudo instruction, lower it.
- switch (Desc->getOpcode()) {
- case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
- case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
- case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
- case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
- case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
- case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
- case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
- case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
- case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
- case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
- case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
- case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
- case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
- case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
- case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
- case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
- case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
- }
-
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- unsigned Opcode = Desc->Opcode;
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
- assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
- // Special case for GATHER with 2 TIED_TO operands
- // Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- }
-
- uint64_t TSFlags = Desc->TSFlags;
-
- // Encoding type for this instruction.
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
-
- // Does this instruction use the VEX.VVVV field?
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- const unsigned MemOp4_I8IMMOperand = 2;
-
- // Determine where the memory operand starts, if present.
- int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
- if (MemoryOperand != -1) MemoryOperand += CurOp;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK)
- MCE.emitByte(0xF0);
-
- // Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(TSFlags, MemoryOperand, MI);
-
- // Emit the repeat opcode prefix as needed.
- if (Desc->TSFlags & X86II::REP)
- MCE.emitByte(0xF3);
-
- // Emit the address size opcode prefix as needed.
- bool need_address_override;
- if (TSFlags & X86II::AdSize) {
- need_address_override = true;
- } else if (MemoryOperand < 0) {
- need_address_override = false;
- } else if (Is64BitMode) {
- assert(!Is16BitMemOperand(MI, MemoryOperand));
- need_address_override = Is32BitMemOperand(MI, MemoryOperand);
- } else {
- assert(!Is64BitMemOperand(MI, MemoryOperand));
- need_address_override = Is16BitMemOperand(MI, MemoryOperand);
- }
-
- if (need_address_override)
- MCE.emitByte(0x67);
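The decision above condenses to one rule: emit 0x67 whenever the effective address size differs from the mode's default. A self-contained restatement (a sketch; the helper name is invented here):

#include <cassert>

static bool needsAddrOverride(bool AdSizeFlag, bool HasMemOperand,
                              bool Is64BitMode, bool MemIs32Bit,
                              bool MemIs16Bit) {
  if (AdSizeFlag)
    return true;  // forced by the instruction's AdSize flag
  if (!HasMemOperand)
    return false; // nothing to override
  return Is64BitMode ? MemIs32Bit : MemIs16Bit;
}

int main() {
  // A 32-bit address in 64-bit mode needs the override; the default doesn't.
  assert(needsAddrOverride(false, true, true, true, false));
  assert(!needsAddrOverride(false, true, true, false, false));
  return 0;
}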
-
- if (Encoding == 0)
- emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
- else
- emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
-
- unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
- switch (TSFlags & X86II::FormMask) {
- default:
- llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset; this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- llvm_unreachable("pseudo instructions should be removed before code"
- " emission");
- // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
- // print to make it slightly easier to spot.
- case X86::Int_MemBarrier:
- DEBUG(dbgs() << "#MEMBARRIER\n");
- break;
-
- case TargetOpcode::INLINEASM:
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- DebugLoc DL = MI.getDebugLoc();
- DL.print(MI.getParent()->getParent()->getFunction()->getContext(),
- llvm::errs());
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
-
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break;
-
- case X86::SEH_PushReg:
- case X86::SEH_SaveReg:
- case X86::SEH_SaveXMM:
- case X86::SEH_StackAlloc:
- case X86::SEH_SetFrame:
- case X86::SEH_PushFrame:
- case X86::SEH_EndPrologue:
- break;
-
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- MCE.emitByte(BaseOpcode);
- emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
- // Remember PIC base.
- PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
- X86JITInfo *JTI = TM.getJITInfo();
- JTI->setPICBase(MCE.getCurrentPCValue());
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm: {
- MCE.emitByte(BaseOpcode);
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO = MI.getOperand(CurOp++);
-
- DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
- DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
- DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
- DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
- DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
-
- if (MO.isMBB()) {
- emitPCRelativeBlockAddress(MO.getMBB());
- break;
- }
-
- if (MO.isGlobal()) {
- emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0);
- break;
- }
-
- if (MO.isSymbol()) {
- emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
- break;
- }
-
- // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
- if (MO.isJTI()) {
- emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
- break;
- }
-
- assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
- // Fix up the immediate operand for pc-relative calls.
- intptr_t Imm = (intptr_t)MO.getImm();
- Imm = Imm - MCE.getCurrentPCValue() - 4;
- emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
- } else
- emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
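The pc-relative fixup subtracts both the current PC and the 4-byte width of the immediate field, so the stored displacement is relative to the end of the instruction. A worked example with assumed addresses:

#include <cassert>

int main() {
  long Target = 0x2000;       // address the call should reach
  long PC = 0x1004;           // getCurrentPCValue() when the imm32 is emitted
  long Imm = Target - PC - 4; // displacement from the end of the call
  assert(Imm == 0xFF8);       // 0x1008 + 0xFF8 == 0x2000
  return 0;
}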
-
- case X86II::AddRegFrm: {
- MCE.emitByte(BaseOpcode +
- getX86RegNum(MI.getOperand(CurOp++).getReg()));
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm()) {
- emitConstant(MO1.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV32ri64)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- // This should not occur on Darwin for relocatable objects.
- if (Opcode == X86::MOV64ri)
- rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- break;
- }
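AddRegFrm folds the register number directly into the opcode byte. For instance, MOV32ri has base opcode 0xB8, so mov $imm32, %ecx becomes 0xB9 followed by the immediate (a sketch):

#include <cassert>

int main() {
  unsigned char BaseOpcode = 0xB8; // MOV32ri
  unsigned RegNum = 1;             // ECX
  assert((unsigned char)(BaseOpcode + RegNum) == 0xB9);
  return 0;
}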
-
- case X86II::MRMDestReg: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp+1;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
-
- emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
- CurOp = SrcRegNum + 1;
- break;
- }
- case X86II::MRMDestMem: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
- emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
- CurOp = SrcRegNum + 1;
- break;
- }
-
- case X86II::MRMSrcReg: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp+1;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- ++SrcRegNum;
-
- if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
- ++SrcRegNum;
-
- emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
- // 2 operands skipped with HasMemOp4, compensate accordingly
- CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
- break;
- }
- case X86II::MRMSrcMem: {
- int AddrOperands = X86::AddrNumOperands;
- unsigned FirstMemOp = CurOp+1;
- if (HasVEX_4V) {
- ++AddrOperands;
- ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- }
- if (HasMemOp4) // Skip second register source (encoded in I8IMM)
- ++FirstMemOp;
-
- MCE.emitByte(BaseOpcode);
-
- intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
- X86II::getSizeOfImm(Desc->TSFlags) : 0;
- emitMemModRMByte(MI, FirstMemOp,
- getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
- CurOp += AddrOperands + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
- break;
- }
-
- case X86II::MRMXr:
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
- if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- ++CurOp;
- MCE.emitByte(BaseOpcode);
- uint64_t Form = (Desc->TSFlags & X86II::FormMask);
- emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
- (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r);
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm()) {
- emitConstant(MO1.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- break;
- }
-
- case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- ++CurOp;
- intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
- (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
- X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
-
- MCE.emitByte(BaseOpcode);
- uint64_t Form = (Desc->TSFlags & X86II::FormMask);
- emitMemModRMByte(MI, CurOp, (Form==X86II::MRMXm) ? 0 : Form - X86II::MRM0m,
- PCAdj);
- CurOp += X86::AddrNumOperands;
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm()) {
- emitConstant(MO.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64mi32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO, TM);
- emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- Indirect);
- } else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), rt);
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), rt);
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), rt);
- break;
- }
-
- case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
- case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
- MCE.emitByte(BaseOpcode);
-
- unsigned char MRM;
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Invalid Form");
- case X86II::MRM_C0: MRM = 0xC0; break;
- case X86II::MRM_C1: MRM = 0xC1; break;
- case X86II::MRM_C2: MRM = 0xC2; break;
- case X86II::MRM_C3: MRM = 0xC3; break;
- case X86II::MRM_C4: MRM = 0xC4; break;
- case X86II::MRM_C8: MRM = 0xC8; break;
- case X86II::MRM_C9: MRM = 0xC9; break;
- case X86II::MRM_CA: MRM = 0xCA; break;
- case X86II::MRM_CB: MRM = 0xCB; break;
- case X86II::MRM_D0: MRM = 0xD0; break;
- case X86II::MRM_D1: MRM = 0xD1; break;
- case X86II::MRM_D4: MRM = 0xD4; break;
- case X86II::MRM_D5: MRM = 0xD5; break;
- case X86II::MRM_D6: MRM = 0xD6; break;
- case X86II::MRM_D8: MRM = 0xD8; break;
- case X86II::MRM_D9: MRM = 0xD9; break;
- case X86II::MRM_DA: MRM = 0xDA; break;
- case X86II::MRM_DB: MRM = 0xDB; break;
- case X86II::MRM_DC: MRM = 0xDC; break;
- case X86II::MRM_DD: MRM = 0xDD; break;
- case X86II::MRM_DE: MRM = 0xDE; break;
- case X86II::MRM_DF: MRM = 0xDF; break;
- case X86II::MRM_E0: MRM = 0xE0; break;
- case X86II::MRM_E1: MRM = 0xE1; break;
- case X86II::MRM_E2: MRM = 0xE2; break;
- case X86II::MRM_E3: MRM = 0xE3; break;
- case X86II::MRM_E4: MRM = 0xE4; break;
- case X86II::MRM_E5: MRM = 0xE5; break;
- case X86II::MRM_E8: MRM = 0xE8; break;
- case X86II::MRM_E9: MRM = 0xE9; break;
- case X86II::MRM_EA: MRM = 0xEA; break;
- case X86II::MRM_EB: MRM = 0xEB; break;
- case X86II::MRM_EC: MRM = 0xEC; break;
- case X86II::MRM_ED: MRM = 0xED; break;
- case X86II::MRM_EE: MRM = 0xEE; break;
- case X86II::MRM_F0: MRM = 0xF0; break;
- case X86II::MRM_F1: MRM = 0xF1; break;
- case X86II::MRM_F2: MRM = 0xF2; break;
- case X86II::MRM_F3: MRM = 0xF3; break;
- case X86II::MRM_F4: MRM = 0xF4; break;
- case X86II::MRM_F5: MRM = 0xF5; break;
- case X86II::MRM_F6: MRM = 0xF6; break;
- case X86II::MRM_F7: MRM = 0xF7; break;
- case X86II::MRM_F8: MRM = 0xF8; break;
- case X86II::MRM_F9: MRM = 0xF9; break;
- case X86II::MRM_FA: MRM = 0xFA; break;
- case X86II::MRM_FB: MRM = 0xFB; break;
- case X86II::MRM_FC: MRM = 0xFC; break;
- case X86II::MRM_FD: MRM = 0xFD; break;
- case X86II::MRM_FE: MRM = 0xFE; break;
- case X86II::MRM_FF: MRM = 0xFF; break;
- }
- MCE.emitByte(MRM);
- break;
- }
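These MRM_XX forms hard-code the ModRM byte rather than deriving it from operands. MWAIT is a concrete instance: base opcode 0x01 in the two-byte map plus the fixed byte selected by MRM_C9 (illustrative sketch):

#include <cstdio>

int main() {
  const unsigned char Mwait[] = { 0x0F, 0x01, 0xC9 }; // 0F 01, then MRM_C9
  for (unsigned char B : Mwait)
    std::printf("%02X ", B);
  return 0;
}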
-
- while (CurOp != NumOps && NumOps - CurOp <= 2) {
- // The last source register of a 4-operand AVX instruction is encoded
- // in bits [7:4] of an immediate byte.
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
- : CurOp);
- ++CurOp;
- unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
- if (X86II::isX86_64ExtendedReg(MO.getReg()))
- RegNum |= 1 << 7;
- // If there is an additional 5th operand, it must be an immediate, which
- // is encoded in bits [3:0].
- if (CurOp != NumOps) {
- const MachineOperand &MIMM = MI.getOperand(CurOp++);
- if (MIMM.isImm()) {
- unsigned Val = MIMM.getImm();
- assert(Val < 16 && "Immediate operand value out of range");
- RegNum |= Val;
- }
- }
- emitConstant(RegNum, 1);
- } else {
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
- }
- }
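A concrete instance of the register-in-immediate scheme: for vblendvps %xmm4, %xmm3, %xmm2, %xmm1 the mask register xmm4 lands in imm8[7:4], yielding the byte 0x40 (sketch):

#include <cassert>

int main() {
  unsigned RegNum = 4u << 4; // xmm4 packed into bits [7:4]
  unsigned Imm8 = RegNum;    // no REX-extended register, no imm in [3:0]
  assert(Imm8 == 0x40);
  return 0;
}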
-
- if (!MI.isVariadic() && CurOp != NumOps) {
-#ifndef NDEBUG
- dbgs() << "Cannot encode all operands of: " << MI << "\n";
-#endif
- llvm_unreachable(nullptr);
- }
-
- MCE.processDebugLoc(MI.getDebugLoc(), false);
-}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ae3eea4943d7..5d71eac7c05a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -64,7 +64,7 @@ public:
X86ScalarSSEf32 = Subtarget->hasSSE1();
}
- bool TargetSelectInstruction(const Instruction *I) override;
+ bool fastSelectInstruction(const Instruction *I) override;
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
@@ -73,9 +73,9 @@ public:
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
- bool FastLowerCall(CallLoweringInfo &CLI) override;
- bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
+ bool fastLowerArguments() override;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
#include "X86GenFastISel.inc"
@@ -127,7 +127,7 @@ private:
bool X86SelectFPTrunc(const Instruction *I);
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine()->getInstrInfo();
+ return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
}
const X86TargetMachine *getTargetMachine() const {
return static_cast<const X86TargetMachine *>(&TM);
@@ -135,11 +135,14 @@ private:
bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
- unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
+ unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned fastMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *C) override;
- unsigned TargetMaterializeFloatZero(const ConstantFP *CF) override;
+ unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
@@ -161,46 +164,6 @@ private:
} // end anonymous namespace.
-static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
- // If both operands are the same, then try to optimize or fold the cmp.
- CmpInst::Predicate Predicate = CI->getPredicate();
- if (CI->getOperand(0) != CI->getOperand(1))
- return Predicate;
-
- switch (Predicate) {
- default: llvm_unreachable("Invalid predicate!");
- case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
-
- case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
- }
-
- return Predicate;
-}
-
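The removed table rests on ordinary IEEE semantics; with identical operands, for example, oeq holds exactly when the value is not NaN, which is FCMP_ORD. A minimal semantics check (illustrative):

#include <cassert>
#include <cmath>

int main() {
  double X = NAN;
  assert(!(X == X)); // oeq on identical NaN operands is false, matching ORD
  double Y = 1.0;
  assert(Y == Y);    // an ordered value always compares equal to itself
  return 0;
}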
static std::pair<X86::CondCode, bool>
getX86ConditionCode(CmpInst::Predicate Predicate) {
X86::CondCode CC = X86::COND_INVALID;
@@ -529,7 +492,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
if (RR == 0)
return false;
@@ -581,7 +544,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Ok, we need to do a load from a stub. If we've already loaded from
// this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
unsigned LoadReg;
if (I != LocalValueMap.end() && I->second != 0) {
LoadReg = I->second;
@@ -692,7 +655,7 @@ redo_gep:
case Instruction::Alloca: {
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
- DenseMap<const AllocaInst*, int>::iterator SI =
+ DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(A);
if (SI != FuncInfo.StaticAllocaMap.end()) {
AM.BaseType = X86AddressMode::FrameIndexBase;
@@ -940,7 +903,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
unsigned Alignment = S->getAlignment();
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = ABIAlignment;
bool Aligned = Alignment >= ABIAlignment;
@@ -993,8 +956,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
@@ -1017,7 +979,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// The calling-convention tables for x87 returns don't tell
// the whole story.
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
unsigned SrcReg = Reg + VA.getValNo();
@@ -1036,23 +998,23 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
- SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
}
unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
ISD::SIGN_EXTEND;
- SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
SrcReg, /*TODO: Kill=*/false);
}
// Make the copy.
unsigned DstReg = VA.getLocReg();
- const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
if (!SrcRC->contains(DstReg))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- DstReg).addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
// Add register to return instruction.
RetRegs.push_back(VA.getLocReg());
@@ -1068,14 +1030,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- RetReg).addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
RetRegs.push_back(RetReg);
}
// Now emit the RET.
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
@@ -1104,7 +1067,7 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1194,7 +1157,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
ResultReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
ResultReg);
- ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1209,7 +1172,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
if (ResultReg) {
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1250,7 +1213,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
FlagReg2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
ResultReg).addReg(FlagReg1).addReg(FlagReg2);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1268,7 +1231,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1285,7 +1248,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
if (ResultReg == 0)
@@ -1312,17 +1275,16 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
ResultReg)
.addImm(0).addReg(Result32).addImm(X86::sub_32bit);
} else if (DstVT != MVT::i8) {
- ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-
bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
@@ -1342,8 +1304,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
switch (Predicate) {
default: break;
- case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
- case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true;
+ case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
+ case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
}
const Value *CmpLHS = CI->getOperand(0);
@@ -1400,7 +1362,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// X86 requires a second branch to handle UNE (and OEQ, which is mapped
// to UNE above).
if (NeedExtraBranch) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
.addMBB(TrueMBB);
}
@@ -1413,7 +1375,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Emits an unconditional branch to the FalseBB, obtains the branch
// weight, and adds it to the successor list.
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
return true;
}
@@ -1437,15 +1399,15 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
- unsigned JmpOpc = X86::JNE_4;
+ unsigned JmpOpc = X86::JNE_1;
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
- JmpOpc = X86::JE_4;
+ JmpOpc = X86::JE_1;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1465,7 +1427,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1482,9 +1444,9 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg).addImm(1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1558,7 +1520,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
.addReg(Op0Reg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1670,8 +1632,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
TII.get(X86::MOV32r0), Zero32);
// Copy the zero into the appropriate sub/super/identical physical
- // register. Unfortunately the operations needed are not uniform enough to
- // fit neatly into the table above.
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
if (VT.SimpleTy == MVT::i16) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
@@ -1712,7 +1674,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
ResultSuperReg).addReg(SourceSuperReg).addImm(8);
// Now reference the 8-bit subreg of the result.
- ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
/*Kill=*/true, X86::sub_8bit);
}
// Copy the result out of the physreg if we haven't already.
@@ -1721,7 +1683,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
.addReg(OpEntry.DivRemResultReg);
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1779,7 +1741,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
- return false;
+ return false;
if (SETFOpc) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
@@ -1837,9 +1799,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
- unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1858,7 +1820,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (I->getType() != CI->getOperand(0)->getType() ||
!((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
- (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
+ (Subtarget->hasSSE2() && RetVT == MVT::f64)))
return false;
const Value *CmpLHS = CI->getOperand(0);
@@ -1917,15 +1879,15 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
AndReg, /*IsKill=*/true);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1988,8 +1950,8 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg =
- FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
- UpdateValueMap(I, ResultReg);
+ fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2018,7 +1980,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OpReg, getKillRegState(OpIsKill));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2051,7 +2013,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::CVTSS2SDrr), ResultReg)
.addReg(OpReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2070,7 +2032,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::CVTSD2SSrr), ResultReg)
.addReg(OpReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2096,30 +2058,29 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (SrcVT == MVT::i8) {
// Truncate from i8 to i1; no code needed.
- UpdateValueMap(I, InputReg);
+ updateValueMap(I, InputReg);
return true;
}
if (!Subtarget->is64Bit()) {
// If we're on x86-32, we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
- const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
- (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
- (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
+ const TargetRegisterClass *CopyRC =
+ (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
}
// Issue an extract_subreg.
- unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2145,9 +2106,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
VT = MVT::i32;
else if (Len >= 2)
VT = MVT::i16;
- else {
+ else
VT = MVT::i8;
- }
unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
@@ -2163,19 +2123,7 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
return true;
}
-static bool isCommutativeIntrinsic(IntrinsicInst const *II) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return true;
- default:
- return false;
- }
-}
-
-bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
+bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {
default: return false;
@@ -2195,14 +2143,14 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
}
- // This needs to be set before we call getFrameRegister, otherwise we get
- // the wrong frame register.
+ // This needs to be set before we call getPtrSizedFrameRegister, otherwise
+ // we get the wrong frame register.
MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
- unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*(FuncInfo.MF));
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -2228,7 +2176,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
SrcReg = DestReg;
}
- UpdateValueMap(II, SrcReg);
+ updateValueMap(II, SrcReg);
return true;
}
case Intrinsic::memcpy: {
@@ -2258,7 +2206,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
return false;
- return LowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
+ return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -2273,7 +2221,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MSI->getDestAddressSpace() > 255)
return false;
- return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
@@ -2299,8 +2247,10 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM).
- addImm(0).addMetadata(DI->getVariable());
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
+ .addImm(0)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
return true;
}
case Intrinsic::trap: {
@@ -2317,7 +2267,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!isTypeLegal(RetTy, VT))
return false;
- // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT
+ // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
// is not generated by FastISel yet.
// FIXME: Update this code once tablegen can handle it.
static const unsigned SqrtOpc[2][2] = {
@@ -2356,7 +2306,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
MIB.addReg(SrcReg);
- UpdateValueMap(II, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::sadd_with_overflow:
@@ -2387,15 +2337,23 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
+ bool UseIncDec = false;
+ if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
+ UseIncDec = true;
+
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
+ CondOpc = X86::SETOr;
+ break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
+ CondOpc = X86::SETOr;
+ break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2411,9 +2369,21 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned ResultReg = 0;
// Check if we have an immediate version.
- if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
- ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
- C->getZExtValue());
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ static const unsigned Opc[2][4] = {
+ { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
+ };
+
+ if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ bool IsDec = BaseOpc == X86ISD::DEC;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ } else
+ ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ CI->getZExtValue());
}
unsigned RHSReg;
@@ -2423,7 +2393,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
if (RHSReg == 0)
return false;
RHSIsKill = hasTrivialKill(RHS);
- ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
+ ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
RHSIsKill);
}
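The new INC/DEC path is sound because adding 1 overflows a signed register exactly when it holds the maximum value — the one condition INC raises OF for and SETO reads back. A semantics check (illustrative, not from the patch):

#include <cassert>
#include <climits>

int main() {
  int X = INT_MAX;                   // the only i32 input where x + 1 overflows
  long long Wide = (long long)X + 1; // compute without wrapping
  bool Overflow = Wide > INT_MAX;    // what SETO observes after INC32r
  assert(Overflow);
  return 0;
}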
@@ -2438,7 +2408,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
.addReg(LHSReg, getKillRegState(LHSIsKill));
- ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
+ ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
} else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
static const unsigned MULOpc[] =
@@ -2449,10 +2419,10 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), X86::AL)
.addReg(LHSReg, getKillRegState(LHSIsKill));
- ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
RHSIsKill);
} else
- ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
}
@@ -2465,7 +2435,7 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
ResultReg2);
- UpdateValueMap(II, ResultReg, 2);
+ updateValueMap(II, ResultReg, 2);
return true;
}
case Intrinsic::x86_sse_cvttss2si:
@@ -2531,13 +2501,13 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Reg);
- UpdateValueMap(II, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
}
}
-bool X86FastISel::FastLowerArguments() {
+bool X86FastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
@@ -2554,7 +2524,7 @@ bool X86FastISel::FastLowerArguments() {
if (!Subtarget->is64Bit())
return false;
-
+
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
@@ -2627,7 +2597,7 @@ bool X86FastISel::FastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(DstReg, getKillRegState(true));
- UpdateValueMap(&Arg, ResultReg);
+ updateValueMap(&Arg, ResultReg);
}
return true;
}
@@ -2649,7 +2619,7 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
return 4;
}
-bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
+bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
auto &OutVals = CLI.OutVals;
auto &OutFlags = CLI.OutFlags;
auto &OutRegs = CLI.OutRegs;
@@ -2699,6 +2669,9 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
TM.Options.GuaranteedTailCallOpt))
return false;
+ SmallVector<MVT, 16> OutVTs;
+ SmallVector<unsigned, 16> ArgRegs;
+
// If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
// instruction. This is safe because it is common to all FastISel supported
// calling conventions on x86.
@@ -2716,46 +2689,44 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
// Passing bools around ends up doing a trunc to i1 and passing it.
// Codegen this as an argument + "and 1".
- if (auto *TI = dyn_cast<TruncInst>(Val)) {
- if (TI->getType()->isIntegerTy(1) && CLI.CS &&
- (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
- TI->hasOneUse()) {
- Val = cast<TruncInst>(Val)->getOperand(0);
- unsigned ResultReg = getRegForValue(Val);
-
- if (!ResultReg)
- return false;
-
- MVT ArgVT;
- if (!isTypeLegal(Val->getType(), ArgVT))
- return false;
+ MVT VT;
+ auto *TI = dyn_cast<TruncInst>(Val);
+ unsigned ResultReg;
+ if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
+ (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
+ TI->hasOneUse()) {
+ Value *PrevVal = TI->getOperand(0);
+ ResultReg = getRegForValue(PrevVal);
+
+ if (!ResultReg)
+ return false;
- ResultReg =
- FastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1);
+ if (!isTypeLegal(PrevVal->getType(), VT))
+ return false;
- if (!ResultReg)
- return false;
- UpdateValueMap(Val, ResultReg);
- }
+ ResultReg =
+ fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
+ } else {
+ if (!isTypeLegal(Val->getType(), VT))
+ return false;
+ ResultReg = getRegForValue(Val);
}
+
+ if (!ResultReg)
+ return false;
+
+ ArgRegs.push_back(ResultReg);
+ OutVTs.push_back(VT);
}
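The "and 1" normalization matters because only the low bit of the value feeding a trunc-to-i1 is defined; masking produces the bool the callee expects. A minimal semantics sketch:

#include <cassert>

int main() {
  unsigned Wide = 0x7;        // value feeding the trunc-to-i1
  unsigned Passed = Wide & 1; // what the emitted ISD::AND with 1 yields
  assert(Passed == 1);
  return 0;
}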
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs,
- CLI.RetTy->getContext());
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
// Allocate shadow area for Win64
if (IsWin64)
CCInfo.AllocateStack(32, 8);
- SmallVector<MVT, 16> OutVTs;
- for (auto *Val : OutVals) {
- MVT VT;
- if (!isTypeLegal(Val->getType(), VT))
- return false;
- OutVTs.push_back(VT);
- }
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2767,8 +2738,8 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
.addImm(NumBytes);
// Walk the register/memloc assignments, inserting copies/loads.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign const &VA = ArgLocs[i];
const Value *ArgVal = OutVals[VA.getValNo()];
@@ -2777,9 +2748,7 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
if (ArgVT == MVT::x86mmx)
return false;
- unsigned ArgReg = getRegForValue(ArgVal);
- if (!ArgReg)
- return false;
+ unsigned ArgReg = ArgRegs[VA.getValNo()];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -2819,7 +2788,7 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
break;
}
case CCValAssign::BCvt: {
- ArgReg = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
+ ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
/*TODO: Kill=*/false);
assert(ArgReg && "Failed to emit a bitcast!");
ArgVT = VA.getLocVT();
@@ -2846,6 +2815,11 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
OutRegs.push_back(VA.getLocReg());
} else {
assert(VA.isMemLoc());
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
AM.Base.Reg = RegInfo->getStackRegister();
@@ -2982,7 +2956,7 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
// Now handle call return values.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs,
+ CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
CLI.RetTy->getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -2999,39 +2973,33 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
report_fatal_error("SSE register return with SSE disabled");
}
- // If this is a call to a function that returns an fp value on the floating
- // point stack, we must guarantee the value is popped from the stack, so
- // a COPY is not good enough - the copy instruction may be eliminated if the
- // return value is not used. We use the FpPOP_RETVAL instruction instead.
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
- // If we prefer to use the value in xmm registers, copy it out as f80 and
- // use a truncate to move it from fp stack reg to xmm reg.
- if (isScalarFPTypeInSSEReg(VA.getValVT())) {
- CopyVT = MVT::f80;
- CopyReg = createResultReg(&X86::RFP80RegClass);
- }
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::FpPOP_RETVAL), CopyReg);
-
- // Round the f80 to the right size, which also moves it to the appropriate
- // xmm register. This is accomplished by storing the f80 value in memory
- // and then loading it back.
- if (CopyVT != VA.getValVT()) {
- EVT ResVT = VA.getValVT();
- unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
- unsigned MemSize = ResVT.getSizeInBits()/8;
- int FI = MFI.CreateStackObject(MemSize, MemSize, false);
- addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc)), FI)
- .addReg(CopyReg);
- Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
- addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg + i), FI);
- }
- } else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
- InRegs.push_back(VA.getLocReg());
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(&X86::RFP80RegClass);
+ }
+
+ // Copy out the result.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ InRegs.push_back(VA.getLocReg());
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register. This is accomplished by storing the f80 value in memory
+ // and then loading it back.
+ if (CopyVT != VA.getValVT()) {
+ EVT ResVT = VA.getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg + i), FI);
}
}
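As a rough standalone sketch (plain C++, not LLVM code) of the store-and-reload narrowing emitted above: on x86, long double is the same 80-bit x87 format, so converting through a float models what the ST_Fp80m32 store followed by the MOVSSrm reload accomplishes in the generated code.

#include <cassert>

int main() {
  long double X80 = 1.0L / 3.0L;            // value notionally in an x87 register
  float Narrowed = static_cast<float>(X80); // ST_Fp80m32: round f80 down to f32
  assert(Narrowed == static_cast<float>(X80));
  // Reloading Narrowed (MOVSSrm in the generated code) yields a value that
  // fits an SSE register with the correct single-precision rounding.
  return 0;
}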
@@ -3043,7 +3011,7 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
}
bool
-X86FastISel::TargetSelectInstruction(const Instruction *I) {
+X86FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
@@ -3086,7 +3054,7 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectTrunc(I);
unsigned Reg = getRegForValue(I->getOperand(0));
if (Reg == 0) return false;
- UpdateValueMap(I, Reg);
+ updateValueMap(I, Reg);
return true;
}
}
@@ -3094,13 +3062,69 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
-unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- MVT VT;
- if (!isTypeLegal(C->getType(), VT))
+unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
return 0;
+ uint64_t Imm = CI->getZExtValue();
+ if (Imm == 0) {
+ unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1:
+ case MVT::i8:
+ return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ case MVT::i16:
+ return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
+ X86::sub_16bit);
+ case MVT::i32:
+ return SrcReg;
+ case MVT::i64: {
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ }
+ }
+
+ unsigned Opc = 0;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1: VT = MVT::i8; // fall-through
+ case MVT::i8: Opc = X86::MOV8ri; break;
+ case MVT::i16: Opc = X86::MOV16ri; break;
+ case MVT::i32: Opc = X86::MOV32ri; break;
+ case MVT::i64: {
+ if (isUInt<32>(Imm))
+ Opc = X86::MOV32ri;
+ else if (isInt<32>(Imm))
+ Opc = X86::MOV64ri32;
+ else
+ Opc = X86::MOV64ri;
+ break;
+ }
+ }
+ if (VT == MVT::i64 && Opc == X86::MOV32ri) {
+ unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
+}
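A minimal sketch of the immediate-width selection rule above, in standalone C++ with stand-ins for isUInt<32>/isInt<32>; the helper names and values here are illustrative, not LLVM's.

#include <cassert>
#include <cstdint>

enum Opc { MOV8ri, MOV16ri, MOV32ri, MOV64ri32, MOV64ri };

// Stand-ins for llvm::isUInt<32> / llvm::isInt<32>.
static bool fitsUInt32(uint64_t X) { return X <= UINT32_MAX; }
static bool fitsInt32(uint64_t X) {
  int64_t S = static_cast<int64_t>(X);
  return S >= INT32_MIN && S <= INT32_MAX;
}

// The selection rule from the hunk above: prefer the shorter encodings
// whenever the immediate allows it.
static Opc pickMov64(uint64_t Imm) {
  if (fitsUInt32(Imm)) return MOV32ri;   // writing EAX zeroes RAX's high half
  if (fitsInt32(Imm))  return MOV64ri32; // sign-extended 32-bit immediate
  return MOV64ri;                        // full 64-bit immediate (10 bytes)
}

int main() {
  assert(pickMov64(0x00000000FFFFFFFFULL) == MOV32ri);
  assert(pickMov64(0xFFFFFFFFFFFFFFFFULL) == MOV64ri32); // i.e. -1
  assert(pickMov64(0x123456789ABCDEF0ULL) == MOV64ri);
  return 0;
}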
+
+unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ if (CFP->isNullValue())
+ return fastMaterializeFloatZero(CFP);
+
 // Can't handle code models other than Small or Large yet.
- if (TM.getCodeModel() != CodeModel::Small)
+ CodeModel::Model CM = TM.getCodeModel();
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
return 0;
// Get opcode and regclass of the output for the given load instruction.
@@ -3108,23 +3132,6 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
- case MVT::i8:
- Opc = X86::MOV8rm;
- RC = &X86::GR8RegClass;
- break;
- case MVT::i16:
- Opc = X86::MOV16rm;
- RC = &X86::GR16RegClass;
- break;
- case MVT::i32:
- Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
- break;
- case MVT::i64:
- // Must be in x86-64 mode.
- Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
- break;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
@@ -3148,39 +3155,11 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
- // Materialize addresses with LEA/MOV instructions.
- if (isa<GlobalValue>(C)) {
- X86AddressMode AM;
- if (X86SelectAddress(C, AM)) {
- // If the expression is just a basereg, then we're done, otherwise we need
- // to emit an LEA.
- if (AM.BaseType == X86AddressMode::RegBase &&
- AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
- return AM.Base.Reg;
-
- unsigned ResultReg = createResultReg(RC);
- if (TM.getRelocationModel() == Reloc::Static &&
- TLI.getPointerTy() == MVT::i64) {
- // The displacement code be more than 32 bits away so we need to use
- // an instruction with a 64 bit immediate
- Opc = X86::MOV64ri;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
- } else {
- Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg), AM);
- }
- return ResultReg;
- }
- return 0;
- }
-
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(C->getType());
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
if (Align == 0) {
- // Alignment of vector types. FIXME!
- Align = DL.getTypeAllocSize(C->getType());
+ // Alignment of vector types. FIXME!
+ Align = DL.getTypeAllocSize(CFP->getType());
}
// x86-32 PIC requires a PIC base register for constant pools.
@@ -3198,23 +3177,88 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
// Create the load from the constant pool.
- unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
unsigned ResultReg = createResultReg(RC);
+
+ if (CM == CodeModel::Large) {
+ unsigned AddrReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ AddrReg)
+ .addConstantPoolIndex(CPI, 0, OpFlag);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg);
+ addDirectMem(MIB, AddrReg);
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+ return ResultReg;
+ }
+
addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg),
- MCPOffset, PICBase, OpFlag);
-
+ CPI, PICBase, OpFlag);
return ResultReg;
}
-unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
+
+ // Materialize addresses with LEA/MOV instructions.
+ X86AddressMode AM;
+ if (X86SelectAddress(GV, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
+ return AM.Base.Reg;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ if (TM.getRelocationModel() == Reloc::Static &&
+ TLI.getPointerTy() == MVT::i64) {
+ // The displacement could be more than 32 bits away, so we need to use
+ // an instruction with a 64-bit immediate.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ ResultReg)
+ .addGlobalAddress(GV);
+ } else {
+ unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg), AM);
+ }
+ return ResultReg;
+ }
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return X86MaterializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return X86MaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return X86MaterializeGV(GV, VT);
+
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
// Fail on dynamic allocas. At this point, getRegForValue has already
// checked its CSE maps, so if we're here trying to handle a dynamic
// alloca, we're not going to succeed. X86SelectAddress has a
// check for dynamic allocas, because it's called directly from
- // various places, but TargetMaterializeAlloca also needs a check
+ // various places, but fastMaterializeAlloca also needs a check
// in order to avoid recursion between getRegForValue,
- // X86SelectAddrss, and TargetMaterializeAlloca.
+ // X86SelectAddress, and fastMaterializeAlloca.
if (!FuncInfo.StaticAllocaMap.count(C))
return 0;
assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
@@ -3222,7 +3266,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3230,7 +3274,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
-unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
return 0;
@@ -3276,7 +3320,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!X86SelectAddress(Ptr, AM))
return false;
- const X86InstrInfo &XII = (const X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo &)TII;
unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
@@ -3288,7 +3332,8 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
AM.getFullAddress(AddrOps);
MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
+ Size, Alignment, /*AllowCommute=*/true);
if (!Result)
return false;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index eb9f743226cc..8e033a096fd8 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -155,7 +155,8 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
if (!ST.LEAusesAG() && !ST.slowLEA())
return false;
- TII = static_cast<const X86InstrInfo *>(TM->getInstrInfo());
+ TII =
+ static_cast<const X86InstrInfo *>(TM->getSubtargetImpl()->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -217,7 +218,8 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
if (usesRegister(p, CurInst) == RU_Write) {
return CurInst;
}
- InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
+ InstrDistance += TII->getInstrLatency(
+ TM->getSubtargetImpl()->getInstrItineraryData(), CurInst);
Found = getPreviousInstr(CurInst, MFI);
}
return nullptr;
@@ -281,6 +283,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
return;
int addrr_opcode, addri_opcode;
switch (opcode) {
+ default: llvm_unreachable("Unexpected LEA instruction");
case X86::LEA16r:
addrr_opcode = X86::ADD16rr;
addri_opcode = X86::ADD16ri;
@@ -294,8 +297,6 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
addrr_opcode = X86::ADD64rr;
addri_opcode = X86::ADD64ri32;
break;
- default:
- assert(false && "Unexpected LEA instruction");
}
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
DEBUG(dbgs() << "FixLEA: Replaced by: ";);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index c8a3ab3082ad..618910946bf9 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -28,12 +28,14 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
@@ -41,7 +43,9 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <bitset>
using namespace llvm;
#define DEBUG_TYPE "x86-codegen"
@@ -50,6 +54,8 @@ STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP , "Number of floating point instructions");
namespace {
+ const unsigned ScratchFPReg = 7;
+
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
@@ -137,7 +143,7 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
enum {
- NumFPRegs = 16 // Including scratch pseudo-registers.
+ NumFPRegs = 8 // Including the scratch pseudo-register (FP7).
};
// For each live FP<n> register, point to its Stack[] entry.
@@ -146,27 +152,6 @@ namespace {
// register allocator thinks.
unsigned RegMap[NumFPRegs];
- // Pending fixed registers - Inline assembly needs FP registers to appear
- // in fixed stack slot positions. This is handled by copying FP registers
- // to ST registers before the instruction, and copying back after the
- // instruction.
- //
- // This is modeled with pending ST registers. NumPendingSTs is the number
- // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
- // register that holds the ST value. The ST registers are not moved into
- // place until immediately before the instruction that needs them.
- //
- // It can happen that we need an ST register to be live when no FP register
- // holds the value:
- //
- // %ST0 = COPY %FP4<kill>
- //
- // When that happens, we allocate a scratch FP register to hold the ST
- // value. That means every register in PendingST must be live.
-
- unsigned NumPendingSTs;
- unsigned char PendingST[8];
-
// Set up our stack model to match the incoming registers to MBB.
void setupBlockStack();
@@ -180,9 +165,6 @@ namespace {
dbgs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- for (unsigned i = 0; i != NumPendingSTs; ++i)
- dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
- dbgs() << "\n";
}
#endif
@@ -199,19 +181,6 @@ namespace {
return Slot < StackTop && Stack[Slot] == RegNo;
}
- /// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() const {
- for (int i = NumFPRegs - 1; i >= 8; --i)
- if (!isLive(i))
- return i;
- llvm_unreachable("Ran out of scratch FP registers");
- }
-
- /// isScratchReg - Returns trus if RegNo is a scratch FP register.
- static bool isScratchReg(unsigned RegNo) {
- return RegNo > 8 && RegNo < NumFPRegs;
- }
-
/// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
if (STi >= StackTop)
@@ -263,21 +232,6 @@ namespace {
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
- /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
- /// RegNo. If any PendingST registers still need the RegNo value, duplicate
- /// them to new scratch registers.
- void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
- for (unsigned i = 0; i != NumPendingSTs; ++i) {
- if (PendingST[i] != RegNo)
- continue;
- unsigned SR = getScratchReg();
- DEBUG(dbgs() << "Duplicating pending ST" << i
- << " in FP" << RegNo << " to FP" << SR << '\n');
- duplicateToTop(RegNo, SR, I);
- PendingST[i] = SR;
- }
- }
-
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
@@ -304,6 +258,7 @@ namespace {
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+ void handleCall(MachineBasicBlock::iterator &I);
void handleZeroArgFP(MachineBasicBlock::iterator &I);
void handleOneArgFP(MachineBasicBlock::iterator &I);
void handleOneArgFPRW(MachineBasicBlock::iterator &I);
@@ -320,6 +275,8 @@ namespace {
return X86::RFP80RegClass.contains(DstReg) ||
X86::RFP80RegClass.contains(SrcReg);
}
+
+ void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
}
@@ -354,7 +311,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (!FPIsUsed) return false;
Bundles = &getAnalysis<EdgeBundles>();
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
// Prepare cross-MBB liveness.
bundleCFG(MF);
@@ -367,15 +324,13 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *Entry = MF.begin();
bool Changed = false;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
- I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
- I != E; ++I)
- Changed |= processBasicBlock(MF, **I);
+ for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
+ Changed |= processBasicBlock(MF, *BB);
// Process any unreachable blocks in arbitrary order now.
if (MF.size() != Processed.size())
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB))
+ if (Processed.insert(BB).second)
Changed |= processBasicBlock(MF, *BB);
LiveBundles.clear();
@@ -409,8 +364,8 @@ void FPS::bundleCFG(MachineFunction &MF) {
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
- NumPendingSTs = 0;
+ setKillFlags(BB);
setupBlockStack();
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
@@ -428,6 +383,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
FPInstClass = X86II::SpecialFP;
+ if (MI->isCall())
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -462,7 +420,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// after definition. If so, pop them.
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
- if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ // Check if Reg is live on the stack. An inline-asm register operand that
+ // is in the clobber list and marked dead might not be live on the stack.
+ if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
@@ -874,7 +834,9 @@ FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
RegMap[TopReg] = OldSlot;
RegMap[FPRegNo] = ~0;
Stack[--StackTop] = ~0;
- return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
+ .addReg(STReg)
+ .getInstr();
}
/// adjustLiveRegs - Kill and revive registers such that exactly the FP
@@ -966,6 +928,31 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
// Instruction transformation implementation
//===----------------------------------------------------------------------===//
+void FPS::handleCall(MachineBasicBlock::iterator &I) {
+ unsigned STReturns = 0;
+
+ for (const auto &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned R = MO.getReg() - X86::FP0;
+
+ if (R < 8) {
+ assert(MO.isDef() && MO.isImplicit());
+ STReturns |= 1 << R;
+ }
+ }
+
+ unsigned N = CountTrailingOnes_32(STReturns);
+
+ // FP registers used for function return must be consecutive starting at
+ // FP0.
+ assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
+
+ for (unsigned I = 0; I < N; ++I)
+ pushReg(N - I - 1);
+}
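A toy model (hypothetical standalone C++) of the return-register bookkeeping above: the ST return mask must be contiguous from bit 0, and pushing FP(N-1) first leaves FP0 on top, matching the hardware stack after the call.

#include <cassert>
#include <cstdint>

// Stand-in for the CountTrailingOnes_32 helper used above.
static unsigned countTrailingOnes(uint32_t X) {
  unsigned N = 0;
  while (X & 1) { X >>= 1; ++N; }
  return N;
}

int main() {
  // A call returning in ST0 and ST1 (e.g. a complex long double) sets
  // bits 0 and 1; valid masks are contiguous from bit 0, at most two bits.
  uint32_t STReturns = 0x3;
  unsigned N = countTrailingOnes(STReturns);
  assert(N == 2);
  // handleCall then pushes FP1 first and FP0 last, so FP0 ends up on top
  // of the simulated stack, exactly where the hardware left ST0.
  return 0;
}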
+
/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
///
void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
@@ -992,9 +979,6 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
- if (KillsSrc)
- duplicatePendingSTBeforeKill(Reg, I);
-
 // FISTP64m is strange because there isn't a non-popping version.
 // If we have one _and_ we don't want to pop the operand, duplicate the value
 // on the stack instead of moving it. This ensures that popping the value is
@@ -1015,7 +999,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_Fp32m80 ||
MI->getOpcode() == X86::ISTT_Fp64m80 ||
MI->getOpcode() == X86::ST_FpP80m)) {
- duplicateToTop(Reg, getScratchReg(), I);
+ duplicateToTop(Reg, ScratchFPReg, I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
@@ -1058,7 +1042,6 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
- duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
@@ -1314,71 +1297,22 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
/// floating point instructions. This is primarily intended for use by pseudo
/// instructions.
///
-void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
- MachineInstr *MI = I;
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
+ MachineInstr *MI = Inst;
+
+ if (MI->isCall()) {
+ handleCall(Inst);
+ return;
+ }
+
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SpecialFP instruction!");
case TargetOpcode::COPY: {
// We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
const MachineOperand &MO1 = MI->getOperand(1);
const MachineOperand &MO0 = MI->getOperand(0);
- unsigned DstST = MO0.getReg() - X86::ST0;
- unsigned SrcST = MO1.getReg() - X86::ST0;
bool KillsSrc = MI->killsRegister(MO1.getReg());
- // ST = COPY FP. Set up a pending ST register.
- if (DstST < 8) {
- unsigned SrcFP = getFPReg(MO1);
- assert(isLive(SrcFP) && "Cannot copy dead register");
- assert(!MO0.isDead() && "Cannot copy to dead ST register");
-
- // Unallocated STs are marked as the nonexistent FP255.
- while (NumPendingSTs <= DstST)
- PendingST[NumPendingSTs++] = NumFPRegs;
-
- // STi could still be live from a previous inline asm.
- if (isScratchReg(PendingST[DstST])) {
- DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
- << '\n');
- freeStackSlotBefore(MI, PendingST[DstST]);
- }
-
- // When the source is killed, allocate a scratch FP register.
- if (KillsSrc) {
- duplicatePendingSTBeforeKill(SrcFP, I);
- unsigned Slot = getSlot(SrcFP);
- unsigned SR = getScratchReg();
- PendingST[DstST] = SR;
- Stack[Slot] = SR;
- RegMap[SR] = Slot;
- } else
- PendingST[DstST] = SrcFP;
- break;
- }
-
- // FP = COPY ST. Extract fixed stack value.
- // Any instruction defining ST registers must have assigned them to a
- // scratch register.
- if (SrcST < 8) {
- unsigned DstFP = getFPReg(MO0);
- assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
- assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
- unsigned SrcFP = PendingST[SrcST];
- assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
- assert(isLive(SrcFP) && "Scratch holding ST is dead");
-
- // DstFP steals the stack slot from SrcFP.
- unsigned Slot = getSlot(SrcFP);
- Stack[Slot] = DstFP;
- RegMap[DstFP] = Slot;
-
- // Always treat the ST as killed.
- PendingST[SrcST] = NumFPRegs;
- while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
- --NumPendingSTs;
- break;
- }
-
// FP <- FP copy.
unsigned DstFP = getFPReg(MO0);
unsigned SrcFP = getFPReg(MO1);
@@ -1392,7 +1326,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
} else {
// For COPY we just duplicate the specified value to a new stack slot.
// This could be made better, but would require substantial changes.
- duplicateToTop(SrcFP, DstFP, I);
+ duplicateToTop(SrcFP, DstFP, Inst);
}
break;
}
@@ -1401,41 +1335,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// All FP registers must be explicitly defined, so load a 0 instead.
unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0));
pushReg(Reg);
break;
}
- case X86::FpPOP_RETVAL: {
- // The FpPOP_RETVAL instruction is used after calls that return a value on
- // the floating point stack. We cannot model this with ST defs since CALL
- // instructions have fixed clobber lists. This instruction is interpreted
- // to mean that there is one more live register on the stack than we
- // thought.
- //
- // This means that StackTop does not match the hardware stack between a
- // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
- // between CALL and FpPOP_RETVAL as long as they don't overflow the
- // hardware stack.
- unsigned DstFP = getFPReg(MI->getOperand(0));
-
- // Move existing stack elements up to reflect reality.
- assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
- if (StackTop) {
- std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
- for (unsigned i = 0; i != NumFPRegs; ++i)
- ++RegMap[i];
- }
- ++StackTop;
-
- // DstFP is the new bottom of the stack.
- Stack[0] = DstFP;
- RegMap[DstFP] = 0;
-
- // DstFP will be killed by processBasicBlock if this was a dead def.
- break;
- }
-
case TargetOpcode::INLINEASM: {
// The inline asm MachineInstr currently only *uses* FP registers for the
// 'f' constraint. These should be turned into the current ST(x) register
@@ -1472,19 +1376,30 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// only tell clobbers from defs by looking at the asm descriptor.
unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
unsigned NumOps = 0;
+ SmallSet<unsigned, 1> FRegIdx;
+ unsigned RCID;
+
for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
unsigned Flags = MI->getOperand(i).getImm();
+
NumOps = InlineAsm::getNumOperandRegisters(Flags);
if (NumOps != 1)
continue;
const MachineOperand &MO = MI->getOperand(i + 1);
if (!MO.isReg())
continue;
- unsigned STReg = MO.getReg() - X86::ST0;
+ unsigned STReg = MO.getReg() - X86::FP0;
if (STReg >= 8)
continue;
+ // If the flag has a register class constraint, this must be an operand
+ // with constraint "f". Record its index and continue.
+ if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
+ FRegIdx.insert(i + 1);
+ continue;
+ }
+
switch (InlineAsm::getKind(Flags)) {
case InlineAsm::Kind_RegUse:
STUses |= (1u << STReg);
@@ -1527,71 +1442,42 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
<< NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
- // Scan the instruction for FP uses corresponding to "f" constraints.
- // Collect FP registers to kill afer the instruction.
- // Always kill all the scratch regs.
+#ifndef NDEBUG
+ // If any input operand uses constraint "f", all output register
+ // constraints must be early-clobber defs.
+ for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I)
+ if (FRegIdx.count(I)) {
+ assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 &&
+ "Operands with constraint \"f\" cannot overlap with defs");
+ }
+#endif
+
+ // Collect all FP registers (register operands with constraints "t", "u",
+ // and "f") to kill afer the instruction.
unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
- unsigned FPUsed = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
- if (!Op.isUse())
- MI->emitError("illegal \"f\" output constraint");
unsigned FPReg = getFPReg(Op);
- FPUsed |= 1U << FPReg;
// If we kill this operand, make sure to pop it from the stack after the
// asm. We just remember it for now, and pop them all off at the end in
// a batch.
- if (Op.isKill())
+ if (Op.isUse() && Op.isKill())
FPKills |= 1U << FPReg;
}
- // The popped inputs will be killed by the instruction, so duplicate them
- // if the FP register needs to be live after the instruction, or if it is
- // used in the instruction itself. We effectively treat the popped inputs
- // as early clobbers.
- for (unsigned i = 0; i < NumSTPopped; ++i) {
- if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
- continue;
- unsigned SR = getScratchReg();
- duplicateToTop(PendingST[i], SR, I);
- DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
- << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
- PendingST[i] = SR;
- }
-
- // Make sure we have a unique live register for every fixed use. Some of
- // them could be undef uses, and we need to emit LD_F0 instructions.
- for (unsigned i = 0; i < NumSTUses; ++i) {
- if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
- // Check for shared assignments.
- for (unsigned j = 0; j < i; ++j) {
- if (PendingST[j] != PendingST[i])
- continue;
- // STi and STj are inn the same register, create a copy.
- unsigned SR = getScratchReg();
- duplicateToTop(PendingST[i], SR, I);
- DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
- << unsigned(PendingST[i])
- << " to avoid collision with ST" << j << '\n');
- PendingST[i] = SR;
- }
- continue;
- }
- unsigned SR = getScratchReg();
- DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
- pushReg(SR);
- PendingST[i] = SR;
- if (NumPendingSTs == i)
- ++NumPendingSTs;
- }
- assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+ // Do not include registers that are implicitly popped by defs/clobbers.
+ FPKills &= ~(STDefs | STClobbers);
// Now we can rearrange the live registers to match what was requested.
- shuffleStackTop(PendingST, NumPendingSTs, I);
+ unsigned char STUsesArray[8];
+
+ for (unsigned I = 0; I < NumSTUses; ++I)
+ STUsesArray[I] = I;
+
+ shuffleStackTop(STUsesArray, NumSTUses, Inst);
DEBUG({dbgs() << "Before asm: "; dumpStack();});
// With the stack layout fixed, rewrite the FP registers.
@@ -1599,36 +1485,22 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
+
unsigned FPReg = getFPReg(Op);
- Op.setReg(getSTReg(FPReg));
+
+ if (FRegIdx.count(i))
+ // Operand with constraint "f".
+ Op.setReg(getSTReg(FPReg));
+ else
+ // Operand with a single register class constraint ("t" or "u").
+ Op.setReg(X86::ST0 + FPReg);
}
// Simulate the inline asm popping its inputs and pushing its outputs.
StackTop -= NumSTPopped;
- // Hold the fixed output registers in scratch FP registers. They will be
- // transferred to real FP registers by copies.
- NumPendingSTs = 0;
- for (unsigned i = 0; i < NumSTDefs; ++i) {
- unsigned SR = getScratchReg();
- pushReg(SR);
- FPKills &= ~(1u << SR);
- }
for (unsigned i = 0; i < NumSTDefs; ++i)
- PendingST[NumPendingSTs++] = getStackEntry(i);
- DEBUG({dbgs() << "After asm: "; dumpStack();});
-
- // If any of the ST defs were dead, pop them immediately. Our caller only
- // handles dead FP defs.
- MachineBasicBlock::iterator InsertPt = MI;
- for (unsigned i = 0; STDefs & (1u << i); ++i) {
- if (!(STDeadDefs & (1u << i)))
- continue;
- freeStackSlotAfter(InsertPt, PendingST[i]);
- PendingST[i] = NumFPRegs;
- }
- while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
- --NumPendingSTs;
+ pushReg(NumSTDefs - i - 1);
// If this asm kills any FP registers (is the last use of them) we must
// explicitly emit pop instructions for them. Do this now after the asm has
@@ -1640,9 +1512,10 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
while (FPKills) {
unsigned FPReg = countTrailingZeros(FPKills);
if (isLive(FPReg))
- freeStackSlotAfter(InsertPt, FPReg);
+ freeStackSlotAfter(Inst, FPReg);
FPKills &= ~(1U << FPReg);
}
+
// Don't delete the inline asm!
return;
}
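For context, a user-level example of the constraint kinds this block handles, using standard GCC-style inline asm (not part of this patch; requires an x86 target with x87): "t" pins an operand to ST(0), "u" to ST(1), and "f" lets the allocator pick any x87 stack slot.

// Classic x87 atan2 via inline asm.
double my_atan2(double y, double x) {
  double r;
  asm("fpatan"          // ST(1) = atan(ST(1)/ST(0)), then pops the stack
      : "=t"(r)         // result is left in ST(0)
      : "0"(x), "u"(y)  // "0": reuse ST(0) for x; "u": y goes in ST(1)
      : "st(1)");       // ST(1) is consumed by the pop
  return r;
}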
@@ -1655,12 +1528,12 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
unsigned FPReg = getFPReg(Op);
if (Op.isKill())
- moveToTop(FPReg, I);
+ moveToTop(FPReg, Inst);
else
- duplicateToTop(FPReg, FPReg, I);
+ duplicateToTop(FPReg, FPReg, Inst);
// Emit the call. This will pop the operand.
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
.addExternalSymbol("_ftol2")
.addReg(X86::ST0, RegState::ImplicitKill)
.addReg(X86::ECX, RegState::ImplicitDefine)
@@ -1738,7 +1611,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
- unsigned NewReg = getScratchReg();
+ unsigned NewReg = ScratchFPReg;
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
@@ -1761,13 +1634,54 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
return;
}
- I = MBB->erase(I); // Remove the pseudo instruction
+ Inst = MBB->erase(Inst); // Remove the pseudo instruction
 // We want to leave Inst pointing to the previous instruction, but what if
 // we just erased the first instruction?
- if (I == MBB->begin()) {
+ if (Inst == MBB->begin()) {
DEBUG(dbgs() << "Inserting dummy KILL\n");
- I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
} else
- --I;
+ --Inst;
+}
+
+void FPS::setKillFlags(MachineBasicBlock &MBB) const {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ LivePhysRegs LPR(TRI);
+
+ LPR.addLiveOuts(&MBB);
+
+ for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ std::bitset<8> Defs;
+ SmallVector<MachineOperand *, 2> Uses;
+ MachineInstr &MI = *I;
+
+ for (auto &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg() - X86::FP0;
+
+ if (Reg >= 8)
+ continue;
+
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ if (!LPR.contains(MO.getReg()))
+ MO.setIsDead();
+ } else
+ Uses.push_back(&MO);
+ }
+
+ for (auto *MO : Uses)
+ if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
+ MO->setIsKill();
+
+ LPR.stepBackward(MI);
+ }
}
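A compact model of the backward scan above, as hypothetical standalone C++ over eight toy registers: walking last to first, a use of a register that is not currently live must be its final use, so it receives a kill flag.

#include <bitset>
#include <cstdio>
#include <vector>

struct Inst { int Def = -1; int Use = -1; bool Kill = false; };

// Walk the block bottom-up: defs end liveness looking upward, and a use of
// a register that is not live below this point is the last use (a kill).
static void setKills(std::vector<Inst> &Block, std::bitset<8> LiveOut) {
  std::bitset<8> Live = LiveOut;
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    if (I->Def >= 0) Live.reset(I->Def);
    if (I->Use >= 0) {
      I->Kill = !Live.test(I->Use); // not needed above == final use
      Live.set(I->Use);
    }
  }
}

int main() {
  // r0 = ...; r1 = f(r0); g(r1)  (nothing live out of the block)
  std::vector<Inst> Block = {{0, -1}, {1, 0}, {-1, 1}};
  setKills(Block, {});
  std::printf("%d %d %d\n", Block[0].Kill, Block[1].Kill, Block[2].Kill); // 0 1 1
}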
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 8c029a8c22d5..16aab16d63ee 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
+#include <cstdlib>
using namespace llvm;
@@ -46,14 +47,15 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.callsEHReturn());
+ MMI.callsUnwindInit() || MMI.callsEHReturn() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -80,6 +82,27 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
}
}
+static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::AND64ri8;
+ return X86::AND64ri32;
+ }
+ if (isInt<8>(Imm))
+ return X86::AND32ri8;
+ return X86::AND32ri;
+}
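A small numeric check (assumed values, plain C++) of why the AND with -MaxAlign emitted via these opcodes realigns the stack: the two's-complement mask clears the low bits and always rounds the pointer down, growing the stack rather than shrinking it.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t RSP = 0x7fffffffe448ULL; // hypothetical, unaligned stack pointer
  const uint64_t MaxAlign = 32;     // requested alignment (a power of two)
  // and rsp, -32: -32 is 0xff...e0, the same bit pattern as ~(32 - 1),
  // so the AND clears the low five bits and rounds the pointer down.
  RSP &= ~(MaxAlign - 1);
  assert(RSP == 0x7fffffffe440ULL && RSP % MaxAlign == 0);
  return 0;
}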
+
+static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
+ // We don't support LP64 for now.
+ assert(!IsLP64);
+
+ if (MO.isImm() && isInt<8>(MO.getImm()))
+ return X86::PUSH32i8;
+
+ return X86::PUSHi32;
+}
+
static unsigned getLEArOpcode(unsigned IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
@@ -148,32 +171,32 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, bool IsLP64, bool UseLEA,
+ bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
- Opc = getLEArOpcode(IsLP64);
+ Opc = getLEArOpcode(Is64BitStackPtr);
else
Opc = isSub
- ? getSUBriOpcode(IsLP64, Offset)
- : getADDriOpcode(IsLP64, Offset);
+ ? getSUBriOpcode(Is64BitStackPtr, Offset)
+ : getADDriOpcode(Is64BitStackPtr, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
while (Offset) {
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- if (ThisVal == (Is64Bit ? 8 : 4)) {
+ if (ThisVal == (Is64BitTarget ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
- ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
if (Reg) {
Opc = isSub
- ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
- : (Is64Bit ? X86::POP64r : X86::POP32r);
+ ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64BitTarget ? X86::POP64r : X86::POP32r);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
@@ -314,7 +337,7 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -352,6 +375,23 @@ static bool usesTheStack(const MachineFunction &MF) {
return false;
}
+void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
+ unsigned &CallOp,
+ const char *&Symbol) {
+ CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
+
+ if (STI.is64Bit()) {
+ if (STI.isTargetCygMing()) {
+ Symbol = "___chkstk_ms";
+ } else {
+ Symbol = "__chkstk";
+ }
+ } else if (STI.isTargetCygMing())
+ Symbol = "_alloca";
+ else
+ Symbol = "_chkstk";
+}
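Conceptually, the probe routine selected here touches every 4 KiB page between the old and new stack pointer so the OS guard pages are faulted in order. A hypothetical C++ model (not the real __chkstk, which probes the machine stack itself):

#include <cstddef>

// Conceptual model only; real chkstk-style routines work on the actual stack.
static void probeRange(volatile char *Top, std::size_t Bytes,
                       std::size_t Page = 4096) {
  for (std::size_t Off = Page; Off <= Bytes; Off += Page)
    Top[-static_cast<std::ptrdiff_t>(Off)] = 0; // one touch per page, top-down
}

int main() {
  static char FakeStack[1 << 20];                   // stand-in memory
  probeRange(FakeStack + sizeof(FakeStack), 16384); // touches four pages
}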
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -418,6 +458,8 @@ static bool usesTheStack(const MachineFunction &MF) {
[if needs base pointer]
mov %rsp, %rbx
+ [if needs to restore base pointer]
+ mov %rsp, -MMM(%rbp)
; Emit CFI info
[if needs FP]
@@ -440,8 +482,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
@@ -449,11 +491,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
- bool IsLP64 = STI.isTarget64BitLP64();
+ // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
+ const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
bool IsWin64 = STI.isTargetWin64();
- bool IsWinEH =
- MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
- ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
+ // Not necessarily synonymous with IsWin64.
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
bool NeedsDwarfCFI =
!IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
@@ -461,6 +503,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
+ getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
@@ -482,6 +526,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+ bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
+
+ // The default stack probe size is 4096 if the function has no
+ // "stack-probe-size" attribute.
+ unsigned StackProbeSize = 4096;
+ if (Fn->hasFnAttribute("stack-probe-size"))
+ Fn->getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
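A hedged sketch of the parsing convention relied on here, in plain C++ with strtoul standing in for StringRef::getAsInteger (whose radix argument 0 likewise autodetects decimal, hex, or octal); the helper name is illustrative only.

#include <cstdlib>
#include <string>

// Returns the probe size from an attribute string, keeping the 4096
// default when the string is absent or malformed (assumed behavior).
unsigned parseStackProbeSize(const std::string &Attr) {
  unsigned Size = 4096;
  if (!Attr.empty()) {
    char *End = nullptr;
    unsigned long V = std::strtoul(Attr.c_str(), &End, 0); // base 0: "0x..." ok
    if (End != Attr.c_str() && *End == '\0')
      Size = static_cast<unsigned>(V);
  }
  return Size;
}
// e.g. parseStackProbeSize("8192") == 8192, matching an IR attribute
// such as "stack-probe-size"="8192" on the function.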
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
@@ -507,7 +561,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
+ TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@@ -535,6 +589,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
+ // If required, include space for an extra hidden slot to stash the base pointer.
+ if (X86FI->getRestoreBasePointer())
+ FrameSize += SlotSize;
if (RegInfo->needsStackRealignment(MF)) {
// Callee-saved registers are pushed on stack before the stack
// is realigned.
@@ -551,7 +608,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill)
+ .addReg(MachineFramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
@@ -564,7 +621,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
// Change the rule for the FramePtr to be an "offset" rule.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
+ unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr,
DwarfFramePtr, 2 * stackGrowth));
@@ -580,14 +637,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
+ unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -596,7 +653,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark the FramePtr as live-in in every block.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- I->addLiveIn(FramePtr);
+ I->addLiveIn(MachineFramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
@@ -633,11 +690,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// able to calculate their offsets from the frame pointer).
if (RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ uint64_t Val = -MaxAlign;
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
.addReg(StackPtr)
- .addImm(-MaxAlign)
+ .addImm(Val)
.setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
@@ -663,19 +721,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetMacho()) {
+ if (NumBytes >= StackProbeSize && UseStackProbe) {
const char *StackProbeSymbol;
+ unsigned CallOp;
- if (Is64Bit) {
- if (STI.isTargetCygMing()) {
- StackProbeSymbol = "___chkstk_ms";
- } else {
- StackProbeSymbol = "__chkstk";
- }
- } else if (STI.isTargetCygMing())
- StackProbeSymbol = "_alloca";
- else
- StackProbeSymbol = "_chkstk";
+ getStackProbeFunction(STI, CallOp, StackProbeSymbol);
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@@ -706,7 +756,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
@@ -722,15 +772,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
if (isEAXAlive) {
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MI->setFlag(MachineInstr::FrameSetup);
- MBB.insert(MBBI, MI);
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
+ MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@@ -746,7 +796,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// will restore SP to (BP - SEHFrameOffset)
for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
int offset = MFI->getObjectOffset(Info.getFrameIdx());
- SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+ SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
}
 SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
@@ -804,10 +854,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
// Update the base pointer with the current stack pointer.
- unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
+ if (X86FI->getRestoreBasePointer()) {
+ // Stash the value of the base pointer. Saving RSP instead of EBP shortens the dependence chain.
+ unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
+ FramePtr, true, X86FI->getRestoreBasePointerOffset())
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
@@ -834,21 +892,28 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no instructions");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
- bool IsLP64 = STI.isTarget64BitLP64();
+ // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
+ const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ const bool Is64BitILP32 = STI.isTarget64BitILP32();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned MachineFramePtr = Is64BitILP32 ?
+ getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
+
switch (RetOpcode) {
default:
llvm_unreachable("Can only insert epilog into returning blocks");
@@ -898,7 +963,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Pop EBP.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
} else {
NumBytes = StackSize - CSSize;
}
@@ -930,27 +995,39 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(IsLP64);
+ unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
+ --MBBI;
} else {
- unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(FramePtr);
+ --MBBI;
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
TII, *RegInfo);
+ --MBBI;
}
+ // The Windows unwinder will not invoke a function's exception handler if the
+ // IP is either in the prologue or in the epilogue. This causes a problem when
+ // a call immediately precedes an epilogue, because the return address points
+ // into the epilogue. To cope with that, we insert an epilogue marker here,
+ // then replace it with a 'nop' if it ends up immediately after a CALL in the
+ // final emitted code.
+ if (NeedsWinEH)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
+
// We're returning from function via eh_return.
if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &DestAddr = MBBI->getOperand(0);
assert(DestAddr.isReg() && "Offset should be in register!");
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode == X86::TCRETURNmi ||
@@ -976,7 +1053,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@@ -1021,7 +1098,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
*RegInfo);
}
}
@@ -1029,7 +1106,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const MachineFrameInfo *MFI = MF.getFrameInfo();
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
@@ -1072,7 +1149,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
@@ -1085,12 +1162,85 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return getFrameIndexOffset(MF, FI);
}
+// Simplified from getFrameIndexOffset keeping only StackPointer cases
+int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Does not include any dynamic realign.
+ const uint64_t StackSize = MFI->getStackSize();
+ {
+#ifndef NDEBUG
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+ // Note: LLVM arranges the stack as:
+ // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
+ // > "Stack Slots" (<--SP)
+ // We can always address StackSlots from RSP. We can usually (unless
+ // needsStackRealignment) address CSRs from RSP, but sometimes need to
+ // address them from RBP. FixedObjects can be placed anywhere in the stack
+ // frame depending on their specific requirements (i.e. we can actually
+ // refer to arguments to the function which are stored in the *callers*
+ // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
+ // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
+
+ assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+
+ // We don't handle tail calls, and shouldn't be seeing them
+ // either.
+ int TailCallReturnAddrDelta =
+ MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
+ assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
+#endif
+ }
+
+ // This is how the math works out:
+ //
+ // %rsp grows (i.e. gets lower) left to right. Each box below is
+ // one word (eight bytes). Obj0 is the stack slot we're trying to
+ // get to.
+ //
+ // ----------------------------------
+ // | BP | Obj0 | Obj1 | ... | ObjN |
+ // ----------------------------------
+ // ^ ^ ^ ^
+ // A B C E
+ //
+ // A is the incoming stack pointer.
+ // (B - A) is the local area offset (-8 for x86-64) [1]
+ // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
+ //
+ // |(E - B)| is the StackSize (absolute value, positive). For a
+  // stack that grows down, this works out to be (B - E). [3]
+ //
+ // E is also the value of %rsp after stack has been set up, and we
+ // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
+ // (C - E) == (C - A) - (B - A) + (B - E)
+ // { Using [1], [2] and [3] above }
+ // == getObjectOffset - LocalAreaOffset + StackSize
+ //
+
+ // Get the Offset from the StackPointer
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+
+ return Offset + StackSize;
+}
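
The derivation above reduces to one line of integer arithmetic. A standalone
sketch in plain C++ (hypothetical numbers, no LLVM API):

    #include <cassert>
    #include <cstdint>

    // (C - E) == (C - A) - (B - A) + (B - E), i.e.
    // getObjectOffset - LocalAreaOffset + StackSize.
    int64_t offsetFromSP(int64_t ObjectOffset, int64_t LocalAreaOffset,
                         uint64_t StackSize) {
      return ObjectOffset - LocalAreaOffset + static_cast<int64_t>(StackSize);
    }

    int main() {
      // x86-64 example: local area offset -8, 32-byte frame; an object whose
      // getObjectOffset() is -24 sits 16 bytes above %rsp.
      assert(offsetFromSP(-24, -8, 32) == 16);
    }
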
+// Simplified from getFrameIndexReference keeping only StackPointer cases
+int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                                    int FI,
+                                                    unsigned &FrameReg) const {
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+
+ assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+
+ FrameReg = RegInfo->getStackRegister();
+ return getFrameIndexOffsetFromSP(MF, FI);
+}
+
bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1107,7 +1257,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// about avoiding it later.
unsigned FPReg = RegInfo->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
- if (CSI[i].getReg() == FPReg) {
+    if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
CSI.erase(CSI.begin() + i);
break;
}
@@ -1138,7 +1288,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// ensure alignment
- SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+ SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
// spill into slot
SpillSlotOffset -= RC->getSize();
int SlotIndex =
@@ -1157,7 +1307,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Push GPRs. It increases frame size.
@@ -1205,7 +1355,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Reload XMMs from stack frame.
@@ -1237,7 +1387,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1278,7 +1428,7 @@ HasNestArgument(const MachineFunction *MF) {
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF,
+                   bool Primary) {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
// Erlang stuff.
@@ -1289,8 +1439,12 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
return Primary ? X86::EBX : X86::EDI;
}
- if (Is64Bit)
- return Primary ? X86::R11 : X86::R12;
+ if (Is64Bit) {
+ if (IsLP64)
+ return Primary ? X86::R11 : X86::R12;
+ else
+ return Primary ? X86::R11D : X86::R12D;
+ }
bool IsNested = HasNestArgument(&MF);
@@ -1314,21 +1468,23 @@ void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
uint64_t StackSize;
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
+ const bool IsLP64 = STI.isTarget64BitLP64();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
- !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD())
+ if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
+ !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
+ !STI.isTargetDragonFly())
report_fatal_error("Segmented stacks not supported on this platform.");
  // Eventually StackSize will be calculated by a link-time pass, which will
@@ -1359,7 +1515,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
}
if (IsNested)
- allocMBB->addLiveIn(X86::R10);
+ allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
MF.push_front(allocMBB);
MF.push_front(checkMBB);
@@ -1372,7 +1528,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (Is64Bit) {
if (STI.isTargetLinux()) {
TlsReg = X86::FS;
- TlsOffset = 0x70;
+ TlsOffset = IsLP64 ? 0x70 : 0x40;
} else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
@@ -1382,17 +1538,20 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
} else if (STI.isTargetFreeBSD()) {
TlsReg = X86::FS;
TlsOffset = 0x18;
+ } else if (STI.isTargetDragonFly()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x20; // use tls_tcb.tcb_segstack
} else {
report_fatal_error("Segmented stacks not supported on this platform.");
}
if (CompareStackPointer)
- ScratchReg = X86::RSP;
+ ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
else
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
+              ScratchReg).addReg(X86::RSP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
+        .addReg(ScratchReg)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
if (STI.isTargetLinux()) {
@@ -1404,6 +1563,9 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
} else if (STI.isTargetWin32()) {
TlsReg = X86::FS;
TlsOffset = 0x14; // pvArbitrary, reserved for application use
+ } else if (STI.isTargetDragonFly()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x10; // use tls_tcb.tcb_segstack
} else if (STI.isTargetFreeBSD()) {
report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
} else {
@@ -1416,7 +1578,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64()) {
+ if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
+ STI.isTargetDragonFly()) {
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else if (STI.isTargetDarwin()) {
@@ -1426,11 +1589,11 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
bool SaveScratch2;
if (CompareStackPointer) {
// The primary scratch register is available for holding the TLS offset.
- ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
SaveScratch2 = false;
} else {
// Need to use a second register to hold the TLS offset
- ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+ ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
// Unfortunately, with fastcc the second scratch register may hold an
// argument.
@@ -1460,7 +1623,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -1468,15 +1631,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// Functions with nested arguments use R10, so it needs to be saved across
// the call to _morestack
+ const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
+ const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
+ const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
+ const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
+ const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
+
if (IsNested)
- BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+ BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
- BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
.addImm(StackSize);
- BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(X86::R10);
- MF.getRegInfo().setPhysRegUsed(X86::R11);
+ MF.getRegInfo().setPhysRegUsed(Reg10);
+ MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
@@ -1485,12 +1654,36 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
}
// __morestack is in libgcc
- if (Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack");
- else
- BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack");
+ if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
+ // Under the large code model, we cannot assume that __morestack lives
+ // within 2^31 bytes of the call site, so we cannot use pc-relative
+ // addressing. We cannot perform the call via a temporary register,
+ // as the rax register may be used to store the static chain, and all
+ // other suitable registers may be either callee-save or used for
+ // parameter passing. We cannot use the stack at this point either
+ // because __morestack manipulates the stack directly.
+ //
+ // To avoid these issues, perform an indirect call via a read-only memory
+ // location containing the address.
+ //
+ // This solution is not perfect, as it assumes that the .rodata section
+ // is laid out within 2^31 bytes of each function body, but this seems
+ // to be sufficient for JIT.
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addExternalSymbol("__morestack_addr")
+ .addReg(0);
+ MF.getMMI().setUsesMorestackAddr(true);
+ } else {
+ if (Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack");
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack");
+ }
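
For reference, the shape of the emitted call in the large-code-model branch
(illustrative; the __morestack_addr cell itself is produced elsewhere once
setUsesMorestackAddr() has been called, roughly as sketched):

    // callq *__morestack_addr(%rip)   ; indirect, RIP-relative
    //
    // __morestack_addr:               ; read-only data, within 2^31 bytes
    //     .quad __morestack           ; of the call site
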
if (IsNested)
BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
@@ -1523,13 +1716,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// temp0 = sp - MaxStack
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
const unsigned SlotSize =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo())
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
->getSlotSize();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
const bool Is64Bit = STI.is64Bit();
+ const bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL;
// HiPE-specific values
const unsigned HipeLeafWords = 24;
@@ -1623,7 +1817,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
SPLimitOffset = 0x4c;
}
- ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"HiPE prologue scratch register is live-in");
@@ -1633,7 +1827,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
// SPLimitOffset is in a fixed heap location (pointed by BP).
addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
.addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
// Create new MBB for IncStack:
BuildMI(incStackMBB, DL, TII.get(CALLop)).
@@ -1642,7 +1836,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
SPReg, false, -MaxStack);
addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
.addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+ BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
stackCheckMBB->addSuccessor(&prologueMBB, 99);
stackCheckMBB->addSuccessor(incStackMBB, 1);
@@ -1654,40 +1848,141 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
#endif
}
+bool X86FrameLowering::
+convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, uint64_t Amount) const {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ unsigned StackPtr = RegInfo.getStackRegister();
+
+ // Scan the call setup sequence for the pattern we're looking for.
+ // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
+ // instructions, that push a sequence of 32-bit values onto the stack, with
+ // no gaps.
+ std::map<int64_t, MachineBasicBlock::iterator> MovMap;
+ do {
+ int Opcode = I->getOpcode();
+ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
+ break;
+
+ // We only want movs of the form:
+ // movl imm/r32, k(%ecx)
+ // If we run into something else, bail
+ // Note that AddrBaseReg may, counterintuitively, not be a register...
+ if (!I->getOperand(X86::AddrBaseReg).isReg() ||
+ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
+ !I->getOperand(X86::AddrScaleAmt).isImm() ||
+ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
+ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
+ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
+ !I->getOperand(X86::AddrDisp).isImm())
+ return false;
+
+ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
+
+ // We don't want to consider the unaligned case.
+ if (StackDisp % 4)
+ return false;
+
+ // If the same stack slot is being filled twice, something's fishy.
+ if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
+ return false;
+
+ ++I;
+ } while (I != MBB.end());
+
+ // We now expect the end of the sequence - a call and a stack adjust.
+ if (I == MBB.end())
+ return false;
+ if (!I->isCall())
+ return false;
+ MachineBasicBlock::iterator Call = I;
+ if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
+ return false;
+
+ // Now, go through the map, and see that we don't have any gaps,
+ // but only a series of 32-bit MOVs.
+ // Since std::map provides ordered iteration, the original order
+ // of the MOVs doesn't matter.
+ int64_t ExpectedDist = 0;
+ for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
+ ++MMI, ExpectedDist += 4)
+ if (MMI->first != ExpectedDist)
+ return false;
+
+ // Ok, everything looks fine. Do the transformation.
+ DebugLoc DL = I->getDebugLoc();
+
+ // It's possible the original stack adjustment amount was larger than
+ // that done by the pushes. If so, we still need a SUB.
+ Amount -= ExpectedDist;
+ if (Amount) {
+ MachineInstr* Sub = BuildMI(MBB, Call, DL,
+ TII.get(getSUBriOpcode(false, Amount)), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ Sub->getOperand(3).setIsDead();
+ }
+
+ // Now, iterate through the map in reverse order, and replace the movs
+  // with pushes. MOVmi/MOVmr doesn't have any defs, so no uses need replacing.
+ for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
+ MachineBasicBlock::iterator MOV = MMI->second;
+ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+
+ // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
+ int PushOpcode = X86::PUSH32r;
+ if (MOV->getOpcode() == X86::MOV32mi)
+ PushOpcode = getPUSHiOpcode(false, PushOp);
+
+ BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
+ MBB.erase(MOV);
+ }
+
+ return true;
+}
+
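
A standalone sketch in plain C++ (not LLVM API) of the gap check performed
over MovMap above: the sorted displacements must tile 0, 4, 8, ... exactly
before a sequence like "movl $1, (%esp); movl $2, 4(%esp)" may be rewritten
as "pushl $2; pushl $1":

    #include <cassert>
    #include <cstdint>
    #include <map>

    // True when the sorted 32-bit-slot displacements cover 0, 4, 8, ...
    // with no holes, the precondition for turning the movs into pushes.
    bool coversWithoutGaps(const std::map<int64_t, unsigned> &MovMap) {
      int64_t ExpectedDist = 0;
      for (const auto &KV : MovMap) {
        if (KV.first != ExpectedDist)
          return false;
        ExpectedDist += 4;
      }
      return true;
    }

    int main() {
      assert(coversWithoutGaps({{0, 1}, {4, 2}, {8, 3}}));
      assert(!coversWithoutGaps({{0, 1}, {8, 3}})); // hole at displacement 4
    }
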
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- const X86RegisterInfo &RegInfo =
- *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
- bool reseveCallFrame = hasReservedCallFrame(MF);
+ bool reserveCallFrame = hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
- if (!reseveCallFrame) {
+ if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
if (Amount == 0)
return;
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign =
- MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = nullptr;
if (Opcode == TII.getCallFrameSetupOpcode()) {
+ // Try to convert movs to the stack into pushes.
+ // We currently only look for a pattern that appears in 32-bit
+ // calling conventions.
+ if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
+ return;
+
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
StackPtr)
.addReg(StackPtr)
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 5ad3d4dc2e97..ee0ee227cad8 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_FRAMELOWERING_H
-#define X86_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
+#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
@@ -20,12 +20,17 @@ namespace llvm {
class MCSymbol;
class X86TargetMachine;
+class X86Subtarget;
class X86FrameLowering : public TargetFrameLowering {
public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
+ static void getStackProbeFunction(const X86Subtarget &STI,
+ unsigned &CallOp,
+ const char *&Symbol);
+
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) const;
@@ -64,9 +69,23 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
+ int getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const;
+ int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+
+private:
+ /// convertArgMovsToPushes - This method tries to convert a call sequence
+  /// that uses sub and mov instructions to put arguments onto the stack
+ /// into a series of pushes.
+ /// Returns true if the transformation succeeded, false if not.
+ bool convertArgMovsToPushes(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ uint64_t Amount) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ba2f5f645d09..7fe8e8b344aa 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <stdint.h>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
@@ -192,7 +194,6 @@ namespace {
private:
SDNode *Select(SDNode *N) override;
SDNode *SelectGather(SDNode *N, unsigned Opc);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
@@ -237,10 +238,10 @@ namespace {
inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
- Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
- getTargetLowering()->getPointerTy()) :
- AM.Base_Reg;
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
+ TLI->getPointerTy())
+ : AM.Base_Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
@@ -297,7 +298,14 @@ namespace {
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine().getInstrInfo();
+ return getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ }
+
+ /// \brief Address-mode matching performs shift-of-and to and-of-shift
+ /// reassociation in order to expose more scaled addressing
+ /// opportunities.
+ bool ComplexPatternFuncMutatesDAG() const override {
+ return true;
}
};
}
@@ -510,7 +518,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// If the source and destination are SSE registers, then this is a legal
// conversion that should not be lowered.
const X86TargetLowering *X86Lowering =
- static_cast<const X86TargetLowering *>(getTargetLowering());
+ static_cast<const X86TargetLowering *>(TLI);
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -544,7 +552,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
false, false, 0);
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
MachinePointerInfo(),
- MemVT, false, false, 0);
+ MemVT, false, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -565,7 +573,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
@@ -775,9 +783,10 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
}
}
-// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
-// allows us to convert the shift and and into an h-register extract and
-// a scaled index. Returns false if the simplification is performed.
+// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
+// safe. This allows us to convert the shift and and into an h-register
+// extract and a scaled index. Returns false if the simplification is
+// performed.
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
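
The comment as rewritten above states an identity that is easy to spot-check.
A standalone sketch in plain C++, assuming logical shifts and 0 <= C1 <= 8:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned C1 = 0; C1 <= 8; ++C1)
        for (uint64_t X : {0x0ULL, 0x1234ULL, 0xdeadbeefULL, ~0ULL}) {
          uint64_t LHS = (X >> (8 - C1)) & (0xffULL << C1);
          uint64_t RHS = ((X >> 8) & 0xffULL) << C1;
          assert(LHS == RHS);
        }
    }
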
@@ -1429,7 +1438,7 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
if (RN && RN->getReg() == 0)
Base = CurDAG->getRegister(0, MVT::i64);
- else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
+ else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
// Base could already be %rip, particularly in the x32 ABI.
Base = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
@@ -1563,26 +1572,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg,
- getTargetLowering()->getPointerTy()).getNode();
-}
-
-SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
- SDValue Chain = Node->getOperand(0);
- SDValue In1 = Node->getOperand(1);
- SDValue In2L = Node->getOperand(2);
- SDValue In2H = Node->getOperand(3);
-
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return nullptr;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
- MVT::i32, MVT::i32, MVT::Other, Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
}
/// Atomic opcode table
@@ -1716,16 +1706,23 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDLoc dl,
enum AtomicOpc &Op, MVT NVT,
- SDValue Val) {
+ SDValue Val,
+ const X86Subtarget *Subtarget) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
int64_t CNVal = CN->getSExtValue();
// Quit if not 32-bit imm.
if ((int32_t)CNVal != CNVal)
return Val;
+ // Quit if INT32_MIN: it would be negated as it is negative and overflow,
+ // producing an immediate that does not fit in the 32 bits available for
+ // an immediate operand to sub. However, it still fits in 32 bits for the
+ // add (since it is not negated) so we can return target-constant.
+ if (CNVal == INT32_MIN)
+ return CurDAG->getTargetConstant(CNVal, NVT);
// For atomic-load-add, we could do some optimizations.
if (Op == ADD) {
// Translate to INC/DEC if ADD by 1 or -1.
- if ((CNVal == 1) || (CNVal == -1)) {
+ if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
Op = (CNVal == 1) ? INC : DEC;
// No more constant operand after being translated into INC/DEC.
return SDValue();
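
The INT32_MIN early-out above guards a real asymmetry between add and sub
immediates; a standalone sketch in plain C++ (assumes the usual two's
complement narrowing behavior):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t CNVal = INT32_MIN;
      assert((int32_t)CNVal == CNVal);     // fits as-is: usable by `add`
      int64_t Negated = -CNVal;            // 2147483648
      assert((int32_t)Negated != Negated); // no longer fits: unusable by `sub`
    }
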
@@ -1774,8 +1771,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
SDValue Chain = Node->getOperand(0);
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
return nullptr;
// Which index into the table.
@@ -1797,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
break;
}
- Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+ Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
bool isUnOp = !Val.getNode();
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
@@ -1829,31 +1826,40 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
Opc = AtomicOpcTbl[Op][I32];
break;
case MVT::i64:
- Opc = AtomicOpcTbl[Op][I64];
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI64];
else if (i64immSExt32(Val.getNode()))
Opc = AtomicOpcTbl[Op][ConstantI64];
- }
+ else
+        llvm_unreachable("True 64-bit constant in SelectAtomicLoadArith");
+ } else
+ Opc = AtomicOpcTbl[Op][I64];
break;
}
assert(Opc != 0 && "Invalid arith lock transform!");
+ // Building the new node.
SDValue Ret;
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
if (isUnOp) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
} else {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
}
+
+ // Copying the MachineMemOperand.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+
+ // We need to have two outputs as that is what the original instruction had.
+ // So we add a dummy, undefined output. This is safe as we checked first
+  // that no one uses our output anyway.
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
SDValue RetVals[] = { Undef, Ret };
return CurDAG->getMergeValues(RetVals, dl).getNode();
}
@@ -1885,8 +1891,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
- case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+ case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
+ case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
case X86::CMOVA16rr: case X86::CMOVA16rm:
case X86::CMOVA32rr: case X86::CMOVA32rm:
case X86::CMOVA64rr: case X86::CMOVA64rm:
@@ -2125,6 +2131,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+ case X86ISD::SHRUNKBLEND: {
+ // SHRUNKBLEND selects like a regular VSELECT.
+ SDValue VSelect = CurDAG->getNode(
+ ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2));
+ ReplaceUses(SDValue(Node, 0), VSelect);
+ SelectCode(VSelect.getNode());
+ // We already called ReplaceUses.
+ return nullptr;
+ }
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
@@ -2212,6 +2228,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
}
+ case X86ISD::UMUL8:
+ case X86ISD::SMUL8: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ return nullptr;
+ }
+
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -2387,11 +2422,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
case ISD::SDIVREM:
- case ISD::UDIVREM: {
+ case ISD::UDIVREM:
+ case X86ISD::SDIVREM8_SEXT_HREG:
+ case X86ISD::UDIVREM8_ZEXT_HREG: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- bool isSigned = Opcode == ISD::SDIVREM;
+ bool isSigned = (Opcode == ISD::SDIVREM ||
+ Opcode == X86ISD::SDIVREM8_SEXT_HREG);
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -2466,7 +2504,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
+ SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
@@ -2507,33 +2545,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
+ // Prevent use of AH in a REX instruction by explicitly copying it to
+ // an ABCD_L register.
//
// The current assumption of the register allocator is that isel
- // won't generate explicit references to the GPR8_NOREX registers. If
+ // won't generate explicit references to the GR8_ABCD_H registers. If
// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.
- if (HiReg == X86::AH && Subtarget->is64Bit() &&
- !SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
-
- // If we also need AL (the quotient), get it by extracting a subreg from
- // Result. The fast register allocator does not like multiple CopyFromReg
- // nodes using aliasing registers.
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0),
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
-
- // Shift AX right by 8 bits instead of using AH.
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- ReplaceUses(SDValue(Node, 1),
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
+ SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
+ unsigned AHExtOpcode =
+ isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
+
+ SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
+ MVT::Glue, AHCopy, InFlag);
+ SDValue Result(RNode, 0);
+ InFlag = SDValue(RNode, 1);
+
+ if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
+ Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
+ if (Node->getValueType(1) == MVT::i64) {
+          // It's not possible to directly movsx AH to a 64-bit register,
+          // because the latter needs the REX prefix, but the former can't
+          // have it.
+ // the latter needs the REX prefix, but the former can't have it.
+ assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
+ "Unexpected i64 sext of h-register");
+ Result =
+ SDValue(CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, MVT::i64), Result,
+ CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ 0);
+ }
+ } else {
+ Result =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+ }
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
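
The encoding constraint behind this block, sketched for reference (registers
chosen for illustration only):

    // An instruction carrying a REX prefix cannot address %ah: under REX the
    // encoding that meant AH selects %spl instead. So
    //   movzbl %ah, %r8d   ; impossible: REX and AH cannot coexist
    // does not exist, and the value must first leave AH via a movzx that is
    // guaranteed REX-free (MOVZX32_NOREXrr8):
    //   movzbl %ah, %eax
    // after which it can be copied or extended into any register.
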
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
@@ -2563,12 +2611,30 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
- // use a smaller encoding.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- HasNoSignedComparisonUses(Node))
- // Look past the truncate if CMP is the only use of it.
+ HasNoSignedComparisonUses(Node)) {
+ // Look for (X86cmp (truncate $op, i1), 0) and try to convert to a
+ // smaller encoding
+ if (Opcode == X86ISD::CMP && N0.getValueType() == MVT::i1 &&
+ X86::isZeroNode(N1)) {
+ SDValue Reg = N0.getOperand(0);
+ SDValue Imm = CurDAG->getTargetConstant(1, MVT::i8);
+
+        // Extract the low byte if the value is wider than 8 bits.
+ if (Reg.getScalarValueSizeInBits() > 8)
+ Reg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg);
+ // Emit a testb.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Reg, Imm);
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ return nullptr;
+ }
+
N0 = N0.getOperand(0);
+ }
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ // Look past the truncate if CMP is the only use of it.
if ((N0.getNode()->getOpcode() == ISD::AND ||
(N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
N0.getNode()->hasOneUse() &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3a2cb6be12d2..a1fd34ea8000 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19,6 +19,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -49,6 +50,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
+#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
@@ -65,10 +67,23 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
cl::Hidden);
static cl::opt<bool> ExperimentalVectorShuffleLowering(
- "x86-experimental-vector-shuffle-lowering", cl::init(false),
+ "x86-experimental-vector-shuffle-lowering", cl::init(true),
cl::desc("Enable an experimental vector shuffle lowering code path."),
cl::Hidden);
+static cl::opt<bool> ExperimentalVectorShuffleLegality(
+ "x86-experimental-vector-shuffle-legality", cl::init(false),
+ cl::desc("Enable experimental shuffle legality based on the experimental "
+ "shuffle lowering. Should only be used with the experimental "
+ "shuffle lowering."),
+ cl::Hidden);
+
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+ "x86-recip-refinement-steps", cl::init(1),
+ cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+ "result of the hardware reciprocal estimate instruction."),
+ cl::NotHidden);
+
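
For context on what the new flag counts: one Newton-Raphson step refines a
reciprocal estimate x of 1/a via x <- x * (2 - a * x), roughly doubling the
number of correct bits per step. A standalone sketch in plain C++:

    #include <cassert>
    #include <cmath>

    int main() {
      const float a = 3.0f;
      float x = 0.3f;          // crude hardware-style estimate of 1/3
      const int Steps = 1;     // the quantity -x86-recip-refinement-steps sets
      for (int i = 0; i < Steps; ++i)
        x = x * (2.0f - a * x);
      assert(std::fabs(x - 1.0f / 3.0f) < 1e-2f);
    }
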
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -99,21 +114,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
- VecIdx);
-
- return Result;
-
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
+
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit bounday. That makes
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, SDLoc dl) {
@@ -150,25 +162,23 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
* ElemsPerChunk);
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
- VecIdx);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
+
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit bounday. That makes
+/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl) {
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+                                  SelectionDAG &DAG, SDLoc dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl) {
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
@@ -191,28 +201,10 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO()) {
- if (TT.getArch() == Triple::x86_64)
- return new X86_64MachoTargetObjectFile();
- return new TargetLoweringObjectFileMachO();
- }
-
- if (TT.isOSLinux())
- return new X86LinuxTargetObjectFile();
- if (TT.isOSBinFormatELF())
- return new TargetLoweringObjectFileELF();
- if (TT.isKnownWindowsMSVCEnvironment())
- return new X86WindowsTargetObjectFile();
- if (TT.isOSBinFormatCOFF())
- return new TargetLoweringObjectFileCOFF();
- llvm_unreachable("unknown subtarget type");
-}
-
// FIXME: This should stop caching the target machine as soon as
// we can remove resetOperationActions et al.
-X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -240,13 +232,13 @@ void X86TargetLowering::resetOperationActions() {
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
- // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ // X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- // For 64-bit since we have so many registers use the ILP scheduler, for
- // 32-bit code use the register pressure specific scheduling.
+ // For 64-bit, since we have so many registers, use the ILP scheduler.
+ // For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget->isAtom())
setSchedulingPreference(Sched::ILP);
@@ -255,13 +247,14 @@ void X86TargetLowering::resetOperationActions() {
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ TM.getSubtarget<X86Subtarget>().getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
- // Bypass expensive divides on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
- addBypassSlowDiv(32, 8);
- if (Subtarget->is64Bit())
+ // Bypass expensive divides on Atom when compiling with O2.
+ if (TM.getOptLevel() >= CodeGenOpt::Default) {
+ if (Subtarget->hasSlowDivide32())
+ addBypassSlowDiv(32, 8);
+ if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
addBypassSlowDiv(64, 16);
}
@@ -306,7 +299,8 @@ void X86TargetLowering::resetOperationActions() {
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
@@ -316,6 +310,8 @@ void X86TargetLowering::resetOperationActions() {
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
@@ -529,7 +525,9 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
@@ -659,8 +657,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
- MVT::i64 : MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -808,13 +805,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (int i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::ADD , VT, Expand);
setOperationAction(ISD::SUB , VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
@@ -883,18 +880,19 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction(VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(InnerVT, VT, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
- // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like types,
- // we have to deal with them whether we ask for Expansion or not. Setting
- // Expand causes its own optimisation problems though, so leave them legal.
- if (VT.getVectorElementType() == MVT::i1)
- setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
+      // types; we have to deal with them whether we ask for Expansion or not.
+ // Setting Expand causes its own optimisation problems though, so leave
+ // them legal.
+ if (VT.getVectorElementType() == MVT::i1)
+ setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
+ }
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -951,12 +949,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
- // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
+ // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
@@ -997,6 +996,14 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ // Only provide customized ctpop vector bit twiddling for vector types we
+ // know to perform better than using the popcnt instructions on each vector
+ // element. If popcnt isn't supported, always provide the custom version.
+ if (!Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
+ }
+
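
The "vector bit twiddling" those lines enable is, per lane, the classic
branch-free population count; a scalar sketch of that technique in plain C++
(assumed here; the vector lowering itself is not shown in this hunk):

    #include <cassert>
    #include <cstdint>

    uint32_t popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                 // 2-bit partial sums
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit partial sums
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
      return (v * 0x01010101u) >> 24;                   // add the four bytes
    }

    int main() {
      assert(popcount32(0x00000000u) == 0);
      assert(popcount32(0xFFFFFFFFu) == 32);
      assert(popcount32(0xDEADBEEFu) == 24);
    }
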
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
@@ -1011,6 +1018,22 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
+ // We support custom legalizing of sext and anyext loads for specific
+ // memory vector types which we can load as a scalar (or sequence of
+ // scalars) and extend in-register to a legal 128-bit vector type. For sext
+ // loads these must work with a single scalar load.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
+ }
+
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
@@ -1043,8 +1066,6 @@ void X86TargetLowering::resetOperationActions() {
AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
@@ -1064,7 +1085,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
+ for (MVT VT : MVT::fp_vector_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
@@ -1106,7 +1128,15 @@ void X86TargetLowering::resetOperationActions() {
// some vselects for now.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- // i8 and i16 vectors are custom , because the source register and source
+ // SSE41 brings specific instructions for doing vector sign extend even in
+ // cases where we don't have SRA.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ }
+
+    // i8 and i16 vectors are custom because the source register and
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
// information.
@@ -1120,7 +1150,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
- // FIXME: these should be Legal but thats only for the case where
+ // FIXME: these should be Legal, but that's only for the case where
// the index is constant. For now custom expand to deal with that.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
@@ -1200,7 +1230,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
+ for (MVT VT : MVT::fp_vector_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1270,6 +1301,20 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+ // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
+    // when we have a 256-bit-wide blend with immediate.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
+
+ // Only provide customized ctpop vector bit twiddling for vector types we
+ // know to perform better than using the popcnt instructions on each
+ // vector element. If popcnt isn't supported, always provide the custom
+ // version.
+ if (!Subtarget->hasPOPCNT())
+ setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
+
+ // Custom CTPOP always performs better on natively supported v8i32.
+ setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
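The custom CTPOP lowering referred to above replaces per-element popcnt with
vector bit twiddling. A standalone sketch of the classic SWAR scheme such
lowerings build on, shown for one 64-bit lane; this is illustrative, not the
exact sequence LLVM emits:

#include <cstdint>
#include <cstdio>

uint64_t popcount64(uint64_t X) {
  X = X - ((X >> 1) & 0x5555555555555555ULL);                 // 2-bit sums
  X = (X & 0x3333333333333333ULL) + ((X >> 2) & 0x3333333333333333ULL);
  X = (X + (X >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                 // 8-bit sums
  return (X * 0x0101010101010101ULL) >> 56;                   // horizontal add
}

int main() {
  std::printf("%llu\n", (unsigned long long)popcount64(0xF0F0F0F0F0F0F0F0ULL));
  // Prints: 32
}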
@@ -1298,15 +1343,16 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
- for (int i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
+ for (MVT VT : MVT::vector_valuetypes()) {
+ if (VT.getScalarSizeInBits() >= 32) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ }
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
- if (VT.is128BitVector())
+ if (VT.is128BitVector()) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-
+ }
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
@@ -1351,12 +1397,14 @@ void X86TargetLowering::resetOperationActions() {
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
+ for (MVT VT : MVT::fp_vector_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
+
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
@@ -1394,6 +1442,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
@@ -1466,16 +1518,13 @@ void X86TargetLowering::resetOperationActions() {
}
// Custom lower several nodes.
- for (int i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
+ for (MVT VT : MVT::vector_valuetypes()) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
// Extract subvector is special because the value type
// (result) is 256/128-bit but the source is 512-bit wide.
- if (VT.is128BitVector() || VT.is256BitVector())
+ if (VT.is128BitVector() || VT.is256BitVector()) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-
+ }
if (VT.getVectorElementType() == MVT::i1)
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
@@ -1491,12 +1540,14 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
}
}
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
- // Do not attempt to promote non-256-bit vectors
+ // Do not attempt to promote non-512-bit vectors.
if (!VT.is512BitVector())
continue;
@@ -1505,13 +1556,59 @@ void X86TargetLowering::resetOperationActions() {
}
}// has AVX-512
+ if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
+ addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+ addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
+
+ addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+
+ setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
+ setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v64i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v32i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v64i8, Legal);
+ setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+
+ for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
+ const MVT VT = (MVT::SimpleValueType)i;
+
+ const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Do not attempt to promote non-512-bit vectors.
+ if (!VT.is512BitVector())
+ continue;
+
+ if (EltSize < 32) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ }
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
+ addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
+ addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+
+ setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
+
+ setOperationAction(ISD::AND, MVT::v8i32, Legal);
+ setOperationAction(ISD::OR, MVT::v8i32, Legal);
+ setOperationAction(ISD::XOR, MVT::v8i32, Legal);
+ setOperationAction(ISD::AND, MVT::v4i32, Legal);
+ setOperationAction(ISD::OR, MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR, MVT::v4i32, Legal);
+ }
+
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
- for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
- VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
- Custom);
- }
+ for (MVT VT : MVT::vector_valuetypes())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -1537,9 +1634,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::UMULO, VT, Custom);
}
- // There are no 8-bit 3-address imul/mul instructions
- setOperationAction(ISD::SMULO, MVT::i8, Expand);
- setOperationAction(ISD::UMULO, MVT::i8, Expand);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -1553,9 +1647,8 @@ void X86TargetLowering::resetOperationActions() {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetDarwin()) {
- // For MacOSX, we don't want to the normal expansion of a libcall to
- // sincos. We want to issue a libcall to __sincos_stret to avoid memory
- // traffic.
+ // For MacOSX, we don't want the normal expansion of a libcall to sincos.
+ // We want to issue a libcall to __sincos_stret to avoid memory traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
@@ -1614,8 +1707,15 @@ void X86TargetLowering::resetOperationActions() {
  // Predictable cmovs don't hurt on Atom because it's in-order.
PredictableSelectIsExpensive = !Subtarget->isAtom();
-
+ EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
+
+ verifyIntrinsicTables();
+}
+
+// This has so far only been implemented for 64-bit MachO.
+bool X86TargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->isTargetMachO() && Subtarget->is64Bit();
}
TargetLoweringBase::LegalizeTypeAction
@@ -1632,16 +1732,46 @@ EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
- if (Subtarget->hasAVX512())
- switch(VT.getVectorNumElements()) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
+ const unsigned NumElts = VT.getVectorNumElements();
+ const EVT EltVT = VT.getVectorElementType();
+ if (VT.is512BitVector()) {
+ if (Subtarget->hasAVX512())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+ if (Subtarget->hasBWI())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 32: return MVT::v32i1;
+ case 64: return MVT::v64i1;
+ }
+ }
+
+ if (VT.is256BitVector() || VT.is128BitVector()) {
+ if (Subtarget->hasVLX())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 2: return MVT::v2i1;
+ case 4: return MVT::v4i1;
+ case 8: return MVT::v8i1;
+ }
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ case 32: return MVT::v32i1;
+ }
}
return VT.changeVectorElementTypeToInteger();
}
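To summarize the nested switches above: the SETCC result becomes a vXi1 mask
type whenever the subtarget has a mask-register feature matching the vector's
width and element size, and otherwise falls back to
changeVectorElementTypeToInteger(). A standalone sketch of the same decision
table (the feature flags are plain bools here, not the real subtarget API):

#include <cstdio>

const char *maskType(unsigned Bits, unsigned EltBits,
                     bool HasAVX512, bool HasBWI, bool HasVLX) {
  unsigned NumElts = Bits / EltBits;
  if (Bits == 512) {
    if (HasAVX512 && EltBits >= 32 && (NumElts == 8 || NumElts == 16))
      return NumElts == 8 ? "v8i1" : "v16i1";
    if (HasBWI && EltBits < 32 && (NumElts == 32 || NumElts == 64))
      return NumElts == 32 ? "v32i1" : "v64i1";
  } else if (Bits == 256 || Bits == 128) {
    if (HasVLX && EltBits >= 32) {
      if (NumElts == 2) return "v2i1";
      if (NumElts == 4) return "v4i1";
      if (NumElts == 8) return "v8i1";
    }
    if (HasVLX && HasBWI && EltBits < 32) {
      if (NumElts == 8)  return "v8i1";
      if (NumElts == 16) return "v16i1";
      if (NumElts == 32) return "v32i1";
    }
  }
  return "same-width integer vector"; // changeVectorElementTypeToInteger()
}

int main() {
  std::printf("v16f32 -> %s\n", maskType(512, 32, true, false, false));
  std::printf("v32i16 -> %s\n", maskType(512, 16, true, true, false));
  // Prints: v16f32 -> v16i1, then v32i16 -> v32i1
}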
-/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
@@ -1666,7 +1796,7 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
}
}
-/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
@@ -1685,7 +1815,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
return Align;
}
-/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe that the destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
@@ -1742,15 +1872,16 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
}
bool
-X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
if (Fast)
*Fast = Subtarget->isUnalignedMemAccessFast();
return true;
}
-/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
@@ -1776,8 +1907,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
-/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
-/// jumptable.
+/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget->is64Bit())
@@ -1787,9 +1917,8 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
return Table;
}
-/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
-/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
-/// MCExpr.
+/// This returns the relocation base for the given PIC jumptable,
+/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
@@ -1810,9 +1939,7 @@ X86TargetLowering::findRepresentativeClass(MVT VT) const{
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = Subtarget->is64Bit() ?
- (const TargetRegisterClass*)&X86::GR64RegClass :
- (const TargetRegisterClass*)&X86::GR32RegClass;
+ RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
@@ -1867,8 +1994,7 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
- RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1887,8 +2013,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
@@ -1905,7 +2030,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
- // Promote values to the appropriate types
+ // Promote values to the appropriate types.
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
@@ -1916,7 +2041,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
- "Unexpected FP-extend for return value.");
+ "Unexpected FP-extend for return value.");
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
@@ -1934,8 +2059,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
- if (VA.getLocReg() == X86::ST0 ||
- VA.getLocReg() == X86::ST1) {
+ if (VA.getLocReg() == X86::FP0 ||
+ VA.getLocReg() == X86::FP1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
@@ -2021,6 +2146,13 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
+ // If we are returning more than one value, we can definitely
+ // not make a tail call; see PR19530.
+ if (UI->getNumOperands() > 4)
+ return false;
+ if (UI->getNumOperands() == 4 &&
+ UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
+ return false;
HasRet = true;
}
@@ -2031,8 +2163,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-MVT
-X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
@@ -2041,11 +2173,11 @@ X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
else
ReturnMVT = MVT::i32;
- MVT MinVT = getRegisterType(ReturnMVT);
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
-/// LowerCallResult - Lower the result values of a call into the
+/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue
@@ -2058,8 +2190,8 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -2073,33 +2205,21 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
report_fatal_error("SSE register return with SSE disabled");
}
- SDValue Val;
-
- // If this is a call to a function that returns an fp value on the floating
- // point stack, we must guarantee the value is popped from the stack, so
- // a CopyFromReg is not good enough - the copy instruction may be eliminated
- // if the return value is not used. We use the FpPOP_RETVAL instruction
- // instead.
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
- // If we prefer to use the value in xmm registers, copy it out as f80 and
- // use a truncate to move it from fp stack reg to xmm reg.
- if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
- SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
- MVT::Other, MVT::Glue, Ops), 1);
- Val = Chain.getValue(0);
-
- // Round the f80 to the right size, which also moves it to the appropriate
- // xmm register.
- if (CopyVT != VA.getValVT())
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- CopyVT, InFlag).getValue(1);
- Val = Chain.getValue(0);
- }
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT()))
+ CopyVT = MVT::f80;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ SDValue Val = Chain.getValue(0);
+
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+
InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
@@ -2137,8 +2257,7 @@ callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
return StackStructReturn;
}
-/// ArgsAreStructReturn - Determines whether a function uses struct
-/// return semantics.
+/// Determines whether a function uses struct return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
@@ -2152,10 +2271,9 @@ argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
return StackStructReturn;
}
-/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
-/// by "Src" to address "Dst" with size and alignment information specified by
-/// the specific parameter attribute. The copy will be passed as a byval
-/// function parameter.
+/// Make a copy of an aggregate at address specified by "Src" to address
+/// "Dst" with size and alignment information specified by the specific
+/// parameter attribute. The copy will be passed as a byval function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
@@ -2167,7 +2285,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
MachinePointerInfo(), MachinePointerInfo());
}
-/// IsTailCallConvention - Return true if the calling convention is one that
+/// Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
@@ -2192,7 +2310,7 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
return true;
}
-/// FuncIsMadeTailCallSafe - Return true if the function is being made into
+/// Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
bool GuaranteedTailCallOpt) {
@@ -2240,6 +2358,55 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
}
}
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+ const X86Subtarget *Subtarget) {
+ assert(Subtarget->is64Bit());
+
+ if (Subtarget->isCallingConvWin64(CallConv)) {
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+ }
+
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
+ CallingConv::ID CallConv,
+ const X86Subtarget *Subtarget) {
+ assert(Subtarget->is64Bit());
+ if (Subtarget->isCallingConvWin64(CallConv)) {
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ // TODO: __vectorcall will change this.
+ return None;
+ }
+
+ const Function *Fn = MF.getFunction();
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasSSE1())
+ // Kernel mode asks for SSE to be disabled, so there are no XMM argument
+ // registers.
+ return None;
+
+ static const MCPhysReg XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
+}
+
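These helpers return the complete argument-register arrays; the caller below
asks CCInfo.getFirstUnallocated how many were already consumed by fixed
arguments and spills only the remaining slice. A standalone sketch of that
slice logic (register names are plain strings here, not the CCState API):

#include <cstdio>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> ArgGPRs =
      {"RDI", "RSI", "RDX", "RCX", "R8", "R9"};
  const unsigned NumIntRegs = 2; // e.g. two fixed args already took RDI, RSI

  // Equivalent of ArgGPRs.slice(NumIntRegs): registers that may still carry
  // variadic arguments and therefore need a home slot in the prologue.
  for (unsigned i = NumIntRegs; i < ArgGPRs.size(); ++i)
    std::printf("spill %s\n", ArgGPRs[i].c_str());
  // Prints: spill RDX, spill RCX, spill R8, spill R9 (one per line)
}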
SDValue
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -2267,8 +2434,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
@@ -2312,6 +2478,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
+ else if (RegVT == MVT::v32i1)
+ RC = &X86::VK32RegClass;
+ else if (RegVT == MVT::v64i1)
+ RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
@@ -2378,125 +2548,146 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
- if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
+ // the start of the first vararg value... for expansion of llvm.va_start. We
+ // can skip this if there are no va_start calls.
+ if (MFI->hasVAStart() &&
+ (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall))) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(1, StackSize, true));
+ }
+
+ // Figure out if XMM registers are in use.
+ assert(!(MF.getTarget().Options.UseSoftFloat &&
+ Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ // 64-bit calling conventions support varargs and register parameters, so we
+ // have to do extra work to spill them in the prologue.
+ if (Is64Bit && isVarArg && MFI->hasVAStart()) {
+ // Find the first unallocated argument registers.
+ ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
+ ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
+ unsigned NumIntRegs =
+ CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
+ unsigned NumXMMRegs =
+ CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ // Gather all the live in physical registers.
+ SmallVector<SDValue, 6> LiveGPRs;
+ SmallVector<SDValue, 8> LiveXMMRegs;
+ SDValue ALVal;
+ for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
+ unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
+ LiveGPRs.push_back(
+ DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
+ }
+ if (!ArgXMMs.empty()) {
+ unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
+ ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
+ for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
+ unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
+ LiveXMMRegs.push_back(
+ DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
+ }
}
- if (Is64Bit) {
- unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
-
- // FIXME: We should really autogenerate these arrays
- static const MCPhysReg GPR64ArgRegsWin64[] = {
- X86::RCX, X86::RDX, X86::R8, X86::R9
- };
- static const MCPhysReg GPR64ArgRegs64Bit[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- static const MCPhysReg XMMArgRegs64Bit[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
- const MCPhysReg *GPR64ArgRegs;
- unsigned NumXMMRegs = 0;
-
- if (IsWin64) {
- // The XMM registers which might contain var arg parameters are shadowed
- // in their paired GPR. So we only need to save the GPR to their home
- // slots.
- TotalNumIntRegs = 4;
- GPR64ArgRegs = GPR64ArgRegsWin64;
- } else {
- TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
- GPR64ArgRegs = GPR64ArgRegs64Bit;
- NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
- TotalNumXMMRegs);
- }
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
- TotalNumIntRegs);
-
- bool NoImplicitFloatOps = Fn->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
- "SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
- NoImplicitFloatOps) &&
- "SSE register cannot be used when SSE is disabled!");
- if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
- !Subtarget->hasSSE1())
- // Kernel mode asks for SSE to be disabled, so don't push them
- // on the stack.
- TotalNumXMMRegs = 0;
-
- if (IsWin64) {
- const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
- // Get to the caller-allocated home save location. Add 8 to account
- // for the return address.
- int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
- FuncInfo->setRegSaveFrameIndex(
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
- // Fixup to set vararg frame on shadow area (4 x i64).
- if (NumIntRegs < 4)
- FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
- } else {
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so
- // they may be loaded by deferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(
- MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
- false));
- }
-
- // Store the integer parameter registers.
- SmallVector<SDValue, 8> MemOps;
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy());
- unsigned Offset = FuncInfo->getVarArgsGPOffset();
- for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(Offset));
- unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- &X86::GR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(
- FuncInfo->getRegSaveFrameIndex(), Offset),
- false, false, 0);
- MemOps.push_back(Store);
- Offset += 8;
- }
-
- if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
- // Now store the XMM (fp + vector) parameter registers.
- SmallVector<SDValue, 11> SaveXMMOps;
- SaveXMMOps.push_back(Chain);
-
- unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
- SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
- SaveXMMOps.push_back(ALVal);
-
- SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getRegSaveFrameIndex()));
- SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getVarArgsFPOffset()));
-
- for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
- &X86::VR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
- SaveXMMOps.push_back(Val);
- }
- MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
- MVT::Other, SaveXMMOps));
- }
-
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
+ ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (SDValue Val : LiveGPRs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 12> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+ SaveXMMOps.push_back(ALVal);
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
+ SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
+ LiveXMMRegs.end());
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other, SaveXMMOps));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ }
+
+ if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
+ // Find the largest legal vector type.
+ MVT VecVT = MVT::Other;
+ // FIXME: Only some x86_32 calling conventions support AVX512.
+ if (Subtarget->hasAVX512() &&
+ (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
+ CallConv == CallingConv::Intel_OCL_BI)))
+ VecVT = MVT::v16f32;
+ else if (Subtarget->hasAVX())
+ VecVT = MVT::v8f32;
+ else if (Subtarget->hasSSE2())
+ VecVT = MVT::v4f32;
+
+ // We forward some GPRs and some vector types.
+ SmallVector<MVT, 2> RegParmTypes;
+ MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
+ RegParmTypes.push_back(IntVT);
+ if (VecVT != MVT::Other)
+ RegParmTypes.push_back(VecVT);
+
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
+
+ // Conservatively forward AL on x86_64, since it might be used for varargs.
+ if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
+ unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
+ Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
+ }
+
+ // Copy all forwards from physical to virtual registers.
+ for (ForwardedRegister &F : Forwards) {
+ // FIXME: Can we use a less constrained schedule?
+ SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
+ F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
+ Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
}
}
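The vector-type choice in this block walks the feature ladder from the widest
legal vector register down (with the extra calling-convention gate for
AVX-512 on 32-bit noted in the FIXME). A standalone sketch of the ladder,
with the feature flags as plain bools:

#include <cstdio>

const char *pickForwardVecVT(bool HasAVX512, bool HasAVX, bool HasSSE2) {
  if (HasAVX512) return "v16f32"; // 512-bit ZMM registers
  if (HasAVX)    return "v8f32";  // 256-bit YMM registers
  if (HasSSE2)   return "v4f32";  // 128-bit XMM registers
  return "none";                  // no vector registers to forward
}

int main() {
  std::printf("%s\n", pickForwardVecVT(false, true, true)); // Prints: v8f32
}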
@@ -2544,7 +2735,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
false, false, 0);
}
-/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// Emit a load of return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
@@ -2561,7 +2752,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
return SDValue(OutRetAddr.getNode(), 1);
}
-/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
@@ -2599,6 +2790,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
+ X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
@@ -2630,8 +2822,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
@@ -2652,7 +2843,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
int FPDiff = 0;
if (isTailCall && !IsSibcall && !IsMustTail) {
// Lower arguments at fp - stackoffset + fpdiff.
- X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
@@ -2671,8 +2861,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
- assert(ArgLocs.back().getLocMemOffset() == 0 &&
- "an inalloca argument must be the only memory argument");
+ if (!ArgLocs.back().isMemLoc())
+ report_fatal_error("cannot use inalloca attribute on a register "
+ "parameter");
+ if (ArgLocs.back().getLocMemOffset() != 0)
+ report_fatal_error("any parameter with the inalloca attribute must be "
+ "the only memory argument");
}
if (!IsSibcall)
@@ -2691,8 +2885,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2791,7 +2985,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (Is64Bit && isVarArg && !IsWin64) {
+ if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2813,6 +3007,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(NumXMMRegs, MVT::i8)));
}
+ if (isVarArg && IsMustTail) {
+ const auto &Forwards = X86Info->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ }
+ }
+
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
@@ -2889,10 +3091,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ } else if (Callee->getOpcode() == ISD::GlobalAddress) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
+ GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
@@ -2962,6 +3165,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
+ } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
+ // Zero-extend the 32-bit Callee address to 64 bits, as the x32 ABI requires.
+ Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// Returns a chain & a flag for retval copy to use.
@@ -2988,7 +3194,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3079,9 +3285,9 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
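The AlignMask arithmetic that this function builds on rounds the stack size
up to the alignment, which works only because StackAlignment is a power of
two. A standalone sketch of the round-up idiom (the real function layers
further adjustments, such as space for the return address, on top):

#include <cstdint>
#include <cstdio>

uint64_t alignUp(uint64_t Size, uint64_t Alignment) {
  uint64_t Mask = Alignment - 1;  // e.g. 16 -> 0xF; requires a power of two
  return (Size + Mask) & ~Mask;   // round up to the next multiple
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignUp(52, 16)); // Prints: 64
}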
@@ -3194,8 +3400,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3223,8 +3429,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -3244,12 +3450,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
@@ -3258,13 +3464,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -3290,8 +3496,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Allocate shadow area for Win64
if (IsCalleeWin64)
@@ -3308,7 +3514,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- static_cast<const X86InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3336,7 +3542,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
- (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+ (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3378,6 +3584,8 @@ static bool MayFoldIntoStore(SDValue Op) {
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
+ case X86ISD::BLENDI:
+ case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
@@ -3395,7 +3603,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
return true;
@@ -3421,7 +3629,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERMI:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -3433,6 +3641,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGNR:
+ case X86ISD::VALIGN:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3459,8 +3668,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3500,7 +3709,7 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
  // For kernel code model we know that all objects reside in the negative half
  // of the 32-bit address space. We may not accept negative offsets, since they may
// be just off and we may accept pretty large positive ones.
- if (M == CodeModel::Kernel && Offset > 0)
+ if (M == CodeModel::Kernel && Offset >= 0)
return true;
return false;
@@ -3510,23 +3719,18 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
/// own arguments. Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt) {
- if (IsVarArg)
- return false;
-
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
- return !is64Bit;
case CallingConv::X86_FastCall:
- return !is64Bit;
case CallingConv::X86_ThisCall:
return !is64Bit;
case CallingConv::Fast:
- return TailCallOpt;
case CallingConv::GHC:
- return TailCallOpt;
case CallingConv::HiPE:
+ if (IsVarArg)
+ return false;
return TailCallOpt;
}
}
@@ -3667,6 +3871,18 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
}
+bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtTy,
+ EVT NewVT) const {
+ // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
+ // relocation must target a movq or addq instruction: don't let the load shrink.
+ SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
+ if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
+ return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
+ return true;
+}
+
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -3679,6 +3895,24 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
+ unsigned Index) const {
+ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+ return false;
+
+ return (Index == 0 || Index == ResVT.getVectorNumElements());
+}
+
+bool X86TargetLowering::isCheapToSpeculateCttz() const {
+ // Speculate cttz only if we can directly use TZCNT.
+ return Subtarget->hasBMI();
+}
+
+bool X86TargetLowering::isCheapToSpeculateCtlz() const {
+ // Speculate ctlz only if we can directly use LZCNT.
+ return Subtarget->hasLZCNT();
+}
+
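The gating above exists because the legacy BSF/BSR instructions leave their
result undefined for a zero input, so speculating a count without TZCNT or
LZCNT would still require an explicit zero guard. A standalone sketch of the
guarded form (assumes the GCC/Clang __builtin_clz builtin):

#include <cstdint>
#include <cstdio>

unsigned ctlz32(uint32_t X) {
  if (X == 0)
    return 32;             // LZCNT gives this for free; BSR does not
  return __builtin_clz(X); // well defined here because X != 0
}

int main() {
  std::printf("%u %u\n", ctlz32(0), ctlz32(1)); // Prints: 32 31
}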
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -3693,7 +3927,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
-/// sequential range (L, L+Pos]. or is undef.
+/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
unsigned Pos, unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
@@ -3703,14 +3937,23 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
}
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
-/// the second operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 )
- return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return (Mask[0] < 2 && Mask[1] < 2);
- return false;
+/// is suitable for input to PSHUFD. That is, it doesn't reference the other
+/// operand; by default it matches against the first operand.
+static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT,
+ bool TestSecondOperand = false) {
+ if (VT != MVT::v4f32 && VT != MVT::v4i32 &&
+ VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Lo = TestSecondOperand ? NumElems : 0;
+ unsigned Hi = Lo + NumElems;
+
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], (int)Lo, (int)Hi))
+ return false;
+
+ return true;
}
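The rewritten predicate reduces to: every mask entry is either undef (-1) or
indexes into the chosen operand's half of the combined index space. A
standalone sketch of the same check (plain arrays instead of ArrayRef;
illustrative only):

#include <cstdio>

bool isPSHUFDLikeMask(const int *Mask, unsigned NumElems, bool SecondOperand) {
  unsigned Lo = SecondOperand ? NumElems : 0;
  for (unsigned i = 0; i < NumElems; ++i)
    if (Mask[i] >= 0 &&
        ((unsigned)Mask[i] < Lo || (unsigned)Mask[i] >= Lo + NumElems))
      return false; // a defined index points at the wrong operand
  return true;
}

int main() {
  const int M1[4] = {2, -1, 0, 3}; // all lanes from the first operand
  const int M2[4] = {2, 5, 0, 3};  // index 5 references the second operand
  std::printf("%d %d\n", isPSHUFDLikeMask(M1, 4, false),
              isPSHUFDLikeMask(M2, 4, false)); // Prints: 1 0
}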
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
@@ -3771,16 +4014,12 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
return true;
}
-/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
- (VT.is256BitVector() && !Subtarget->hasInt256()))
- return false;
-
+/// \brief Return true if the mask specifies a shuffle of elements that is
+/// suitable for input to intralane (palignr) or interlane (valign) vector
+/// right-shift.
+static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
+ unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
@@ -3844,6 +4083,29 @@ static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
return true;
}
+/// \brief Return true if the node specifies a shuffle of elements that is
+/// suitable for input to PALIGNR.
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
+ const X86Subtarget *Subtarget) {
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()) ||
+ VT.is512BitVector())
+ // FIXME: Add AVX512BW.
+ return false;
+
+ return isAlignrMask(Mask, VT, false);
+}
+
+/// \brief Return true if the node specifies a shuffle of elements that is
+/// suitable for input to VALIGN.
+static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
+ const X86Subtarget *Subtarget) {
+ // FIXME: Add AVX512VL.
+ if (!VT.is512BitVector() || !Subtarget->hasAVX512())
+ return false;
+ return isAlignrMask(Mask, VT, true);
+}
+
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
@@ -4086,43 +4348,34 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckl");
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes;
- unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector()) {
- if (NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
- NumLanes = 2;
- NumOf256BitLanes = 1;
- } else if (VT.is512BitVector()) {
- assert(VT.getScalarType().getSizeInBits() >= 32 &&
- "Unsupported vector type for unpckh");
- NumLanes = 2;
- NumOf256BitLanes = 2;
- } else {
- NumLanes = 1;
- NumOf256BitLanes = 1;
- }
- unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
- unsigned NumLaneElts = NumEltsInStride/NumLanes;
+ assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
+ "Unsupported vector type for unpckh");
- for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
- for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l256*NumEltsInStride+l+i];
- int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
- if (!isUndefOrEqual(BitI, j+l256*NumElts))
- return false;
- if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l+i];
+ int BitI1 = Mask[l+i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
return false;
- if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
return false;
}
}
}
+
return true;
}
@@ -4133,39 +4386,29 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes;
- unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector()) {
- if (NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
- NumLanes = 2;
- NumOf256BitLanes = 1;
- } else if (VT.is512BitVector()) {
- assert(VT.getScalarType().getSizeInBits() >= 32 &&
- "Unsupported vector type for unpckh");
- NumLanes = 2;
- NumOf256BitLanes = 2;
- } else {
- NumLanes = 1;
- NumOf256BitLanes = 1;
- }
- unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
- unsigned NumLaneElts = NumEltsInStride/NumLanes;
+ assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
+ "Unsupported vector type for unpckh");
- for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
- for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l256*NumEltsInStride+l+i];
- int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
- if (!isUndefOrEqual(BitI, j+l256*NumElts))
- return false;
- if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l+i];
+ int BitI1 = Mask[l+i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
return false;
- if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
return false;
}
}
@@ -4668,11 +4911,13 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
return Mask;
}
-/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
+/// VALIGN (if InterLane is true) instructions.
+static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
+ bool InterLane) {
MVT VT = SVOp->getSimpleValueType(0);
- unsigned EltSize = VT.is512BitVector() ? 1 :
+ unsigned EltSize = InterLane ? 1 :
VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
@@ -4693,6 +4938,19 @@ static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
return (Val - i) * EltSize;
}
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the PALIGNR instruction.
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ return getShuffleAlignrImmediate(SVOp, false);
+}
+
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the VALIGN instruction.
+static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
+ return getShuffleAlignrImmediate(SVOp, true);
+}
+
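Both wrappers share one computation: for a sequential rotate mask, the
immediate is the rotation distance scaled by the unit the instruction counts
in, bytes for PALIGNR and whole elements for VALIGN. A standalone sketch
(plain arrays rather than the SDNode API; it assumes the mask has already
been validated as a rotation):

#include <cstdio>

unsigned alignrImmediate(const int *Mask, unsigned NumElts, unsigned EltUnit) {
  for (unsigned i = 0; i < NumElts; ++i)
    if (Mask[i] >= 0)                                // skip undef (-1) lanes
      return (unsigned)(Mask[i] - (int)i) * EltUnit; // rotation * unit
  return 0;                                          // fully undef mask
}

int main() {
  const int Mask[4] = {1, 2, 3, 4};                  // rotate v4i32 by one lane
  std::printf("palignr imm: %u\n", alignrImmediate(Mask, 4, 4)); // bytes: 4
  std::printf("valign imm: %u\n", alignrImmediate(Mask, 4, 1));  // elts: 1
}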
+
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -4891,32 +5149,32 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Vec;
if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
}
} else if (VT.is512BitVector()) { // AVX-512
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
- SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
+ SDValue Cst = DAG.getConstant(0, MVT::i1);
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
@@ -4933,7 +5191,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Cst = DAG.getConstant(~0U, MVT::i32);
SDValue Vec;
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
@@ -5103,37 +5361,49 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
}
/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
-/// target specific opcode. Returns true if the Mask could be calculated.
-/// Sets IsUnary to true if only uses one source.
+/// target specific opcode. Returns true if the Mask could be calculated. Sets
+/// IsUnary to true if the shuffle uses only one source. Note that this will
+/// set IsUnary for shuffles which use a single input multiple times, and in
+/// those cases it will adjust the mask to only have indices within that input.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
IsUnary = false;
+ bool IsFakeUnary = false;
switch(N->getOpcode()) {
+ case X86ISD::BLENDI:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::PALIGNR:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::PSHUFD:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
@@ -5148,6 +5418,66 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
+ case X86ISD::PSHUFB: {
+ IsUnary = true;
+ SDValue MaskNode = N->getOperand(1);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+ MaskNode = MaskNode->getOperand(0);
+
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ EVT VT = MaskNode.getValueType();
+ assert(VT.isVector() &&
+ "Can't produce a non-vector with a build_vector!");
+ if (!VT.isInteger())
+ return false;
+
+ int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
+
+ SmallVector<uint64_t, 32> RawMask;
+ for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ continue;
+ }
+ auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
+ if (!CN)
+ return false;
+ APInt MaskElement = CN->getAPIntValue();
+
+ // We now have to decode the element which could be any integer size and
+ // extract each byte of it.
+ for (int j = 0; j < NumBytesPerElement; ++j) {
+ // Note that this is x86 and so always little endian: the low byte is
+ // the first byte of the mask.
+ RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
+ MaskElement = MaskElement.lshr(8);
+ }
+ }
+ DecodePSHUFBMask(RawMask, Mask);
+ break;
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ DecodePSHUFBMask(C, Mask);
+ break;
+ }
+
+ return false;
+ }
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
@@ -5169,17 +5499,29 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
break;
+ case X86ISD::MOVSLDUP:
+ DecodeMOVSLDUPMask(VT, Mask);
+ break;
+ case X86ISD::MOVSHDUP:
+ DecodeMOVSHDUPMask(VT, Mask);
+ break;
case X86ISD::MOVDDUP:
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
}
+ // If we have a fake unary shuffle, the shuffle mask is spread across two
+ // inputs that are actually the same node. Re-map the mask to always point
+ // into the first input.
+ if (IsFakeUnary)
+ for (int &M : Mask)
+ if (M >= (int)Mask.size())
+ M -= Mask.size();
+
return true;
}
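The fake-unary remapping at the end of getTargetShuffleMask is mechanical enough to check in isolation. A standalone sketch, with std::vector standing in for SmallVectorImpl:

#include <cassert>
#include <vector>

static void remapFakeUnaryMask(std::vector<int> &Mask) {
  // Both inputs were the same node, so fold second-input indices (>= Size)
  // back into the first input's index space.
  int Size = static_cast<int>(Mask.size());
  for (int &M : Mask)
    if (M >= Size)
      M -= Size;
}

int main() {
  // UNPCKL of a v4i32 value with itself decodes as <0,4,1,5>; after
  // remapping it reads as the unary mask <0,0,1,1>.
  std::vector<int> Mask = {0, 4, 1, 5};
  remapFakeUnaryMask(Mask);
  assert((Mask == std::vector<int>{0, 0, 1, 1}));
}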
@@ -5470,76 +5812,112 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
}
/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
-static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
- unsigned NonZeros, unsigned NumNonZero,
- unsigned NumZero, SelectionDAG &DAG,
+static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
- // We know there's at least one non-zero element
- unsigned FirstNonZeroIdx = 0;
- SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
- while (FirstNonZero.getOpcode() == ISD::UNDEF ||
- X86::isZeroNode(FirstNonZero)) {
- ++FirstNonZeroIdx;
- FirstNonZero = Op->getOperand(FirstNonZeroIdx);
+ // Find all zeroable elements.
+ bool Zeroable[4];
+ for (int i=0; i < 4; ++i) {
+ SDValue Elt = Op->getOperand(i);
+ Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
+ }
+ assert(std::count_if(&Zeroable[0], &Zeroable[4],
+ [](bool M) { return !M; }) > 1 &&
+ "We expect at least two non-zero elements!");
+
+ // We only know how to deal with build_vector nodes where elements are either
+ // zeroable or extract_vector_elt with constant index.
+ SDValue FirstNonZero;
+ unsigned FirstNonZeroIdx;
+ for (unsigned i=0; i < 4; ++i) {
+ if (Zeroable[i])
+ continue;
+ SDValue Elt = Op->getOperand(i);
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Elt.getOperand(1)))
+ return SDValue();
+ // Make sure that this node is extracting from a 128-bit vector.
+ MVT VT = Elt.getOperand(0).getSimpleValueType();
+ if (!VT.is128BitVector())
+ return SDValue();
+ if (!FirstNonZero.getNode()) {
+ FirstNonZero = Elt;
+ FirstNonZeroIdx = i;
+ }
}
- if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
- return SDValue();
+ assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
+ SDValue V1 = FirstNonZero.getOperand(0);
+ MVT VT = V1.getSimpleValueType();
- SDValue V = FirstNonZero.getOperand(0);
- MVT VVT = V.getSimpleValueType();
- if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
- return SDValue();
+ // See if this build_vector can be lowered as a blend with zero.
+ SDValue Elt;
+ unsigned EltMaskIdx, EltIdx;
+ int Mask[4];
+ for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
+ if (Zeroable[EltIdx]) {
+ // The zero vector will be on the right hand side.
+ Mask[EltIdx] = EltIdx+4;
+ continue;
+ }
- unsigned FirstNonZeroDst =
- cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
- unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
- unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
- unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
+ Elt = Op->getOperand(EltIdx);
+ // By construction, Elt is an EXTRACT_VECTOR_ELT with constant index.
+ EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
+ if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
+ break;
+ Mask[EltIdx] = EltIdx;
+ }
- for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
- SDValue Elem = Op.getOperand(Idx);
- if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
- continue;
+ if (EltIdx == 4) {
+ // Let the shuffle legalizer deal with blend operations.
+ SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
+ if (V1.getSimpleValueType() != VT)
+ V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
+ return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
+ }
- // TODO: What else can be here? Deal with it.
- if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
+ // See if we can lower this build_vector to a INSERTPS.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
- // TODO: Some optimizations are still possible here
- // ex: Getting one element from a vector, and the rest from another.
- if (Elem.getOperand(0) != V)
- return SDValue();
+ SDValue V2 = Elt.getOperand(0);
+ if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
+ V1 = SDValue();
- unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
- if (Dst == Idx)
- ++CorrectIdx;
- else if (IncorrectIdx == -1U) {
- IncorrectIdx = Idx;
- IncorrectDst = Dst;
- } else
- // There was already one element with an incorrect index.
- // We can't optimize this case to an insertps.
- return SDValue();
+ bool CanFold = true;
+ for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
+ if (Zeroable[i])
+ continue;
+
+ SDValue Current = Op->getOperand(i);
+ SDValue SrcVector = Current->getOperand(0);
+ if (!V1.getNode())
+ V1 = SrcVector;
+ CanFold = SrcVector == V1 &&
+ cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
}
- if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
- SDLoc dl(Op);
- EVT VT = Op.getSimpleValueType();
- unsigned ElementMoveMask = 0;
- if (IncorrectIdx == -1U)
- ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
- else
- ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
+ if (!CanFold)
+ return SDValue();
- SDValue InsertpsMask =
- DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
- }
+ assert(V1.getNode() && "Expected at least two non-zero elements!");
+ if (V1.getSimpleValueType() != MVT::v4f32)
+ V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
+ if (V2.getSimpleValueType() != MVT::v4f32)
+ V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
- return SDValue();
+ // Ok, we can emit an INSERTPS instruction.
+ unsigned ZMask = 0;
+ for (int i = 0; i < 4; ++i)
+ if (Zeroable[i])
+ ZMask |= 1 << i;
+
+ unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
+ SDValue Result = DAG.getNode(X86ISD::INSERTPS, SDLoc(Op), MVT::v4f32, V1, V2,
+ DAG.getIntPtrConstant(InsertPSMask));
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), VT, Result);
}
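The InsertPSMask value built above follows the INSERTPS immediate layout: bits [7:6] select the source element of the inserted operand, bits [5:4] the destination slot, and bits [3:0] zero out result lanes. A tiny sketch with a hypothetical helper name:

#include <cassert>

static unsigned insertPSImm(unsigned SrcIdx, unsigned DstIdx, unsigned ZMask) {
  unsigned Imm = SrcIdx << 6 | DstIdx << 4 | ZMask;
  assert((Imm & ~0xFFu) == 0 && "Invalid mask!");
  return Imm;
}

int main() {
  // Insert element 2 of V2 into slot 1 of V1 and zero slot 3:
  // 0b10'01'1000 == 0x98.
  assert(insertPSImm(2, 1, 1u << 3) == 0x98);
}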
/// getVShift - Return a vector logical shift node.
@@ -5685,15 +6063,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue NewLd = SDValue();
- if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
- NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getPointerInfo(),
- LDBase->isVolatile(), LDBase->isNonTemporal(),
- LDBase->isInvariant(), 0);
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getPointerInfo(),
- LDBase->isVolatile(), LDBase->isNonTemporal(),
- LDBase->isInvariant(), LDBase->getAlignment());
+ LDBase->getPointerInfo(), LDBase->isVolatile(),
+ LDBase->isNonTemporal(), LDBase->isInvariant(),
+ LDBase->getAlignment());
if (LDBase->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -5706,7 +6079,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
return NewLd;
}
- if (NumElems == 4 && LastLoadedElt == 1 &&
+
+ // TODO: The code below fires only for loading the low v2i32 / v2f32
+ //of a v4i32 / v4f32. It's probably worth generalizing.
+ if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
@@ -5742,7 +6118,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// or SDValue() otherwise.
static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
- if (!Subtarget->hasFp256())
+ // VBROADCAST requires AVX.
+ // TODO: Splats could be generated for non-AVX CPUs using SSE
+ // instructions, but there's less potential gain for only 128-bit vectors.
+ if (!Subtarget->hasAVX())
return SDValue();
MVT VT = Op.getSimpleValueType();
@@ -5819,17 +6198,34 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
}
}
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
- // Handle the broadcasting a single constant scalar from the constant pool
- // into a vector. On Sandybridge it is still better to load a constant vector
+ // When optimizing for size, generate up to 5 extra bytes for a broadcast
+ // instruction to save 8 or more bytes of constant pool data.
+ // TODO: If multiple splats are generated to load the same constant,
+ // it may be detrimental to overall size. There needs to be a way to detect
+ // that condition to know if this is truly a size win.
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool OptForSize = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ // Handle broadcasting a single constant scalar from the constant pool
+ // into a vector.
+ // On Sandybridge (no AVX2), it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
- if (ConstSplatVal && Subtarget->hasInt256()) {
+ // But override that restriction when optimizing for size.
+ // TODO: Check if splatting is recommended for other AVX-capable CPUs.
+ if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
- unsigned ScalarSize = CVT.getSizeInBits();
- if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
+ // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
+ // For size optimization, also splat v2f64 and v2i64, and for size opt
+ // with AVX2, also splat i8 and i16.
+ // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -5850,7 +6246,6 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
- unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget->hasInt256() &&
@@ -5861,7 +6256,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
if (!IsLoad)
return SDValue();
- if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (Subtarget->hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
@@ -6017,8 +6413,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
AllContants = false;
NonConstIdx = idx;
NumNonConsts++;
- }
- else {
+ } else {
NumConsts++;
if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
@@ -6041,7 +6436,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT::getIntegerVT(VT.getSizeInBits()));
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
}
- else
+ else
DstVec = DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(NonConstIdx),
@@ -6064,7 +6459,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
-///
+///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
@@ -6085,7 +6480,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
-
+
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
@@ -6154,13 +6549,13 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
}
/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
-/// a concat_vector.
+/// a concat_vector.
///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
-/// horizontal binary operations.
+/// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
@@ -6235,11 +6630,6 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v2f64) && "build_vector with an invalid type found!");
- // Don't try to emit a VSELECT that cannot be lowered into a blend.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
- return SDValue();
-
// Odd-numbered elements in the input build vector are obtained from
// adding two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
@@ -6251,14 +6641,14 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
for (unsigned i = 0, e = NumElts; i != e; i++) {
SDValue Op = BV->getOperand(i);
-
+
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::UNDEF) {
std::swap(ExpectedOpcode, NextExpectedOpcode);
continue;
}
-
+
// Early exit if we found an unexpected opcode.
if (Opcode != ExpectedOpcode)
return SDValue();
@@ -6312,34 +6702,11 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
- // Don't try to fold this build_vector into a VSELECT if it has
- // too many UNDEF operands.
+ // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
- InVec1.getOpcode() != ISD::UNDEF) {
- // Emit a sequence of vector add and sub followed by a VSELECT.
- // The new VSELECT will be lowered into a BLENDI.
- // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
- // and emit a single ADDSUB instruction.
- SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
- SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
-
- // Construct the VSELECT mask.
- EVT MaskVT = VT.changeVectorElementTypeToInteger();
- EVT SVT = MaskVT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
- SmallVector<SDValue, 8> Ops;
+ InVec1.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
- for (unsigned i = 0, e = NumElts; i != e; ++i) {
- APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
- APInt::getAllOnesValue(SVTBits);
- SDValue Constant = DAG.getConstant(Value, SVT);
- Ops.push_back(Constant);
- }
-
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
- return DAG.getSelect(DL, VT, Mask, Sub, Add);
- }
-
return SDValue();
}
@@ -6382,18 +6749,18 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
+
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
// Try to match an SSSE3 integer HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
+
if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
-
+
if (!Subtarget->hasAVX())
return SDValue();
@@ -6444,7 +6811,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
// Do this only if the target has AVX2.
if (Subtarget->hasAVX2())
return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
+
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
@@ -6575,6 +6942,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+
+ // If using the new shuffle lowering, just directly insert this.
+ if (ExperimentalVectorShuffleLowering)
+ return DAG.getNode(
+ ISD::BITCAST, dl, VT,
+ getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
+
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
@@ -6648,6 +7022,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // If using the new shuffle lowering, just directly insert this.
+ if (ExperimentalVectorShuffleLowering)
+ return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
+
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
@@ -6677,13 +7055,18 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (IsAllConstants)
return SDValue();
- // For AVX-length vectors, build the individual 128-bit pieces and use
+ // For AVX-length vectors, see if we can use a vector load to get all of the
+ // elements, otherwise build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
+ // Check for a build vector of consecutive loads.
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
+ return LD;
+
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build both the lower and upper subvector.
@@ -6725,8 +7108,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
if (EVTBits == 32 && NumElems == 4) {
- SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
- NumZero, DAG, Subtarget, *this);
+ SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
if (V.getNode())
return V;
}
@@ -6917,6 +7299,89 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
return true;
}
+/// \brief Test whether there are elements crossing 128-bit lanes in this
+/// shuffle mask.
+///
+/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
+/// and we routinely test for these.
+static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
+ return true;
+ return false;
+}
+
+/// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
+///
+/// This checks a shuffle mask to see if it is performing the same
+/// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
+/// that it is also not lane-crossing. It may however involve a blend from the
+/// same lane of a second vector.
+///
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// *not* suitable for use with existing 128-bit shuffles as it will contain
+/// entries from both V1 and V2 inputs to the wider mask.
+static bool
+is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ RepeatedMask.resize(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ if (RepeatedMask[i % LaneSize] == -1)
+ // This is the first non-undef entry in this slot of a 128-bit lane.
+ RepeatedMask[i % LaneSize] =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
+ else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
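A worked example of the repetition check helps here; the following standalone sketch uses an equivalent (slightly simplified) rewrite of the index arithmetic above, with std::vector in place of the LLVM containers:

#include <cassert>
#include <vector>

static bool isRepeatedPerLane(const std::vector<int> &Mask, int LaneSize,
                              std::vector<int> &RepeatedMask) {
  RepeatedMask.assign(LaneSize, -1);
  int Size = static_cast<int>(Mask.size());
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      return false; // Crosses a lane boundary.
    // Normalize the entry to lane-relative form, keeping the V1/V2 choice.
    int Expected =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
    if (RepeatedMask[i % LaneSize] == -1)
      RepeatedMask[i % LaneSize] = Expected;
    else if (RepeatedMask[i % LaneSize] != Expected)
      return false; // The lanes disagree.
  }
  return true;
}

int main() {
  // A v8f32 unpcklps-style mask repeats <0,8,1,9> in both 128-bit lanes.
  std::vector<int> Repeated;
  assert(isRepeatedPerLane({0, 8, 1, 9, 4, 12, 5, 13}, 4, Repeated));
  assert((Repeated == std::vector<int>{0, 8, 1, 9}));
}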
+
+// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
+// 2013 will allow us to use it as a non-type template parameter.
+namespace {
+
+/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
+///
+/// See its documentation for details.
+bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
+ if (Mask.size() != Args.size())
+ return false;
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ assert(*Args[i] >= 0 && "Arguments must be positive integers!");
+ if (Mask[i] != -1 && Mask[i] != *Args[i])
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
+/// arguments.
+///
+/// This is a fast way to test a shuffle mask against a fixed pattern:
+///
+/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
+///
+/// It returns true if the mask is exactly as wide as the argument list, and
+/// each element of the mask is either -1 (signifying undef) or the value given
+/// in the argument.
+static const VariadicFunction1<
+ bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -6941,6 +7406,835 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
return DAG.getConstant(Imm, MVT::i8);
}
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is in fact a blend.
+static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+
+ unsigned BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] >= Size) {
+ if (Mask[i] != i + Size)
+ return SDValue(); // Shuffled V2 input!
+ BlendMask |= 1u << i;
+ continue;
+ }
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return SDValue(); // Shuffled V1 input!
+ }
+ switch (VT.SimpleTy) {
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4f64:
+ case MVT::v8f32:
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8));
+
+ case MVT::v4i64:
+ case MVT::v8i32:
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ // FALLTHROUGH
+ case MVT::v2i64:
+ case MVT::v4i32:
+ // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
+ // that instruction.
+ if (Subtarget->hasAVX2()) {
+ // Scale the blend by the number of 32-bit dwords per element.
+ int Scale = VT.getScalarSizeInBits() / 32;
+ BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= Size)
+ for (int j = 0; j < Scale; ++j)
+ BlendMask |= 1u << (i * Scale + j);
+
+ MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
+ V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8)));
+ }
+ // FALLTHROUGH
+ case MVT::v8i16: {
+ // For integer shuffles we need to expand the mask and cast the inputs to
+ // v8i16s prior to blending.
+ int Scale = 8 / VT.getVectorNumElements();
+ BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= Size)
+ for (int j = 0; j < Scale; ++j)
+ BlendMask |= 1u << (i * Scale + j);
+
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8)));
+ }
+
+ case MVT::v16i16: {
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ SmallVector<int, 8> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
+ // We can lower these with PBLENDW which is mirrored across 128-bit lanes.
+ assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
+ BlendMask = 0;
+ for (int i = 0; i < 8; ++i)
+ if (RepeatedMask[i] >= 16)
+ BlendMask |= 1u << i;
+ return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8));
+ }
+ }
+ // FALLTHROUGH
+ case MVT::v32i8: {
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ // Scale the blend by the number of bytes per element.
+ int Scale = VT.getScalarSizeInBits() / 8;
+ assert(Mask.size() * Scale == 32 && "Not a 256-bit vector!");
+
+ // Compute the VSELECT mask. Note that VSELECT is really confusing in the
+ // mix of LLVM's code generator and the x86 backend. We tell the code
+ // generator that boolean values in the elements of an x86 vector register
+ // are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
+ // mapping a select to operand #1, and 'false' mapping to operand #2. The
+ // reality in x86 is that vector masks (pre-AVX-512) use only the high bit
+ // of the element (the remaining are ignored) and 0 in that high bit would
+ // mean operand #1 while 1 in the high bit would mean operand #2. So while
+ // the LLVM model for boolean values in vector elements gets the relevant
+ // bit set, it is set backwards and over-constrained relative to x86's
+ // actual model.
+ SDValue VSELECTMask[32];
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ for (int j = 0; j < Scale; ++j)
+ VSELECTMask[Scale * i + j] =
+ Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
+ : DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8);
+
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V2);
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::VSELECT, DL, MVT::v32i8,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, VSELECTMask),
+ V1, V2));
+ }
+
+ default:
+ llvm_unreachable("Not a supported integer vector type!");
+ }
+}
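The VPBLENDD rescaling above is easy to get wrong by a bit, so here is a standalone sketch of just the mask scaling with a worked v2i64-to-v4i32 case:

#include <cassert>
#include <vector>

static unsigned scaleBlendMask(const std::vector<int> &Mask, int Scale) {
  unsigned BlendMask = 0;
  int Size = static_cast<int>(Mask.size());
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= Size) // Element taken from V2.
      for (int j = 0; j < Scale; ++j)
        BlendMask |= 1u << (i * Scale + j);
  return BlendMask;
}

int main() {
  // v2i64 <0,3> takes the high element from V2; each 64-bit element covers
  // two dwords, so the v4i32 VPBLENDD immediate is 0b1100.
  assert(scaleBlendMask({0, 3}, 2) == 0xC);
}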
+
+/// \brief Generic routine to lower a shuffle and blend as a decomposed set of
+/// unblended shuffles followed by an unshuffled blend.
+///
+/// This matches the extremely common pattern for handling combined
+/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
+/// operations.
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
+ SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // Shuffle the input elements into the desired positions in V1 and V2 and
+ // blend them together.
+ SmallVector<int, 32> V1Mask(Mask.size(), -1);
+ SmallVector<int, 32> V2Mask(Mask.size(), -1);
+ SmallVector<int, 32> BlendMask(Mask.size(), -1);
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= 0 && Mask[i] < Size) {
+ V1Mask[i] = Mask[i];
+ BlendMask[i] = i;
+ } else if (Mask[i] >= Size) {
+ V2Mask[i] = Mask[i] - Size;
+ BlendMask[i] = i + Size;
+ }
+
+ V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+ return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+}
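A standalone sketch of the decomposition, showing how a mixed two-input mask splits into two unary masks plus a final blend mask:

#include <cassert>
#include <vector>

struct Decomposed {
  std::vector<int> V1Mask, V2Mask, BlendMask;
};

static Decomposed decompose(const std::vector<int> &Mask) {
  int Size = static_cast<int>(Mask.size());
  Decomposed D{std::vector<int>(Size, -1), std::vector<int>(Size, -1),
               std::vector<int>(Size, -1)};
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= 0 && Mask[i] < Size) {
      D.V1Mask[i] = Mask[i]; // Move this V1 element into position i.
      D.BlendMask[i] = i;
    } else if (Mask[i] >= Size) {
      D.V2Mask[i] = Mask[i] - Size; // Move this V2 element into position i.
      D.BlendMask[i] = i + Size;
    }
  return D;
}

int main() {
  // <0,5,2,7> needs no movement in either input, so the unary shuffles are
  // identities and the whole mask reduces to the blend <0,5,2,7>.
  Decomposed D = decompose({0, 5, 2, 7});
  assert((D.V1Mask == std::vector<int>{0, -1, 2, -1}));
  assert((D.V2Mask == std::vector<int>{-1, 1, -1, 3}));
  assert((D.BlendMask == std::vector<int>{0, 5, 2, 7}));
}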
+
+/// \brief Try to lower a vector shuffle as a byte rotation.
+///
+/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
+/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
+/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
+/// try to generically lower a vector shuffle through such a pattern. It
+/// does not check for the profitability of lowering either as PALIGNR or
+/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
+/// This matches shuffle vectors that look like:
+///
+/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+///
+/// Note that this only handles 128-bit vector widths currently.
+static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+
+ // We need to detect various ways of spelling a rotation:
+ // [11, 12, 13, 14, 15, 0, 1, 2]
+ // [-1, 12, 13, 14, -1, -1, 1, -1]
+ // [-1, -1, -1, -1, -1, -1, 1, 2]
+ // [ 3, 4, 5, 6, 7, 8, 9, 10]
+ // [-1, 4, 5, 6, -1, -1, 9, -1]
+ // [-1, 4, 5, 6, -1, -1, -1, -1]
+ int Rotation = 0;
+ SDValue Lo, Hi;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && "Only -1 is a valid negative mask element!");
+
+ // Based on the mod-Size value of this mask element, determine where
+ // a rotated vector would have started.
+ int StartIdx = i - (Mask[i] % Size);
+ if (StartIdx == 0)
+ // The identity rotation isn't interesting, stop.
+ return SDValue();
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, the rotation is Size - StartIdx.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return SDValue();
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = Mask[i] < Size ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned to.
+ // This reflects whether the high elements are remaining or the low elements
+ // are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return SDValue();
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((Lo || Hi) && "Failed to find a rotated input vector!");
+ if (!Lo)
+ Lo = Hi;
+ else if (!Hi)
+ Hi = Lo;
+
+ assert(VT.getSizeInBits() == 128 &&
+ "Rotate-based lowering only supports 128-bit lowering!");
+ assert(Mask.size() <= 16 &&
+ "Can shuffle at most 16 bytes in a 128-bit vector!");
+
+ // The actual rotate instruction rotates bytes, so we need to scale the
+ // rotation based on how many bytes are in the vector.
+ int Scale = 16 / Mask.size();
+
+ // SSSE3 targets can use the palignr instruction
+ if (Subtarget->hasSSSE3()) {
+ // Cast the inputs to v16i8 to match PALIGNR.
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
+ DAG.getConstant(Rotation * Scale, MVT::i8)));
+ }
+
+ // Default SSE2 implementation
+ int LoByteShift = 16 - Rotation * Scale;
+ int HiByteShift = Rotation * Scale;
+
+ // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
+
+ SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
+ DAG.getConstant(8 * LoByteShift, MVT::i8));
+ SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
+ DAG.getConstant(8 * HiByteShift, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
+}
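A standalone sketch of the rotation detection, simplified to skip the Lo/Hi input tracking and recover only the byte rotation (i.e. the PALIGNR immediate):

#include <cassert>
#include <vector>

static int matchByteRotation(const std::vector<int> &Mask, int EltBytes) {
  int Size = static_cast<int>(Mask.size());
  int Rotation = 0;
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue;
    // Where would a rotated vector containing this element have started?
    int StartIdx = i - (Mask[i] % Size);
    if (StartIdx == 0)
      return -1; // The identity rotation isn't interesting.
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // Inconsistent rotation amounts.
  }
  return Rotation * EltBytes;
}

int main() {
  // v8i16 <11,12,13,14,15,0,1,2> is a rotation by 3 elements, i.e. PALIGNR
  // with byte immediate 6.
  assert(matchByteRotation({11, 12, 13, 14, 15, 0, 1, 2}, 2) == 6);
}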
+
+/// \brief Compute whether each element of a shuffle is zeroable.
+///
+/// A "zeroable" vector shuffle element is one which can be lowered to zero.
+/// Either it is an undef element in the shuffle mask, the element of the input
+/// referenced is undef, or the element of the input referenced is known to be
+/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
+/// as many lanes with this technique as possible to simplify the remaining
+/// shuffle.
+static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ SmallBitVector Zeroable(Mask.size(), false);
+
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
+ // Handle the easy cases.
+ if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ Zeroable[i] = true;
+ continue;
+ }
+
+ // If this is an index into a build_vector node, dig out the input value and
+ // use it.
+ SDValue V = M < Size ? V1 : V2;
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ continue;
+
+ SDValue Input = V.getOperand(M % Size);
+ // The UNDEF opcode check really should be dead code here, but not quite
+ // worth asserting on (it isn't invalid, just unexpected).
+ if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
+ Zeroable[i] = true;
+ }
+
+ return Zeroable;
+}
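A standalone model of the same computation, describing each input by per-element known-zero flags instead of a SelectionDAG node:

#include <cassert>
#include <vector>

static std::vector<bool> computeZeroable(const std::vector<int> &Mask,
                                         const std::vector<bool> &V1Zero,
                                         const std::vector<bool> &V2Zero) {
  int Size = static_cast<int>(Mask.size());
  std::vector<bool> Zeroable(Size, false);
  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      Zeroable[i] = true; // Undef mask elements can always be zeroed.
    else if (M < Size)
      Zeroable[i] = V1Zero[M];
    else
      Zeroable[i] = V2Zero[M - Size];
  }
  return Zeroable;
}

int main() {
  // With V2 = <0,0,x,x>, mask elements pulling V2's low half are zeroable,
  // as is the trailing undef.
  std::vector<bool> Z =
      computeZeroable({0, 4, 5, -1}, {false, false, false, false},
                      {true, true, false, false});
  assert((Z == std::vector<bool>{false, true, true, true}));
}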
+
+/// \brief Try to lower a vector shuffle as a byte shift (shifts in zeros).
+///
+/// Attempts to match a shuffle mask against the PSRLDQ and PSLLDQ SSE2
+/// byte-shift instructions. The mask must consist of a shifted sequential
+/// shuffle from one of the input vectors and zeroable elements for the
+/// remaining 'shifted in' elements.
+///
+/// Note that this only handles 128-bit vector widths currently.
+static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ int Size = Mask.size();
+ int Scale = 16 / Size;
+
+ for (int Shift = 1; Shift < Size; Shift++) {
+ int ByteShift = Shift * Scale;
+
+ // PSRLDQ : (little-endian) right byte shift
+ // [ 5, 6, 7, zz, zz, zz, zz, zz]
+ // [ -1, 5, 6, 7, zz, zz, zz, zz]
+ // [ 1, 2, -1, -1, -1, -1, zz, zz]
+ bool ZeroableRight = true;
+ for (int i = Size - Shift; i < Size; i++) {
+ ZeroableRight &= Zeroable[i];
+ }
+
+ if (ZeroableRight) {
+ bool ValidShiftRight1 =
+ isSequentialOrUndefInRange(Mask, 0, Size - Shift, Shift);
+ bool ValidShiftRight2 =
+ isSequentialOrUndefInRange(Mask, 0, Size - Shift, Size + Shift);
+
+ if (ValidShiftRight1 || ValidShiftRight2) {
+ // Cast the inputs to v2i64 to match PSRLDQ.
+ SDValue &TargetV = ValidShiftRight1 ? V1 : V2;
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
+ SDValue Shifted = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, V,
+ DAG.getConstant(ByteShift * 8, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
+ }
+ }
+
+ // PSLLDQ : (little-endian) left byte shift
+ // [ zz, 0, 1, 2, 3, 4, 5, 6]
+ // [ zz, zz, -1, -1, 2, 3, 4, -1]
+ // [ zz, zz, zz, zz, zz, zz, -1, 1]
+ bool ZeroableLeft = true;
+ for (int i = 0; i < Shift; i++) {
+ ZeroableLeft &= Zeroable[i];
+ }
+
+ if (ZeroableLeft) {
+ bool ValidShiftLeft1 =
+ isSequentialOrUndefInRange(Mask, Shift, Size - Shift, 0);
+ bool ValidShiftLeft2 =
+ isSequentialOrUndefInRange(Mask, Shift, Size - Shift, Size);
+
+ if (ValidShiftLeft1 || ValidShiftLeft2) {
+ // Cast the inputs to v2i64 to match PSLLDQ.
+ SDValue &TargetV = ValidShiftLeft1 ? V1 : V2;
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
+ SDValue Shifted = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, V,
+ DAG.getConstant(ByteShift * 8, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
+ }
+ }
+ }
+
+ return SDValue();
+}
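A standalone sketch of the right-shift half of the match (the PSLLDQ case is symmetric): the low Size - Shift elements must be sequential starting at Shift, and everything shifted in must be zeroable.

#include <cassert>
#include <vector>

static int matchRightByteShift(const std::vector<int> &Mask,
                               const std::vector<bool> &Zeroable,
                               int EltBytes) {
  int Size = static_cast<int>(Mask.size());
  for (int Shift = 1; Shift < Size; ++Shift) {
    bool Valid = true;
    // The kept elements walk sequentially from index Shift of the input.
    for (int i = 0; i < Size - Shift && Valid; ++i)
      Valid = Mask[i] < 0 || Mask[i] == Shift + i;
    // The vacated tail must be zeroable.
    for (int i = Size - Shift; i < Size && Valid; ++i)
      Valid = Zeroable[i];
    if (Valid)
      return Shift * EltBytes; // PSRLDQ immediate in bytes.
  }
  return -1;
}

int main() {
  // v8i16 <2,3,4,5,6,7,zz,zz> is PSRLDQ by 2 elements, i.e. 4 bytes.
  std::vector<bool> Z = {false, false, false, false,
                         false, false, true,  true};
  assert(matchRightByteShift({2, 3, 4, 5, 6, 7, -1, -1}, Z, 2) == 4);
}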
+
+/// \brief Lower a vector shuffle as a zero or any extension.
+///
+/// Given a specific number of elements, element bit width, and extension
+/// stride, produce either a zero or any extension based on the available
+/// features of the subtarget.
+static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+ SDLoc DL, MVT VT, int NumElements, int Scale, bool AnyExt, SDValue InputV,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ assert(Scale > 1 && "Need a scale to extend.");
+ int EltBits = VT.getSizeInBits() / NumElements;
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Only 8, 16, and 32 bit elements can be extended.");
+ assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+
+ // Found a valid zext mask! Try various lowering strategies based on the
+ // input type and available ISA extensions.
+ if (Subtarget->hasSSE41()) {
+ MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
+ NumElements / Scale);
+ InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ }
+
+ // For any extends we can cheat for larger element sizes and use shuffle
+ // instructions that can fold with a load and/or copy.
+ if (AnyExt && EltBits == 32) {
+ int PSHUFDMask[4] = {0, -1, 1, -1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ }
+ if (AnyExt && EltBits == 16 && Scale > 2) {
+ int PSHUFDMask[4] = {0, -1, 0, -1};
+ InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
+ int PSHUFHWMask[4] = {1, -1, -1, -1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFHWMask, DAG)));
+ }
+
+ // If this would require more than 2 unpack instructions to expand, use
+ // pshufb when available. We can only use more than 2 unpack instructions
+ // when zero extending i8 elements which also makes it easier to use pshufb.
+ if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
+ assert(NumElements == 16 && "Unexpected byte vector width!");
+ SDValue PSHUFBMask[16];
+ for (int i = 0; i < 16; ++i)
+ PSHUFBMask[i] =
+ DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, MVT::i8);
+ InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v16i8, PSHUFBMask)));
+ }
+
+ // Otherwise emit a sequence of unpacks.
+ do {
+ MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
+ SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
+ : getZeroVector(InputVT, Subtarget, DAG, DL);
+ InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
+ Scale /= 2;
+ EltBits *= 2;
+ NumElements /= 2;
+ } while (Scale > 1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
+}
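The PSHUFB control bytes built above rely on the usual convention that a control byte with its high bit set (0x80) zeroes the destination byte. A quick standalone check of the pattern for an i8-to-i64 zero extension:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  int Scale = 8; // Zero-extend i8 -> i64 across a v16i8 register.
  std::vector<uint8_t> PSHUFBMask(16);
  for (int i = 0; i < 16; ++i)
    PSHUFBMask[i] = (i % Scale == 0) ? i / Scale : 0x80;
  // Bytes 0 and 8 select source bytes 0 and 1; every other byte is zeroed.
  assert(PSHUFBMask[0] == 0 && PSHUFBMask[8] == 1 && PSHUFBMask[1] == 0x80);
}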
+
+/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
+///
+/// This routine will try to do everything in its power to cleverly lower
+/// a shuffle which happens to match the pattern of a zero extend. It doesn't
+/// check for the profitability of this lowering, it tries to aggressively
+/// match this pattern. It will use all of the micro-architectural details it
+/// can to emit an efficient lowering. It handles both blends with all-zero
+/// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
+/// masking out later).
+///
+/// The reason we have dedicated lowering for zext-style shuffles is that they
+/// are both incredibly common and often quite performance sensitive.
+static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
+ SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ int Bits = VT.getSizeInBits();
+ int NumElements = Mask.size();
+
+ // Define a helper function to check a particular ext-scale and lower to it if
+ // valid.
+ auto Lower = [&](int Scale) -> SDValue {
+ SDValue InputV;
+ bool AnyExt = true;
+ for (int i = 0; i < NumElements; ++i) {
+ if (Mask[i] == -1)
+ continue; // Valid anywhere but doesn't tell us anything.
+ if (i % Scale != 0) {
+ // Each of the extend elements needs to be zeroable.
+ if (!Zeroable[i])
+ return SDValue();
+
+ // We are no longer in the anyext case.
+ AnyExt = false;
+ continue;
+ }
+
+ // Each of the base elements needs to be consecutive indices into the
+ // same input vector.
+ SDValue V = Mask[i] < NumElements ? V1 : V2;
+ if (!InputV)
+ InputV = V;
+ else if (InputV != V)
+ return SDValue(); // Flip-flopping inputs.
+
+ if (Mask[i] % NumElements != i / Scale)
+ return SDValue(); // Non-consecutive strided elements.
+ }
+
+ // If we fail to find an input, we have a zero-shuffle which should always
+ // have already been handled.
+ // FIXME: Maybe handle this here in case during blending we end up with one?
+ if (!InputV)
+ return SDValue();
+
+ return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+ DL, VT, NumElements, Scale, AnyExt, InputV, Subtarget, DAG);
+ };
+
+ // The widest scale possible for extending is to a 64-bit integer.
+ assert(Bits % 64 == 0 &&
+ "The number of bits in a vector must be divisible by 64 on x86!");
+ int NumExtElements = Bits / 64;
+
+ // Each iteration, try extending the elements half as much, but into twice as
+ // many elements.
+ for (; NumExtElements < NumElements; NumExtElements *= 2) {
+ assert(NumElements % NumExtElements == 0 &&
+ "The input vector size must be divisble by the extended size.");
+ if (SDValue V = Lower(NumElements / NumExtElements))
+ return V;
+ }
+
+ // No viable ext lowering found.
+ return SDValue();
+}
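A standalone sketch of the scale search, simplified to try every power-of-two scale from widest to narrowest (rather than deriving the starting point from the 64-bit cap) and to skip the input flip-flop tracking:

#include <cassert>
#include <vector>

static int matchZExtScale(const std::vector<int> &Mask,
                          const std::vector<bool> &Zeroable) {
  int NumElements = static_cast<int>(Mask.size());
  for (int Scale = NumElements; Scale >= 2; Scale /= 2) {
    bool Valid = true;
    for (int i = 0; i < NumElements && Valid; ++i) {
      if (Mask[i] < 0)
        continue; // Undef is valid anywhere.
      if (i % Scale != 0)
        Valid = Zeroable[i]; // Extended positions must be zeroable.
      else
        Valid = Mask[i] % NumElements == i / Scale; // Consecutive bases.
    }
    if (Valid)
      return Scale;
  }
  return -1;
}

int main() {
  // With a zero V2, v4i32 <0,zz,1,zz> zero-extends the low two elements
  // into 64-bit slots (PMOVZXDQ under SSE4.1): Scale == 2.
  assert(matchZExtScale({0, 4, 1, 4}, {false, true, false, true}) == 2);
}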
+
+/// \brief Try to get a scalar value for a specific element of a vector.
+///
+/// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar.
+static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
+ SelectionDAG &DAG) {
+ MVT VT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ // If the bitcasts shift the element size, we can't extract an equivalent
+ // element from them.
+ MVT NewVT = V.getSimpleValueType();
+ if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
+ return SDValue();
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR))
+ return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, V.getOperand(Idx));
+
+ return SDValue();
+}
+
+/// \brief Helper to test for a load that can be folded with x86 shuffles.
+///
+/// This is particularly important because the set of instructions varies
+/// significantly based on whether the operand is a load or not.
+static bool isShuffleFoldableLoad(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ return ISD::isNON_EXTLoad(V.getNode());
+}
+
+/// \brief Try to lower insertion of a single element into a zero vector.
+///
+/// This is a common pattern for which we have especially efficient lowerings
+/// across all subtarget feature sets.
+static SDValue lowerVectorShuffleAsElementInsertion(
+ MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ MVT ExtVT = VT;
+ MVT EltVT = VT.getVectorElementType();
+
+ int V2Index = std::find_if(Mask.begin(), Mask.end(),
+ [&Mask](int M) { return M >= (int)Mask.size(); }) -
+ Mask.begin();
+ bool IsV1Zeroable = true;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (i != V2Index && !Zeroable[i]) {
+ IsV1Zeroable = false;
+ break;
+ }
+
+ // Check for a single input from a SCALAR_TO_VECTOR node.
+ // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
+ // all the smarts here sunk into that routine. However, the current
+ // lowering of BUILD_VECTOR makes that nearly impossible until the old
+ // vector shuffle lowering is dead.
+ if (SDValue V2S = getScalarValueForVectorElement(
+ V2, Mask[V2Index] - Mask.size(), DAG)) {
+ // We need to zext the scalar if it is smaller than an i32.
+ V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
+ if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+ // Using zext to expand a narrow element won't work for non-zero
+ // insertions.
+ if (!IsV1Zeroable)
+ return SDValue();
+
+ // Zero-extend directly to i32.
+ ExtVT = MVT::v4i32;
+ V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
+ }
+ V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
+ } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
+ EltVT == MVT::i16) {
+ // Either not inserting from the low element of the input or the input
+ // element size is too small to use VZEXT_MOVL to clear the high bits.
+ return SDValue();
+ }
+
+ if (!IsV1Zeroable) {
+ // If V1 can't be treated as a zero vector we have fewer options to lower
+ // this. We can't support integer vectors or non-zero targets cheaply, and
+ // the V1 elements can't be permuted in any way.
+ assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
+ if (!VT.isFloatingPoint() || V2Index != 0)
+ return SDValue();
+ SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
+ V1Mask[V2Index] = -1;
+ if (!isNoopShuffleMask(V1Mask))
+ return SDValue();
+ // This is essentially a special case blend operation, but if we have
+ // general purpose blend operations, they are always faster. Bail and let
+ // the rest of the lowering handle these as blends.
+ if (Subtarget->hasSSE41())
+ return SDValue();
+
+ // Otherwise, use MOVSD or MOVSS.
+ assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
+ "Only two types of floating point element types to handle!");
+ return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
+ ExtVT, V1, V2);
+ }
+
+ V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
+ if (ExtVT != VT)
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+
+ if (V2Index != 0) {
+ // If we have 4 or fewer lanes we can cheaply shuffle the element into
+ // the desired position. Otherwise it is more efficient to do a vector
+ // shift left. We know that we can do a vector shift left because all
+ // the inputs are zero.
+ if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
+ SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
+ V2Shuffle[V2Index] = 0;
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
+ } else {
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
+ V2 = DAG.getNode(
+ X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
+ DAG.getConstant(
+ V2Index * EltVT.getSizeInBits(),
+ DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ }
+ }
+ return V2;
+}
+
+/// \brief Try to lower broadcast of a single element.
+///
+/// For convenience, this code also bundles all of the subtarget feature set
+/// filtering. While a little annoying to re-dispatch on type here, there isn't
+/// a convenient way to factor it out.
+static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
+ ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (VT.isInteger() && !Subtarget->hasAVX2())
+ return SDValue();
+
+ // Check that the mask is a broadcast.
+ int BroadcastIdx = -1;
+ for (int M : Mask)
+ if (M >= 0 && BroadcastIdx == -1)
+ BroadcastIdx = M;
+ else if (M >= 0 && M != BroadcastIdx)
+ return SDValue();
+
+ assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
+ "a sorted mask where the broadcast "
+ "comes from V1.");
+
+ // Go up the chain of (vector) values to try and find a scalar load that
+ // we can combine with the broadcast.
+ for (;;) {
+ switch (V.getOpcode()) {
+ case ISD::CONCAT_VECTORS: {
+ int OperandSize = Mask.size() / V.getNumOperands();
+ V = V.getOperand(BroadcastIdx / OperandSize);
+ BroadcastIdx %= OperandSize;
+ continue;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
+ auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
+ if (!ConstantIdx)
+ break;
+
+ int BeginIdx = (int)ConstantIdx->getZExtValue();
+ int EndIdx =
+ BeginIdx + (int)VInner.getValueType().getVectorNumElements();
+ if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
+ BroadcastIdx -= BeginIdx;
+ V = VInner;
+ } else {
+ V = VOuter;
+ }
+ continue;
+ }
+ }
+ break;
+ }
+
+ // Check if this is a broadcast of a scalar. We special case lowering
+ // for scalars so that we can more effectively fold with loads.
+ if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
+ V = V.getOperand(BroadcastIdx);
+
+ // If the scalar isn't a load we can't broadcast from it in AVX1, only with
+ // AVX2.
+ if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
+ return SDValue();
+ } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
+ // We can't broadcast from a vector register w/o AVX2, and we can only
+ // broadcast from the zero-element of a vector register.
+ return SDValue();
+ }
+
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
+}
+
+// Check whether we can use INSERTPS to perform the shuffle. We only use
+// INSERTPS when the V1 elements are already in the correct locations
+// because otherwise we can just always use two SHUFPS instructions which
+// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
+// perform INSERTPS if a single V1 element is out of place and all V2
+// elements are zeroable.
+static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ unsigned ZMask = 0;
+ int V1DstIndex = -1;
+ int V2DstIndex = -1;
+ bool V1UsedInPlace = false;
+
+ for (int i = 0; i < 4; i++) {
+ // Synthesize a zero mask from the zeroable elements (includes undefs).
+ if (Zeroable[i]) {
+ ZMask |= 1 << i;
+ continue;
+ }
+
+ // Flag if we use any V1 inputs in place.
+ if (i == Mask[i]) {
+ V1UsedInPlace = true;
+ continue;
+ }
+
+ // We can only insert a single non-zeroable element.
+ if (V1DstIndex != -1 || V2DstIndex != -1)
+ return SDValue();
+
+ if (Mask[i] < 4) {
+ // V1 input out of place for insertion.
+ V1DstIndex = i;
+ } else {
+ // V2 input for insertion.
+ V2DstIndex = i;
+ }
+ }
+
+ // Don't bother if we have no (non-zeroable) element for insertion.
+ if (V1DstIndex == -1 && V2DstIndex == -1)
+ return SDValue();
+
+ // Determine element insertion src/dst indices. The src index is from the
+ // start of the inserted vector, not the start of the concatenated vector.
+ unsigned V2SrcIndex = 0;
+ if (V1DstIndex != -1) {
+ // If we have a V1 input out of place, we use V1 as the V2 element insertion
+ // and don't use the original V2 at all.
+ V2SrcIndex = Mask[V1DstIndex];
+ V2DstIndex = V1DstIndex;
+ V2 = V1;
+ } else {
+ V2SrcIndex = Mask[V2DstIndex] - 4;
+ }
+
+ // If no V1 inputs are used in place, then the result is created only from
+ // the zero mask and the V2 insertion - so remove V1 dependency.
+ if (!V1UsedInPlace)
+ V1 = DAG.getUNDEF(MVT::v4f32);
+
+ unsigned InsertPSMask = V2SrcIndex << 6 | V2DstIndex << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
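+
+  // For instance (hypothetical operands): inserting V2 element 2 into lane 1
+  // while zeroing lanes 0 and 3 yields (2 << 6) | (1 << 4) | 0b1001 == 0x99,
+  // matching the INSERTPS immediate layout: count_s in bits [7:6], count_d
+  // in bits [5:4], and zmask in bits [3:0].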
+
+ // Insert the V2 element into the desired position.
+ SDLoc DL(Op);
+ return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ DAG.getConstant(InsertPSMask, MVT::i8));
+}
+
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
@@ -6963,12 +8257,56 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction.
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
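+
+  // E.g. a splat of the high element, Mask = {1, 1}, gives SHUFPDMask 0b11:
+  // bit 0 selects the source element for result lane 0, bit 1 for lane 1.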
+
+ if (Subtarget->hasAVX()) {
+    // If we have AVX, we can use VPERMILPD, which will allow folding a load
+    // into the shuffle.
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
DAG.getConstant(SHUFPDMask, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
+ // If we have a single input, insert that into V1 if we can do so cheaply.
+ if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2f64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+ // Try inverting the insertion since for v2 masks it is easy to do and we
+ // can't reliably sort the mask one way or the other.
+ int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
+ Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2f64, DL, V2, V1, InverseMask, Subtarget, DAG))
+ return Insertion;
+ }
+
+ // Try to use one of the special instruction patterns to handle two common
+ // blend patterns if a zero-blend above didn't work.
+ if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
+ if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
+ // We can either use a special instruction to load over the low double or
+ // to move just the low double.
+ return DAG.getNode(
+ isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD,
+ DL, MVT::v2f64, V2,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
@@ -6992,6 +8330,11 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (isSingleInputShuffleMask(Mask)) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v2i64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
@@ -7005,6 +8348,44 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
}
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v2i64, V1, V2, Mask, DAG))
+ return Shift;
+
+ // If we have a single input from V2 insert that into V1 if we can do so
+ // cheaply.
+ if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+ // Try inverting the insertion since for v2 masks it is easy to do and we
+ // can't reliably sort the mask one way or the other.
+ int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
+ Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2i64, DL, V2, V1, InverseMask, Subtarget, DAG))
+ return Insertion;
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte rotation instructions.
+  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
+ if (Subtarget->hasSSSE3())
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
@@ -7015,38 +8396,25 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
-/// \brief Lower 4-lane 32-bit floating point shuffles.
+/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
-/// Uses instructions exclusively from the floating point unit to minimize
-/// domain crossing penalties, as these are sufficient to implement all v4f32
-/// shuffles.
-static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
- assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
-
+/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
+/// It makes no assumptions about whether this is the *best* lowering, it simply
+/// uses it.
+static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
- if (NumV2Elements == 0)
- // Straight shuffle of a single input vector. We pass the input vector to
- // both operands to simulate this with a SHUFPS.
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
-
if (NumV2Elements == 1) {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
Mask.begin();
+
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
@@ -7063,7 +8431,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// To make this work, blend them together as the first step.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
- V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1,
+ V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
@@ -7080,9 +8448,17 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
- // high lanes. We never see this reversed because we sort the shuffle.
+ // high lanes.
NewMask[2] -= 4;
NewMask[3] -= 4;
+ } else if (Mask[2] < 4 && Mask[3] < 4) {
+ // We also handle the reversed case because this utility may get called
+ // when we detect a SHUFPS pattern but can't easily commute the shuffle to
+ // arrange things in the right direction.
+ NewMask[0] -= 4;
+ NewMask[1] -= 4;
+ HighV = V1;
+ LowV = V2;
} else {
// We have a mixture of V1 and V2 in both low and high lanes. Rather than
// trying to place elements directly, just blend them and set up the final
@@ -7094,7 +8470,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
- V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2,
+ V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
@@ -7106,10 +8482,78 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
}
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV,
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DAG));
}
+/// \brief Lower 4-lane 32-bit floating point shuffles.
+///
+/// Uses instructions exclusively from the floating point unit to minimize
+/// domain crossing penalties, as these are sufficient to implement all v4f32
+/// shuffles.
+static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, we can use VPERMILPS which will allow folding a load
+ // into the shuffle.
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // Otherwise, use a straight shuffle of a single input vector. We pass the
+ // input vector to both operands to simulate this with a SHUFPS.
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
+ // There are special ways we can lower some single-element blends. However, we
+ // have custom ways we can lower more complex single-element blends below that
+ // we defer to if both this and BLENDPS fail to match, so restrict this to
+ // when the V2 input is targeting element 0 of the mask -- that is the fast
+ // case here.
+ if (NumV2Elements == 1 && Mask[0] >= 4)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4f32, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ if (Subtarget->hasSSE41()) {
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Use INSERTPS if we can complete the shuffle efficiently.
+ if (SDValue V = lowerVectorShuffleAsInsertPS(Op, V1, V2, Mask, DAG))
+ return V;
+ }
+
+ // Otherwise fall back to a SHUFPS lowering strategy.
+ return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
+}
+
/// \brief Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
@@ -7125,11 +8569,66 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- if (isSingleInputShuffleMask(Mask))
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return ZExt;
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
+ // We coerce the shuffle pattern to be compatible with UNPCK instructions
+ // but we aren't actually going to use the UNPCK instruction because doing
+ // so prevents folding a load into this instruction or making a copy.
+ const int UnpackLoMask[] = {0, 0, 1, 1};
+ const int UnpackHiMask[] = {2, 2, 3, 3};
+ if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
+ Mask = UnpackHiMask;
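+
+    // E.g. a mask such as {0, -1, 1, 1} is canonicalized to {0, 0, 1, 1} here
+    // and emitted as a PSHUFD with immediate 0x50 rather than as UNPCKLDQ.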
+
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Shift;
+
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Elements == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte rotation instructions.
+  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
+ if (Subtarget->hasSSSE3())
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
@@ -7182,6 +8681,27 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i16, DL, V,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v8i16, V, V, Mask, DAG))
+ return Shift;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
+ if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
+ return Rotate;
+
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <=2 inputs to each half in each half. Once there, we can fall through
@@ -7190,22 +8710,126 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
//
- // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2
- // and 2-2.
- auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput,
- int ThreeInputHalfSum, int OneInputHalfOffset) {
+ // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
+ // and an existing 2-into-2 on the other half. In this case we may have to
+ // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
+ // 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
+ // Fortunately, we don't have to handle anything but a 2-into-2 pattern
+ // because any other situation (including a 3-into-1 or 1-into-3 in the other
+ // half than the one we target for fixing) will be fixed when we re-enter this
+ // path. We will also combine away any sequence of PSHUFD instructions that
+ // result into a single instruction. Here is an example of the tricky case:
+ //
+ // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
+ //
+ // This now has a 1-into-3 in the high half! Instead, we do two shuffles:
+ //
+ // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
+ //
+ // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
+ //
+ // The result is fine to be handled by the generic logic.
+ auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
+ ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
+ int AOffset, int BOffset) {
+ assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
+ "Must call this with A having 3 or 1 inputs from the A half.");
+ assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
+ "Must call this with B having 1 or 3 inputs from the B half.");
+ assert(AToAInputs.size() + BToAInputs.size() == 4 &&
+ "Must call this with either 3:1 or 1:3 inputs (summing to 4).");
+
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
- int DWordA = (ThreeInputHalfSum -
- std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) /
- 2;
- int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2;
+ int ADWord, BDWord;
+ int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord;
+ int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord;
+ int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset;
+ ArrayRef<int> TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs;
+ int OneInput = AToAInputs.size() == 3 ? BToAInputs[0] : AToAInputs[0];
+ int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
+ int TripleNonInputIdx =
+ TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
+ TripleDWord = TripleNonInputIdx / 2;
+
+ // We use xor with one to compute the adjacent DWord to whichever one the
+ // OneInput is in.
+ OneInputDWord = (OneInput / 2) ^ 1;
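+
+    // E.g. (hypothetical mask) with low-half triple inputs {0, 1, 3} and
+    // AOffset == 0, the half's words sum to 6 and the inputs to 4, so word 2
+    // is the spare slot and TripleDWord == 1; and if the lone input is word
+    // 5 (dword 2), then (5 / 2) ^ 1 selects the adjacent dword 3.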
+
+ // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
+ // and BToA inputs. If there is also such a problem with the BToB and AToB
+ // inputs, we don't try to fix it necessarily -- we'll recurse and see it in
+ // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
+ // is essential that we don't *create* a 3<-1 as then we might oscillate.
+ if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
+      // Compute how many inputs will be flipped by swapping these DWords. We
+      // need to balance this to ensure we don't form a 3-1 shuffle in the
+      // other half.
+ int NumFlippedAToBInputs =
+ std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
+ std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
+ int NumFlippedBToBInputs =
+ std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
+ std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
+ if ((NumFlippedAToBInputs == 1 &&
+ (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
+ (NumFlippedBToBInputs == 1 &&
+ (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
+ // We choose whether to fix the A half or B half based on whether that
+ // half has zero flipped inputs. At zero, we may not be able to fix it
+ // with that half. We also bias towards fixing the B half because that
+ // will more commonly be the high half, and we have to bias one way.
+ auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
+ ArrayRef<int> Inputs) {
+ int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
+ bool IsFixIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ PinnedIdx ^ 1) != Inputs.end();
+ // Determine whether the free index is in the flipped dword or the
+ // unflipped dword based on where the pinned index is. We use this bit
+ // in an xor to conditionally select the adjacent dword.
+ int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
+ bool IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ FixFreeIdx) != Inputs.end();
+ if (IsFixIdxInput == IsFixFreeIdxInput)
+ FixFreeIdx += 1;
+ IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ FixFreeIdx) != Inputs.end();
+ assert(IsFixIdxInput != IsFixFreeIdxInput &&
+ "We need to be changing the number of flipped inputs!");
+ int PSHUFHalfMask[] = {0, 1, 2, 3};
+ std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
+ V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
+ MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DAG));
+
+ for (int &M : Mask)
+ if (M != -1 && M == FixIdx)
+ M = FixFreeIdx;
+ else if (M != -1 && M == FixFreeIdx)
+ M = FixIdx;
+ };
+ if (NumFlippedBToBInputs != 0) {
+ int BPinnedIdx =
+ BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
+ } else {
+ assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
+ int APinnedIdx =
+ AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
+ }
+ }
+ }
int PSHUFDMask[] = {0, 1, 2, 3};
- PSHUFDMask[DWordA] = DWordB;
- PSHUFDMask[DWordB] = DWordA;
+ PSHUFDMask[ADWord] = BDWord;
+ PSHUFDMask[BDWord] = ADWord;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
@@ -7213,24 +8837,20 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
- if (M != -1 && M/2 == DWordA)
- M = 2 * DWordB + M % 2;
- else if (M != -1 && M/2 == DWordB)
- M = 2 * DWordA + M % 2;
+ if (M != -1 && M/2 == ADWord)
+ M = 2 * BDWord + M % 2;
+ else if (M != -1 && M/2 == BDWord)
+ M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
Mask);
};
- if (NumLToL == 3 && NumHToL == 1)
- return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4);
- else if (NumLToL == 1 && NumHToL == 3)
- return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0);
- else if (NumLToH == 1 && NumHToH == 3)
- return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0);
- else if (NumLToH == 3 && NumHToH == 1)
- return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4);
+ if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
+ return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
+ else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
+ return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
// each half. That means the inputs can always be grouped into dwords and
@@ -7244,9 +8864,10 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// First fix the masks for all the inputs that are staying in their
// original halves. This will then dictate the targets of the cross-half
// shuffles.
- auto fixInPlaceInputs = [&PSHUFDMask](
- ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask,
- MutableArrayRef<int> HalfMask, int HalfOffset) {
+ auto fixInPlaceInputs =
+ [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
+ MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
@@ -7255,6 +8876,14 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
+ if (IncomingInputs.empty()) {
+ // Just fix all of the in place inputs.
+ for (int Input : InPlaceInputs) {
+ SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
+ PSHUFDMask[Input / 2] = Input / 2;
+ }
+ return;
+ }
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
@@ -7266,10 +8895,8 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
- if (!HToLInputs.empty())
- fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0);
- if (!LToHInputs.empty())
- fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4);
+ fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
+ fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
// Now gather the cross-half inputs and place them into a free dword of
// their target half.
@@ -7278,7 +8905,8 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
- int SourceOffset, int DestOffset) {
+ MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
+ int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
};
@@ -7304,7 +8932,7 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
Input - SourceOffset;
// We have to swap the uses in our half mask in one sweep.
for (int &M : HalfMask)
- if (M == SourceHalfMask[Input - SourceOffset])
+ if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
@@ -7356,18 +8984,68 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
- int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2;
- assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) &&
- "Not all dwords can be clobbered!");
- SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset;
- SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset;
+ // We have two non-adjacent or clobbered inputs we need to extract from
+ // the source half. To do this, we need to map them into some adjacent
+ // dword slot in the source mask.
+ int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
+ IncomingInputs[1] - SourceOffset};
+
+ // If there is a free slot in the source half mask adjacent to one of
+ // the inputs, place the other input in it. We use (Index XOR 1) to
+ // compute an adjacent index.
+ if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
+ SourceHalfMask[InputsFixed[0] ^ 1] == -1) {
+ SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
+ SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
+ InputsFixed[1] = InputsFixed[0] ^ 1;
+ } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
+ SourceHalfMask[InputsFixed[1] ^ 1] == -1) {
+ SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
+ SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
+ InputsFixed[0] = InputsFixed[1] ^ 1;
+ } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] == -1 &&
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] == -1) {
+ // The two inputs are in the same DWord but it is clobbered and the
+ // adjacent DWord isn't used at all. Move both inputs to the free
+ // slot.
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
+ InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
+ InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
+ } else {
+ // The only way we hit this point is if there is no clobbering
+ // (because there are no off-half inputs to this half) and there is no
+ // free slot adjacent to one of the inputs. In this case, we have to
+ // swap an input with a non-input.
+ for (int i = 0; i < 4; ++i)
+ assert((SourceHalfMask[i] == -1 || SourceHalfMask[i] == i) &&
+ "We can't handle any clobbers here!");
+ assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
+ "Cannot have adjacent inputs here!");
+
+ SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
+ SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
+
+ // We also have to update the final source mask in this case because
+ // it may need to undo the above swap.
+ for (int &M : FinalSourceHalfMask)
+ if (M == (InputsFixed[0] ^ 1) + SourceOffset)
+ M = InputsFixed[1] + SourceOffset;
+ else if (M == InputsFixed[1] + SourceOffset)
+ M = (InputsFixed[0] ^ 1) + SourceOffset;
+
+ InputsFixed[1] = InputsFixed[0] ^ 1;
+ }
+
+ // Point everything at the fixed inputs.
for (int &M : HalfMask)
if (M == IncomingInputs[0])
- M = SourceDWordBase + SourceOffset;
+ M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
- M = SourceDWordBase + 1 + SourceOffset;
- IncomingInputs[0] = SourceDWordBase + SourceOffset;
- IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset;
+ M = InputsFixed[1] + SourceOffset;
+
+ IncomingInputs[0] = InputsFixed[0] + SourceOffset;
+ IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
@@ -7377,13 +9055,14 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
- for (int Input : IncomingInputs)
- std::replace(HalfMask.begin(), HalfMask.end(), Input,
- FreeDWord * 2 + Input % 2);
+ for (int &M : HalfMask)
+ for (int Input : IncomingInputs)
+ if (M == Input)
+ M = FreeDWord * 2 + Input % 2;
};
- moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask,
+ moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
/*SourceOffset*/ 4, /*DestOffset*/ 0);
- moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask,
+ moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
/*SourceOffset*/ 0, /*DestOffset*/ 4);
// Now enact all the shuffles we've computed to move the inputs into their
@@ -7520,34 +9199,37 @@ static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
if (GoodInputs.size() == 2) {
// If the low inputs are spread across two dwords, pack them into
// a single dword.
- MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
- Mask[GoodInputs[0]] - MaskOffset;
- MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
- Mask[GoodInputs[1]] - MaskOffset;
- Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
- Mask[GoodInputs[1]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
+ MoveMask[MoveOffset] = Mask[GoodInputs[0]] - MaskOffset;
+ MoveMask[MoveOffset + 1] = Mask[GoodInputs[1]] - MaskOffset;
+ Mask[GoodInputs[0]] = MoveOffset + MaskOffset;
+ Mask[GoodInputs[1]] = MoveOffset + 1 + MaskOffset;
} else {
- // Otherwise pin the low inputs.
+ // Otherwise pin the good inputs.
for (int GoodInput : GoodInputs)
MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
}
- int MoveMaskIdx =
- std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) -
- std::begin(MoveMask);
- assert(MoveMaskIdx >= MoveOffset && "Established above");
-
if (BadInputs.size() == 2) {
+ // If we have two bad inputs then there may be either one or two good
+ // inputs fixed in place. Find a fixed input, and then find the *other*
+ // two adjacent indices by using modular arithmetic.
+ int GoodMaskIdx =
+ std::find_if(std::begin(MoveMask) + MoveOffset, std::end(MoveMask),
+ [](int M) { return M >= 0; }) -
+ std::begin(MoveMask);
+ int MoveMaskIdx =
+ ((((GoodMaskIdx - MoveOffset) & ~1) + 2) % 4) + MoveOffset;
assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
- MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] =
- Mask[BadInputs[0]] - MaskOffset;
- MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] =
- Mask[BadInputs[1]] - MaskOffset;
- Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset;
- Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset;
+ MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
+ MoveMask[MoveMaskIdx + 1] = Mask[BadInputs[1]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
+ Mask[BadInputs[1]] = MoveMaskIdx + 1 + MaskOffset;
} else {
assert(BadInputs.size() == 1 && "All sizes handled");
+ int MoveMaskIdx = std::find(std::begin(MoveMask) + MoveOffset,
+ std::end(MoveMask), -1) -
+ std::begin(MoveMask);
MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
}
@@ -7603,6 +9285,12 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v8i16, V1, V2, OrigMask, Subtarget, DAG))
+ return ZExt;
+
auto isV1 = [](int M) { return M >= 0 && M < 8; };
auto isV2 = [](int M) { return M >= 8; };
@@ -7615,6 +9303,33 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
"to be V1-input shuffles.");
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Shift;
+
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Inputs == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v8i16, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
+ if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (NumV1Inputs + NumV2Inputs <= 4)
return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
@@ -7658,6 +9373,74 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
}
+/// \brief Check whether a compaction lowering can be done by dropping even
+/// elements and compute how many times even elements must be dropped.
+///
+/// This handles shuffles which take every Nth element where N is a power of
+/// two. Example shuffle masks:
+///
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
+/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
+/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
+/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
+///
+/// Any of these lanes can of course be undef.
+///
+/// This routine only supports N <= 3.
+/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
+/// for larger N.
+///
+/// \returns N above, or the number of times even elements must be dropped if
+/// there is such a number. Otherwise returns zero.
+static int canLowerByDroppingEvenElements(ArrayRef<int> Mask) {
+ // Figure out whether we're looping over two inputs or just one.
+ bool IsSingleInput = isSingleInputShuffleMask(Mask);
+
+ // The modulus for the shuffle vector entries is based on whether this is
+ // a single input or not.
+ int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
+ assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
+ "We should only be called with masks with a power-of-2 size!");
+
+ uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
+
+ // We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
+ // and 2^3 simultaneously. This is because we may have ambiguity with
+ // partially undef inputs.
+ bool ViableForN[3] = {true, true, true};
+
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ // Ignore undef lanes, we'll optimistically collapse them to the pattern we
+ // want.
+ if (Mask[i] == -1)
+ continue;
+
+ bool IsAnyViable = false;
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j]) {
+ uint64_t N = j + 1;
+
+ // The shuffle mask must be equal to (i * 2^N) % M.
+ if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
+ IsAnyViable = true;
+ else
+ ViableForN[j] = false;
+ }
+ // Early exit if we exhaust the possible powers of two.
+ if (!IsAnyViable)
+ break;
+ }
+
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j])
+ return j + 1;
+
+ // Return 0 as there is no viable power of two.
+ return 0;
+}
+
/// \brief Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
@@ -7675,6 +9458,22 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v16i8, V1, V2, OrigMask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use a zext lowering.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ return ZExt;
+
int MaskStorage[16] = {
OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
@@ -7684,8 +9483,16 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MutableArrayRef<int> LoMask = Mask.slice(0, 8);
MutableArrayRef<int> HiMask = Mask.slice(8, 8);
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
+
// For single-input shuffles, there are some nicer lowering tricks we can use.
- if (isSingleInputShuffleMask(Mask)) {
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i8, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
@@ -7695,10 +9502,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// FIXME: We should check for other patterns which can be widened into an
// i16 shuffle as well.
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
- for (int i = 0; i < 16; i += 2) {
- if (Mask[i] != Mask[i + 1])
+ for (int i = 0; i < 16; i += 2)
+ if (Mask[i] != -1 && Mask[i + 1] != -1 && Mask[i] != Mask[i + 1])
return false;
- }
+
return true;
};
auto tryToWidenViaDuplication = [&]() -> SDValue {
@@ -7759,11 +9566,16 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MVT::v16i8, V1, V1);
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- for (int i = 0; i < 16; i += 2) {
- if (Mask[i] != -1)
- PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
- assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!");
- }
+ for (int i = 0; i < 16; ++i)
+ if (Mask[i] != -1) {
+ int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
+ assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
+ if (PostDupI16Shuffle[i / 2] == -1)
+ PostDupI16Shuffle[i / 2] = MappedMask;
+ else
+          assert(PostDupI16Shuffle[i / 2] == MappedMask &&
+                 "Conflicting entries in the original shuffle!");
+ }
return DAG.getNode(
ISD::BITCAST, DL, MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL,
@@ -7780,21 +9592,127 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
//
// FIXME: We need to handle other interleaving widths (i16, i32, ...).
if (shouldLowerAsInterleaving(Mask)) {
- // FIXME: Figure out whether we should pack these into the low or high
- // halves.
-
- int EMask[16], OMask[16];
+ int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 0 && M < 8) || (M >= 16 && M < 24);
+ });
+ int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 8 && M < 16) || M >= 24;
+ });
+ int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ bool UnpackLo = NumLoHalf >= NumHiHalf;
+ MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
+ MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
for (int i = 0; i < 8; ++i) {
- EMask[i] = Mask[2*i];
- OMask[i] = Mask[2*i + 1];
- EMask[i + 8] = -1;
- OMask[i + 8] = -1;
+ TargetEMask[i] = Mask[2 * i];
+ TargetOMask[i] = Mask[2 * i + 1];
}
SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, Evens, Odds);
+ }
+
+ // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
+ // with PSHUFB. It is important to do this before we attempt to generate any
+ // blends but after all of the single-input lowerings. If the single input
+ // lowerings can find an instruction sequence that is faster than a PSHUFB, we
+ // want to preserve that and we can DAG combine any longer sequences into
+ // a PSHUFB in the end. But once we start blending from multiple inputs,
+ // the complexity of DAG combining bad patterns back into PSHUFB is too high,
+ // and there are *very* few patterns that would actually be faster than the
+ // PSHUFB approach because of its ability to zero lanes.
+ //
+ // FIXME: The only exceptions to the above are blends which are exact
+ // interleavings with direct instructions supporting them. We currently don't
+ // handle those well here.
+ if (Subtarget->hasSSSE3()) {
+ SDValue V1Mask[16];
+ SDValue V2Mask[16];
+ bool V1InUse = false;
+ bool V2InUse = false;
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ for (int i = 0; i < 16; ++i) {
+ if (Mask[i] == -1) {
+ V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
+ } else {
+ const int ZeroMask = 0x80;
+ int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
+ int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
+ if (Zeroable[i])
+ V1Idx = V2Idx = ZeroMask;
+ V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
+ }
+ }
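+
+    // As an illustration with hypothetical lanes: Mask[i] == 20 expands to
+    // V1Mask[i] = 0x80 (PSHUFB's force-to-zero index) and V2Mask[i] = 4, so
+    // the OR of the two PSHUFB results below places byte 4 of V2 in lane i.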
+
+ if (V1InUse)
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (V2InUse)
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+
+ // If we need shuffled inputs from both, blend the two.
+ if (V1InUse && V2InUse)
+ return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ if (V1InUse)
+ return V1; // Single inputs are easy.
+ if (V2InUse)
+ return V2; // Single inputs are easy.
+ // Shuffling to a zeroable vector.
+ return getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
+ }
+
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Elements == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v16i8, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Check whether a compaction lowering can be done. This handles shuffles
+ // which take every Nth element for some even N. See the helper function for
+ // details.
+ //
+ // We special case these as they can be particularly efficiently handled with
+  // the PACKUSWB instruction on x86 and they show up in common patterns of
+ // rearranging bytes to truncate wide elements.
+ if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask)) {
+ // NumEvenDrops is the power of two stride of the elements. Another way of
+ // thinking about it is that we need to drop the even elements this many
+ // times to get the original input.
+ bool IsSingleInput = isSingleInputShuffleMask(Mask);
+
+ // First we need to zero all the dropped bytes.
+ assert(NumEvenDrops <= 3 &&
+ "No support for dropping even elements more than 3 times.");
+ // We use the mask type to pick which bytes are preserved based on how many
+ // elements are dropped.
+ MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
+ SDValue ByteClearMask =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getConstant(0xFF, MaskVTs[NumEvenDrops - 1]));
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
+ if (!IsSingleInput)
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
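+
+    // Sketch for NumEvenDrops == 1 (every other byte kept): the 0x00FF i16
+    // splat above clears the odd bytes, and the single PACKUS below then
+    // compacts the surviving even-indexed bytes of V1 and V2 into one
+    // register.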
+
+ // Now pack things back together.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
+ V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
+ for (int i = 1; i < NumEvenDrops; ++i) {
+ Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result);
+ Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
+ }
+
+ return Result;
}
int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
@@ -7893,15 +9811,1109 @@ static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
-/// \brief Tiny helper function to test whether adjacent masks are sequential.
-static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
- for (int i = 0, Size = Mask.size(); i < Size; i += 2)
- if (Mask[i] + 1 != Mask[i+1])
+/// \brief Helper function to test whether a shuffle mask could be
+/// simplified by widening the elements being shuffled.
+///
+/// Appends the mask for wider elements to WidenedMask if valid. Otherwise
+/// leaves it in an unspecified state.
+///
+/// NOTE: This must handle normal vector shuffle masks and *target* vector
+/// shuffle masks. The latter have the special property of a '-2' representing
+/// a zeroed lane of a vector.
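+///
+/// For example, the mask {0, 1, -1, 3} widens to {0, 1}: the first pair is
+/// even-aligned and adjacent, and in the second pair the undef lane defers to
+/// the odd-aligned value 3, giving 3 / 2 == 1.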
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+ SmallVectorImpl<int> &WidenedMask) {
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+    // If both elements are undef, it's trivial.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+ WidenedMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
+ // Check for an undef mask and a mask value properly aligned to fit with
+ // a pair of values. If we find such a case, use the non-undef mask's value.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) {
+ WidenedMask.push_back(Mask[i + 1] / 2);
+ continue;
+ }
+ if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
+ WidenedMask.push_back(Mask[i] / 2);
+ continue;
+ }
+
+ // When zeroing, we need to spread the zeroing across both lanes to widen.
+ if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
+ if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
+ (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+ WidenedMask.push_back(SM_SentinelZero);
+ continue;
+ }
return false;
+ }
+
+ // Finally check if the two mask values are adjacent and aligned with
+ // a pair.
+ if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
+ WidenedMask.push_back(Mask[i] / 2);
+ continue;
+ }
+
+ // Otherwise we can't safely widen the elements used in this shuffle.
+ return false;
+ }
+ assert(WidenedMask.size() == Mask.size() / 2 &&
+ "Incorrect size of mask after widening the elements!");
return true;
}
+/// \brief Generic routine to split a vector shuffle into half-sized shuffles.
+///
+/// This routine just extracts two subvectors, shuffles them independently, and
+/// then concatenates them back together. This should work effectively with all
+/// AVX vector shuffle types.
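+///
+/// For example, a hypothetical v8f32 mask {0, 9, 2, 11, 4, 13, 6, 15} becomes
+/// two v4f32 shuffles with mask {0, 5, 2, 7}, one over (LoV1, LoV2) and one
+/// over (HiV1, HiV2), whose results are concatenated back together.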
+static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(VT.getSizeInBits() >= 256 &&
+ "Only for 256-bit or wider vector shuffles!");
+ assert(V1.getSimpleValueType() == VT && "Bad operand type!");
+ assert(V2.getSimpleValueType() == VT && "Bad operand type!");
+
+ ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
+ ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
+
+ int NumElements = VT.getVectorNumElements();
+ int SplitNumElements = NumElements / 2;
+ MVT ScalarVT = VT.getScalarType();
+ MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
+
+ SDValue LoV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
+ DAG.getIntPtrConstant(SplitNumElements));
+ SDValue LoV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
+ DAG.getIntPtrConstant(SplitNumElements));
+
+ // Now create two 4-way blends of these half-width vectors.
+ auto HalfBlend = [&](ArrayRef<int> HalfMask) {
+ bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
+ SmallVector<int, 32> V1BlendMask, V2BlendMask, BlendMask;
+ for (int i = 0; i < SplitNumElements; ++i) {
+ int M = HalfMask[i];
+ if (M >= NumElements) {
+ if (M >= NumElements + SplitNumElements)
+ UseHiV2 = true;
+ else
+ UseLoV2 = true;
+ V2BlendMask.push_back(M - NumElements);
+ V1BlendMask.push_back(-1);
+ BlendMask.push_back(SplitNumElements + i);
+ } else if (M >= 0) {
+ if (M >= SplitNumElements)
+ UseHiV1 = true;
+ else
+ UseLoV1 = true;
+ V2BlendMask.push_back(-1);
+ V1BlendMask.push_back(M);
+ BlendMask.push_back(i);
+ } else {
+ V2BlendMask.push_back(-1);
+ V1BlendMask.push_back(-1);
+ BlendMask.push_back(-1);
+ }
+ }
+
+ // Because the lowering happens after all combining takes place, we need to
+ // manually combine these blend masks as much as possible so that we create
+ // a minimal number of high-level vector shuffle nodes.
+
+ // First try just blending the halves of V1 or V2.
+ if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
+ return DAG.getUNDEF(SplitVT);
+ if (!UseLoV2 && !UseHiV2)
+ return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
+ if (!UseLoV1 && !UseHiV1)
+ return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
+
+ SDValue V1Blend, V2Blend;
+ if (UseLoV1 && UseHiV1) {
+ V1Blend =
+ DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
+ } else {
+ // We only use half of V1 so map the usage down into the final blend mask.
+ V1Blend = UseLoV1 ? LoV1 : HiV1;
+ for (int i = 0; i < SplitNumElements; ++i)
+ if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
+ BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
+ }
+ if (UseLoV2 && UseHiV2) {
+ V2Blend =
+ DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
+ } else {
+ // We only use half of V2 so map the usage down into the final blend mask.
+ V2Blend = UseLoV2 ? LoV2 : HiV2;
+ for (int i = 0; i < SplitNumElements; ++i)
+ if (BlendMask[i] >= SplitNumElements)
+ BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
+ }
+ return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
+ };
+ SDValue Lo = HalfBlend(LoMask);
+ SDValue Hi = HalfBlend(HiMask);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+}
+
+/// \brief Either split a vector in halves or decompose the shuffles and the
+/// blend.
+///
+/// This is provided as a good fallback for many lowerings of non-single-input
+/// shuffles with more than one 128-bit lane. In those cases, we want to select
+/// between splitting the shuffle into 128-bit components and stitching those
+/// back together vs. extracting the single-input shuffles and blending those
+/// results.
+static SDValue lowerVectorShuffleAsSplitOrBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(!isSingleInputShuffleMask(Mask) && "This routine must not be used to "
+ "lower single-input shuffles as it "
+ "could then recurse on itself.");
+ int Size = Mask.size();
+
+ // If this can be modeled as a broadcast of two elements followed by a blend,
+ // prefer that lowering. This is especially important because broadcasts can
+ // often fold with memory operands.
+ auto DoBothBroadcast = [&] {
+ int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
+ for (int M : Mask)
+ if (M >= Size) {
+ if (V2BroadcastIdx == -1)
+ V2BroadcastIdx = M - Size;
+ else if (M - Size != V2BroadcastIdx)
+ return false;
+ } else if (M >= 0) {
+ if (V1BroadcastIdx == -1)
+ V1BroadcastIdx = M;
+ else if (M != V1BroadcastIdx)
+ return false;
+ }
+ return true;
+ };
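+ // E.g., a v4f64 mask <0, 5, 0, 5> broadcasts element 0 of V1 and element 1
+ // of V2, so decomposing into two broadcasts plus a blend is preferred.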
+ if (DoBothBroadcast())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ DAG);
+
+ // If the inputs all stem from a single 128-bit lane of each input, then we
+ // split them rather than blending because the split will decompose to
+ // unusually few instructions.
+ int LaneCount = VT.getSizeInBits() / 128;
+ int LaneSize = Size / LaneCount;
+ SmallBitVector LaneInputs[2];
+ LaneInputs[0].resize(LaneCount, false);
+ LaneInputs[1].resize(LaneCount, false);
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0)
+ LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
+ if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+
+ // Otherwise, just fall back to decomposed shuffles and a blend. This requires
+ // that the decomposed single-input shuffles don't end up here.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+}
+
+/// \brief Lower a vector shuffle crossing multiple 128-bit lanes as
+/// a permutation and blend of those lanes.
+///
+/// This essentially blends the out-of-lane inputs to each lane into the lane
+/// from a permuted copy of the vector. This lowering strategy results in four
+/// instructions in the worst case for a single-input cross-lane shuffle, which
+/// is fewer than any other fully general cross-lane shuffle strategy I'm aware
+/// of. Special cases for each particular shuffle pattern should be handled
+/// prior to trying this lowering.
+static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // FIXME: This should probably be generalized for 512-bit vectors as well.
+ assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+ int LaneSize = Mask.size() / 2;
+
+ // If there are only inputs from one 128-bit lane, splitting will in fact be
+ // less expensive. The flags track whether the given lane contains an element
+ // that crosses to another lane.
+ bool LaneCrossing[2] = {false, false};
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
+ LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
+ if (!LaneCrossing[0] || !LaneCrossing[1])
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ SmallVector<int, 32> FlippedBlendMask;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ FlippedBlendMask.push_back(
+ Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
+ ? Mask[i]
+ : Mask[i] % LaneSize +
+ (i / LaneSize) * LaneSize + Size));
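+ // E.g., for the v4f64 single-input mask <2, 3, 0, 1> every element crosses
+ // lanes, so FlippedBlendMask becomes <4, 5, 6, 7> and simply selects the
+ // flipped vector.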
+
+ // Flip the vector, and blend the results which should now be in-lane. The
+ // VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and
+ // 5 for the high source. The value 3 selects the high half of source 2 and
+ // the value 2 selects the low half of source 2. We only use source 2 to
+ // allow folding it into a memory operand.
+ unsigned PERMMask = 3 | 2 << 4;
+ SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
+ V1, DAG.getConstant(PERMMask, MVT::i8));
+ return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
+ }
+
+ // This now reduces to two single-input shuffles of V1 and V2 which at worst
+ // will be handled by the above logic and a blend of the results, much like
+ // other patterns in AVX.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering 2-lane 128-bit shuffles.
+static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ // Blends are faster and handle all the non-lane-crossing cases.
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ // Check for patterns which can be matched with a single insert of a 128-bit
+ // subvector.
+ if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
+ isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+ if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+ DAG.getIntPtrConstant(2));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+
+ // Otherwise form a 128-bit permutation.
+ // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
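+ // E.g., a v4f64 mask <2, 3, 4, 5> yields PermMask = 1 | (2 << 4) = 0x21,
+ // selecting the high half of V1 and the low half of V2.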
+ unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
+ return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, MVT::i8));
+}
+
+/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
+/// shuffling each lane.
+///
+/// This will only succeed when fixing the 128-bit lanes yields a single-input,
+/// non-lane-crossing shuffle with a repeating shuffle mask in each 128-bit
+/// lane. This handles many cases where we can quickly blend away
+/// the lane crosses early and then use simpler shuffles within each lane.
+///
+/// FIXME: It might be worthwhile at some point to support this without
+/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
+/// in x86 only floating point has interesting non-repeating shuffles, and even
+/// those are still *marginally* more expensive.
+static SDValue lowerVectorShuffleByMerging128BitLanes(
+ SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ assert(!isSingleInputShuffleMask(Mask) &&
+ "This is only useful with multiple inputs.");
+
+ int Size = Mask.size();
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ int NumLanes = Size / LaneSize;
+ assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
+
+ // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
+ // check whether the in-128-bit lane shuffles share a repeating pattern.
+ SmallVector<int, 4> Lanes;
+ Lanes.resize(NumLanes, -1);
+ SmallVector<int, 4> InLaneMask;
+ InLaneMask.resize(LaneSize, -1);
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ int j = i / LaneSize;
+
+ if (Lanes[j] < 0) {
+ // First entry we've seen for this lane.
+ Lanes[j] = Mask[i] / LaneSize;
+ } else if (Lanes[j] != Mask[i] / LaneSize) {
+ // This doesn't match the lane selected previously!
+ return SDValue();
+ }
+
+ // Check that within each lane we have a consistent shuffle mask.
+ int k = i % LaneSize;
+ if (InLaneMask[k] < 0) {
+ InLaneMask[k] = Mask[i] % LaneSize;
+ } else if (InLaneMask[k] != Mask[i] % LaneSize) {
+ // This doesn't fit a repeating in-lane mask.
+ return SDValue();
+ }
+ }
+
+ // First shuffle the lanes into place.
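+ // E.g., if a v8f32 shuffle resolves to Lanes = <1, 2>, the v4f64-style
+ // LaneMask becomes <2, 3, 4, 5>: V1's high lane followed by V2's low lane.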
+ MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
+ VT.getSizeInBits() / 64);
+ SmallVector<int, 8> LaneMask;
+ LaneMask.resize(NumLanes * 2, -1);
+ for (int i = 0; i < NumLanes; ++i)
+ if (Lanes[i] >= 0) {
+ LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
+ LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2);
+ SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
+
+ // Cast it back to the type we actually want.
+ LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle);
+
+ // Now do a simple shuffle that isn't lane crossing.
+ SmallVector<int, 8> NewMask;
+ NewMask.resize(Size, -1);
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0)
+ NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
+ assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
+ "Must not introduce lane crosses at this point!");
+
+ return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
+}
+
+/// \brief Test whether the specified input (0 or 1) is in-place blended by the
+/// given mask.
+///
+/// This returns true if the elements from a particular input are already in the
+/// slots required by the given mask and require no permutation.
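+/// For example, the v4 mask <0, 5, 2, 7> leaves both inputs in place:
+/// V1's elements 0 and 2 sit in slots 0 and 2, and V2's elements 1 and 3
+/// (mask indices 5 and 7) sit in slots 1 and 3.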
+static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
+ assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i)
+ return false;
+
+ return true;
+}
+
+/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
+///
+/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
+/// isn't available.
+static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ return lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Subtarget,
+ DAG);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
+ // Non-half-crossing single input shuffles can be lowered with an
+ // interleaved permutation.
+ unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
+ ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
+ DAG.getConstant(VPERMILPMask, MVT::i8));
+ }
+
+ // With AVX2 we have direct support for this permutation.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Otherwise, fall back.
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
+ DAG);
+ }
+
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
+
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+ if (NumV2Elements == 1 && Mask[0] >= 4)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v4f64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check if the blend happens to exactly fit that of SHUFPD.
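+ // E.g., the mask <0, 5, 2, 7> fits the first pattern below and yields
+ // SHUFPDMask = 0b1010.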
+ if ((Mask[0] == -1 || Mask[0] < 2) &&
+ (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) &&
+ (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) &&
+ (Mask[3] == -1 || Mask[3] >= 6)) {
+ unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
+ ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+ if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
+ (Mask[1] == -1 || Mask[1] < 2) &&
+ (Mask[2] == -1 || Mask[2] >= 6) &&
+ (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) {
+ unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
+ ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle. However, if we have AVX2 and either input is already in place,
+ // we can shuffle the other input across lanes with a single instruction,
+ // so skip this pattern.
+ if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+ isShuffleMaskInputInPlace(1, Mask))))
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // If we have AVX2 then we always want to lower with a blend because at v4 we
+ // can fully permute the elements.
+ if (Subtarget->hasAVX2())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
+ Mask, DAG);
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 4-lane 64-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v4i64 shuffling.
+static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!");
+
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ return lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Subtarget,
+ DAG);
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // When the shuffle is mirrored between the 128-bit lanes of the unit, we can
+ // use lower latency instructions that will operate on both 128-bit lanes.
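+ // E.g., the v4i64 mask <1, 0, 3, 2> repeats as <1, 0> in each lane and maps
+ // onto a v8i32 PSHUFD with mask <2, 3, 0, 1>.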
+ SmallVector<int, 2> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
+ if (isSingleInputShuffleMask(Mask)) {
+ int PSHUFDMask[] = {-1, -1, -1, -1};
+ for (int i = 0; i < 2; ++i)
+ if (RepeatedMask[i] >= 0) {
+ PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
+ PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
+ }
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v4i64,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
+ }
+
+ // AVX2 provides a direct instruction for permuting a single input across
+ // lanes.
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle. However, if we have AVX2 and either input is already in place,
+ // we can shuffle the other input across lanes with a single instruction,
+ // so skip this pattern.
+ if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+ isShuffleMaskInputInPlace(1, Mask))))
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
+ Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 32-bit floating point shuffles.
+///
+/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
+/// isn't available.
+static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8f32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // If the shuffle mask is repeated in each 128-bit lane, we have many more
+ // options to efficiently lower the shuffle.
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
+ assert(RepeatedMask.size() == 4 &&
+ "Repeated masks must be half the mask width!");
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
+
+ // Otherwise, fall back to a SHUFPS sequence. Here it is important that we
+ // have already handled any direct blends. We also need to squash the
+ // repeated mask into a simulated v4f32 mask.
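+ // E.g., the repeated mask <3, 8, 1, 10> is squashed to the v4f32-style mask
+ // <3, 4, 1, 6>.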
+ for (int i = 0; i < 4; ++i)
+ if (RepeatedMask[i] >= 8)
+ RepeatedMask[i] -= 4;
+ return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
+ }
+
+ // If we have a single input shuffle with different shuffle patterns in the
+ // two 128-bit lanes use the variable mask to VPERMILPS.
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue VPermMask[8];
+ for (int i = 0; i < 8; ++i)
+ VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
+ : DAG.getConstant(Mask[i], MVT::i32);
+ if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
+ return DAG.getNode(
+ X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
+
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8f32,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v8i32, VPermMask)),
+ V1);
+
+ // Otherwise, fall back.
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
+ DAG);
+ }
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // If we have AVX2 then we always want to lower with a blend because at v8 we
+ // can fully permute the elements.
+ if (Subtarget->hasAVX2())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
+ Mask, DAG);
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 32-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v8i32 shuffling.
+static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // If the shuffle mask is repeated in each 128-bit lane we can use more
+ // efficient instructions that mirror the shuffles across the two 128-bit
+ // lanes.
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
+ assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
+ }
+
+ // If the shuffle pattern isn't repeated but the shuffle is single-input,
+ // directly generate a cross-lane VPERMD instruction.
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue VPermMask[8];
+ for (int i = 0; i < 8; ++i)
+ VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
+ : DAG.getConstant(Mask[i], MVT::i32);
+ return DAG.getNode(
+ X86ISD::VPERMV, DL, MVT::v8i32,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
+ }
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
+ Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v16i16 shuffling.
+static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i16, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask,
+ // First 128-bit lane:
+ 0, 16, 1, 17, 2, 18, 3, 19,
+ // Second 128-bit lane:
+ 8, 24, 9, 25, 10, 26, 11, 27))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
+ if (isShuffleEquivalent(Mask,
+ // First 128-bit lane:
+ 4, 20, 5, 21, 6, 22, 7, 23,
+ // Second 128-bit lane:
+ 12, 28, 13, 29, 14, 30, 15, 31))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // There are no generalized cross-lane shuffle operations available on i16
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
+ Mask, DAG);
+
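+ // Each in-lane 16-bit element M expands to the byte pair (2*M, 2*M+1) in
+ // the v32i8 PSHUFB mask; e.g., in-lane element 3 becomes bytes 6 and 7.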
+ SDValue PSHUFBMask[32];
+ for (int i = 0; i < 16; ++i) {
+ if (Mask[i] == -1) {
+ PSHUFBMask[2 * i] = PSHUFBMask[2 * i + 1] = DAG.getUNDEF(MVT::i8);
+ continue;
+ }
+
+ int M = i < 8 ? Mask[i] : Mask[i] - 8;
+ assert(M >= 0 && M < 8 && "Invalid single-input mask!");
+ PSHUFBMask[2 * i] = DAG.getConstant(2 * M, MVT::i8);
+ PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, MVT::i8);
+ }
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i16,
+ DAG.getNode(
+ X86ISD::PSHUFB, DL, MVT::v32i8,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
+ }
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 32-lane 8-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v32i8 shuffling.
+static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v32i8, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ // Note that these are repeated 128-bit lane unpacks, not unpacks across all
+ // 256-bit lanes.
+ if (isShuffleEquivalent(
+ Mask,
+ // First 128-bit lane:
+ 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
+ // Second 128-bit lane:
+ 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
+ if (isShuffleEquivalent(
+ Mask,
+ // First 128-bit lane:
+ 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
+ // Second 128-bit lane:
+ 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // There are no generalized cross-lane shuffle operations available on i8
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
+ Mask, DAG);
+
+ SDValue PSHUFBMask[32];
+ for (int i = 0; i < 32; ++i)
+ PSHUFBMask[i] =
+ Mask[i] < 0
+ ? DAG.getUNDEF(MVT::i8)
+ : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, MVT::i8);
+
+ return DAG.getNode(
+ X86ISD::PSHUFB, DL, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
+ }
+
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
+}
+
+/// \brief High-level routine to lower various 256-bit x86 vector shuffles.
+///
+/// This routine either breaks down the specific type of a 256-bit x86 vector
+/// shuffle or splits it into two 128-bit shuffles and fuses the results back
+/// together based on the available instructions.
+static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // There is a really nice hard cut-over between AVX1 and AVX2 that means we can
+ // check for those subtargets here and avoid much of the subtarget querying in
+ // the per-vector-type lowering routines. With AVX1 we have essentially *zero*
+ // ability to manipulate a 256-bit vector with integer types. Since we'll use
+ // floating point types there eventually, just immediately cast everything to
+ // a float and operate entirely in that domain.
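+ // E.g., with AVX1 only, a v8i32 shuffle is bitcast to v8f32 and re-lowered,
+ // while a v16i16 shuffle is split into two 128-bit halves.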
+ if (VT.isInteger() && !Subtarget->hasAVX2()) {
+ int ElementBits = VT.getScalarSizeInBits();
+ if (ElementBits < 32)
+ // No floating point type available, decompose into 128-bit vectors.
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+
+ MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
+ VT.getVectorNumElements());
+ V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
+ }
+
+ switch (VT.SimpleTy) {
+ case MVT::v4f64:
+ return lowerV4F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4i64:
+ return lowerV4I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8f32:
+ return lowerV8F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i32:
+ return lowerV8I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i16:
+ return lowerV16I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v32i8:
+ return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+
+ default:
+ llvm_unreachable("Not a valid 256-bit x86 vector type!");
+ }
+}
+
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
+static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask,
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ 8, 24, 9, 25, 12, 28, 13, 29))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
+ if (isShuffleEquivalent(Mask,
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ 10, 26, 11, 27, 14, 30, 15, 31))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
+static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
+static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask,
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ 8, 24, 9, 25, 12, 28, 13, 29))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
+ if (isShuffleEquivalent(Mask,
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ 10, 26, 11, 27, 14, 30, 15, 31))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
+static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
+ assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
+static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
+ assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
+}
+
+/// \brief High-level routine to lower various 512-bit x86 vector shuffles.
+///
+/// This routine either breaks down the specific type of a 512-bit x86 vector
+/// shuffle or splits it into two 256-bit shuffles and fuses the results back
+/// together based on the available instructions.
+static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Subtarget->hasAVX512() &&
+ "Cannot lower 512-bit vectors w/ basic ISA!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // Dispatch to each element type for lowering. If we don't have support for
+ // specific element type shuffles at 512 bits, immediately split them and
+ // lower them. Each lowering routine of a given type is allowed to assume that
+ // the requisite ISA extensions for that element type are available.
+ switch (VT.SimpleTy) {
+ case MVT::v8f64:
+ return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16f32:
+ return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i64:
+ return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i32:
+ return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v32i16:
+ if (Subtarget->hasBWI())
+ return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ break;
+ case MVT::v64i8:
+ if (Subtarget->hasBWI())
+ return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ break;
+
+ default:
+ llvm_unreachable("Not a valid 512-bit x86 vector type!");
+ }
+
+ // Otherwise fall back on splitting.
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+}
+
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
@@ -7945,22 +10957,25 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
}
- // For integer vector shuffles, try to collapse them into a shuffle of fewer
- // lanes but wider integers. We cap this to not form integers larger than i64
- // but it might be interesting to form i128 integers to handle flipping the
- // low and high halves of AVX 256-bit vectors.
- if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
- areAdjacentMasksSequential(Mask)) {
- SmallVector<int, 8> NewMask;
- for (int i = 0, Size = Mask.size(); i < Size; i += 2)
- NewMask.push_back(Mask[i] / 2);
- MVT NewVT =
- MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
- VT.getVectorNumElements() / 2);
- V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+ // Try to collapse shuffles into using a vector type with fewer elements but
+ // wider element types. We cap this to not form integers or floating point
+ // elements wider than 64 bits, but it might be interesting to form i128
+ // integers to handle flipping the low and high halves of AVX 256-bit vectors.
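+ // E.g., a v4i32 shuffle with mask <0, 1, 6, 7> widens to a v2i64 shuffle
+ // with mask <0, 3>.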
+ SmallVector<int, 16> WidenedMask;
+ if (VT.getScalarSizeInBits() < 64 &&
+ canWidenShuffleElements(Mask, WidenedMask)) {
+ MVT NewEltVT = VT.isFloatingPoint()
+ ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
+ : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
+ MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+ // Make sure that the new vector type is legal. For example, v2f64 isn't
+ // legal on SSE1.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
+ }
}
int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
@@ -7979,7 +10994,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getCommutedVectorShuffle(*SVOp);
// When the number of V1 and V2 elements are the same, try to minimize the
- // number of uses of V2 in the low half of the vector.
+ // number of uses of V2 in the low half of the vector. When that is tied,
+ // ensure that the sum of indices for V1 is equal to or lower than the sum
+ // of indices for V2. When those are equal, try to ensure that the number of
+ // odd indices for V1 is lower than the number of odd indices for V2.
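+ // E.g., for the v4 mask <4, 1, 6, 3> the low-half counts tie, but the V2
+ // index sum (positions 0 + 2) is less than the V1 sum (positions 1 + 3), so
+ // the operands are commuted.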
if (NumV1Elements == NumV2Elements) {
int LowV1Elements = 0, LowV2Elements = 0;
for (int M : SVOp->getMask().slice(0, NumElements / 2))
@@ -7987,14 +11005,42 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
++LowV2Elements;
else if (M >= 0)
++LowV1Elements;
- if (LowV2Elements > LowV1Elements)
+ if (LowV2Elements > LowV1Elements) {
return DAG.getCommutedVectorShuffle(*SVOp);
+ } else if (LowV2Elements == LowV1Elements) {
+ int SumV1Indices = 0, SumV2Indices = 0;
+ for (int i = 0, Size = SVOp->getMask().size(); i < Size; ++i)
+ if (SVOp->getMask()[i] >= NumElements)
+ SumV2Indices += i;
+ else if (SVOp->getMask()[i] >= 0)
+ SumV1Indices += i;
+ if (SumV2Indices < SumV1Indices) {
+ return DAG.getCommutedVectorShuffle(*SVOp);
+ } else if (SumV2Indices == SumV1Indices) {
+ int NumV1OddIndices = 0, NumV2OddIndices = 0;
+ for (int i = 0, Size = SVOp->getMask().size(); i < Size; ++i)
+ if (SVOp->getMask()[i] >= NumElements)
+ NumV2OddIndices += i % 2;
+ else if (SVOp->getMask()[i] >= 0)
+ NumV1OddIndices += i % 2;
+ if (NumV2OddIndices < NumV1OddIndices)
+ return DAG.getCommutedVectorShuffle(*SVOp);
+ }
+ }
}
// For each vector width, delegate to a specialized lowering routine.
if (VT.getSizeInBits() == 128)
return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+ if (VT.getSizeInBits() == 256)
+ return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
+ // AVX-512 support is still incomplete; the 512-bit lowering routine splits
+ // any element types it cannot yet handle directly.
+ // FIXME: Implement full AVX-512 support!
+ if (VT.getSizeInBits() == 512)
+ return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
llvm_unreachable("Unimplemented!");
}
@@ -9060,7 +12106,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
// should assume we're changing V2's element's place and behave
// accordingly.
int FromV2 = std::count_if(Mask.begin(), Mask.end(), FromV2Predicate);
- if (FromV1 == FromV2 && DestIndex == Mask[DestIndex] % 4) {
+ assert(DestIndex <= INT32_MAX && "truncated destination index");
+ if (FromV1 == FromV2 &&
+ static_cast<int>(DestIndex) == Mask[DestIndex] % 4) {
From = V2;
To = V1;
DestIndex =
@@ -9163,37 +12211,6 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
- // Simplify the operand as it's prepared to be fed into shuffle.
- unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
- if (V1.getOpcode() == ISD::BITCAST &&
- V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
- V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V1.getOperand(0).getOperand(0)
- .getSimpleValueType().getSizeInBits() == SignificantBits) {
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
- ConstantSDNode *CIdx =
- dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
- // If it's foldable, i.e. normal load with single use, we will let code
- // selection to fold it. Otherwise, we will short the conversion sequence.
- if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
- MVT FullVT = V.getSimpleValueType();
- MVT V1VT = V1.getSimpleValueType();
- if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
- // The "ext_vec_elt" node is wider than the result node.
- // In this case we should extract subvector from V.
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
- unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
- MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
- FullVT.getVectorNumElements()/Ratio);
- V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
- DAG.getIntPtrConstant(0));
- }
- V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
- }
- }
-
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
@@ -9343,7 +12360,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1, TargetMask,
DAG);
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
@@ -9355,6 +12372,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePALIGNRImmediate(SVOp),
DAG);
+ if (isVALIGNMask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VALIGN, dl, VT, V1, V2,
+ getShuffleVALIGNImmediate(SVOp),
+ DAG);
+
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
@@ -9520,7 +12542,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
}
@@ -9639,9 +12661,10 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
return true;
}
-// Try to lower a vselect node into a simple blend instruction.
-static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+/// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
+/// instruction.
+static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
@@ -9683,7 +12706,14 @@ static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ return SDValue();
+
+ SDValue BlendOp = lowerVSELECTtoBLENDI(Op, Subtarget, DAG);
if (BlendOp.getNode())
return BlendOp;
@@ -9696,6 +12726,8 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
break;
case MVT::v8i16:
case MVT::v16i16:
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ break;
return SDValue();
}
@@ -9914,62 +12946,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- MVT EltVT = VT.getVectorElementType();
- SDLoc dl(Op);
-
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
-
- if (!VT.is128BitVector())
- return SDValue();
-
- if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
- isa<ConstantSDNode>(N2)) {
- unsigned Opc;
- if (VT == MVT::v8i16)
- Opc = X86ISD::PINSRW;
- else if (VT == MVT::v16i8)
- Opc = X86ISD::PINSRB;
- else
- Opc = X86ISD::PINSRB;
-
- // Transform it so it match pinsr{b,w} which expects a GR32 as its second
- // argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(Opc, dl, VT, N0, N1, N2);
- }
-
- if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
- // Bits [7:6] of the constant are the source select. This will always be
- // zero here. The DAG Combiner may combine an extract_elt index into these
- // bits. For example (insert (extract, 3), 2) could be matched by putting
- // the '3' into bits [7:6] of X86ISD::INSERTPS.
- // Bits [5:4] of the constant are the destination select. This is the
- // value of the incoming immediate.
- // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
- // combine either bitwise AND or insert of float 0.0 to set these bits.
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
- // Create this as a scalar to vector..
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- }
-
- if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
- // PINSR* works with constant index.
- return Op;
- }
- return SDValue();
-}
-
/// Insert one bit to mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
-SDValue
+SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
@@ -9982,7 +12961,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
// insert element and then truncate the result.
MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
- SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
@@ -10001,11 +12980,12 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(MaxSift - IdxVal, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+
+SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
-
+
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
@@ -10013,20 +12993,20 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+ auto *N2C = cast<ConstantSDNode>(N2);
+ unsigned IdxVal = N2C->getZExtValue();
- // If this is a 256-bit vector result, first extract the 128-bit vector,
- // insert the element into the extracted half and then place it back.
+ // If the vector is wider than 128 bits, extract the 128-bit subvector, insert
+ // into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
- if (!isa<ConstantSDNode>(N2))
- return SDValue();
-
// Get the desired 128-bit vector half.
- unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired half.
- unsigned NumEltsIn128 = 128/EltVT.getSizeInBits();
- unsigned IdxIn128 = IdxVal - (IdxVal/NumEltsIn128) * NumEltsIn128;
+ unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
+ unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, MVT::i32));
@@ -10034,20 +13014,60 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// Insert the changed part back to the 256-bit vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
+ assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- if (Subtarget->hasSSE41())
- return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Subtarget->hasSSE41()) {
+ if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
+ unsigned Opc;
+ if (VT == MVT::v8i16) {
+ Opc = X86ISD::PINSRW;
+ } else {
+ assert(VT == MVT::v16i8);
+ Opc = X86ISD::PINSRB;
+ }
+
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(IdxVal);
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ }
+
+ if (EltVT == MVT::f32) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
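+ // E.g., inserting into element 2 produces the immediate (2 << 4) = 0x20.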
+ N2 = DAG.getIntPtrConstant(IdxVal << 4);
+ // Create this as a scalar-to-vector operation.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ }
+
+ if (EltVT == MVT::i32 || EltVT == MVT::i64) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ }
if (EltVT == MVT::i8)
return SDValue();
- if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ if (EltVT.getSizeInBits() == 16) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ N2 = DAG.getIntPtrConstant(IdxVal);
return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
@@ -10360,6 +13380,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MFI->setAdjustsStack(true);
+ MFI->setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -10593,7 +13614,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
IDX, MachinePointerInfo(), MVT::i32,
- false, false, 0);
+ false, false, false, 0);
else
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
@@ -10677,9 +13698,17 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ SDLoc dl(Op);
- if (SrcVT.isVector())
+ if (SrcVT.isVector()) {
+ if (SrcVT.getVectorElementType() == MVT::i1) {
+ MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
+ Op.getOperand(0)));
+ }
return SDValue();
+ }
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -10693,7 +13722,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
- SDLoc dl(Op);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
@@ -10880,19 +13908,135 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
return Sub;
}
+static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // The algorithm is the following:
+ // #ifdef __SSE4_1__
+ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
+ // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
+ // (uint4) 0x53000000, 0xaa);
+ // #else
+ // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
+ // uint4 hi = (v >> 16) | (uint4) 0x53000000;
+ // #endif
+ // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
+ // return (float4) lo + fhi;
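+ //
+ // As IEEE-754 floats, 0x4b000000 is 0x1.0p23 and 0x53000000 is 0x1.0p39, so
+ // lo encodes 2^23 + (v & 0xffff) and hi encodes 2^39 + (v >> 16) * 2^16.
+ // Subtracting (0x1.0p39f + 0x1.0p23f) leaves fhi = (v >> 16) * 2^16 - 2^23
+ // exactly, and the final addition rounds to the float value of v.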
+
+ SDLoc DL(Op);
+ SDValue V = Op->getOperand(0);
+ EVT VecIntVT = V.getValueType();
+ bool Is128 = VecIntVT == MVT::v4i32;
+ EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
+ // If we convert to something other than the supported type, e.g., to v4f64,
+ // abort early.
+ if (VecFloatVT != Op->getValueType(0))
+ return SDValue();
+
+ unsigned NumElts = VecIntVT.getVectorNumElements();
+ assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
+ "Unsupported custom type");
+ assert(NumElts <= 8 && "The size of the constant array must be fixed");
+
+ // In the #ifdef/#else code above, we have in common:
+ // - The vector of constants:
+ // -- 0x4b000000
+ // -- 0x53000000
+ // - A shift:
+ // -- v >> 16
+
+ // Create the splat vector for 0x4b000000.
+ SDValue CstLow = DAG.getConstant(0x4b000000, MVT::i32);
+ SDValue CstLowArray[] = {CstLow, CstLow, CstLow, CstLow,
+ CstLow, CstLow, CstLow, CstLow};
+ SDValue VecCstLow = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstLowArray[0], NumElts));
+ // Create the splat vector for 0x53000000.
+ SDValue CstHigh = DAG.getConstant(0x53000000, MVT::i32);
+ SDValue CstHighArray[] = {CstHigh, CstHigh, CstHigh, CstHigh,
+ CstHigh, CstHigh, CstHigh, CstHigh};
+ SDValue VecCstHigh = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstHighArray[0], NumElts));
+
+ // Create the right shift.
+ SDValue CstShift = DAG.getConstant(16, MVT::i32);
+ SDValue CstShiftArray[] = {CstShift, CstShift, CstShift, CstShift,
+ CstShift, CstShift, CstShift, CstShift};
+ SDValue VecCstShift = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstShiftArray[0], NumElts));
+ SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
+
+ SDValue Low, High;
+ if (Subtarget.hasSSE41()) {
+ EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
+ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
+ SDValue VecCstLowBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow);
+ SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V);
+ // Low will be bitcasted right away, so do not bother bitcasting back to its
+ // original type.
+ Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
+ VecCstLowBitcast, DAG.getConstant(0xaa, MVT::i32));
+ // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
+ // (uint4) 0x53000000, 0xaa);
+ SDValue VecCstHighBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh);
+ SDValue VecShiftBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift);
+ // High will be bitcasted right away, so do not bother bitcasting back to
+ // its original type.
+ High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
+ VecCstHighBitcast, DAG.getConstant(0xaa, MVT::i32));
+ } else {
+ SDValue CstMask = DAG.getConstant(0xffff, MVT::i32);
+ SDValue VecCstMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, CstMask,
+ CstMask, CstMask, CstMask);
+ // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
+ SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
+ Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
+
+ // uint4 hi = (v >> 16) | (uint4) 0x53000000;
+ High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
+ }
+
+ // Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
+ SDValue CstFAdd = DAG.getConstantFP(
+ APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), MVT::f32);
+ SDValue CstFAddArray[] = {CstFAdd, CstFAdd, CstFAdd, CstFAdd,
+ CstFAdd, CstFAdd, CstFAdd, CstFAdd};
+ SDValue VecCstFAdd = DAG.getNode(ISD::BUILD_VECTOR, DL, VecFloatVT,
+ makeArrayRef(&CstFAddArray[0], NumElts));
+
+ // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
+ SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High);
+ SDValue FHigh =
+ DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
+ // return (float4) lo + fhi;
+ SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low);
+ return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
+}
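
// Editor's sketch (not part of this patch): a scalar reference for the bit
// trick above, assuming IEEE-754 single precision. 0x4b000000 is 2^23, so
// OR-ing the low 16 bits of v into its mantissa yields exactly 2^23 + lo16;
// 0x53000000 is 2^39, whose mantissa ULP is 2^16, so the high half becomes
// 2^39 + hi16 * 2^16. Subtracting both biases and adding the halves
// reconstructs v with one final rounding. Names here are illustrative only.
#include <cstdint>
#include <cstring>
static float Uint32ToFloatRef(uint32_t V) {
  uint32_t LoBits = (V & 0xffff) | 0x4b000000; // bits of 2^23 + lo16
  uint32_t HiBits = (V >> 16) | 0x53000000;    // bits of 2^39 + hi16 * 2^16
  float Lo, Hi;
  std::memcpy(&Lo, &LoBits, 4);
  std::memcpy(&Hi, &HiBits, 4);
  float FHi = Hi - (0x1.0p39f + 0x1.0p23f);    // exact: removes both biases
  return Lo + FHi;                             // rounds once -> (float)V
}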
+
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);
- assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
- SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
- "Custom UINT_TO_FP is not supported!");
-
- MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
- return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+ switch (SVT.SimpleTy) {
+ default:
+ llvm_unreachable("Custom UINT_TO_FP is not supported!");
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v8i8:
+ case MVT::v8i16: {
+ MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+ }
+ case MVT::v4i32:
+ case MVT::v8i32:
+ return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
+ }
+ llvm_unreachable(nullptr);
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
@@ -10978,7 +14122,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, 4);
+ MVT::f32, false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
@@ -11192,12 +14336,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
- if (InVT == MVT::i32)
+ if (InVT.getSizeInBits() >= 32)
return SDValue();
- if (InVT.getSizeInBits() == 64)
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In);
- else if (InVT.getSizeInBits() < 32)
- In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+ In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
@@ -11215,7 +14356,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
}
-
+
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
@@ -11375,58 +14516,47 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
-static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
+/// The only differences between FABS and FNEG are the mask and the logic op.
+/// FNEG also has a folding opportunity for FNEG(FABS(x)).
+static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
+ assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
+ "Wrong opcode for lowering FABS or FNEG.");
+
+ bool IsFABS = (Op.getOpcode() == ISD::FABS);
+
+ // If this is a FABS and it has an FNEG user, bail out to fold the combination
+ // into an FNABS. We'll lower the FABS after that if it is still in use.
+ if (IsFABS)
+ for (SDNode *User : Op->uses())
+ if (User->getOpcode() == ISD::FNEG)
+ return Op;
+
+ SDValue Op0 = Op.getOperand(0);
+ bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
+
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ // Assume scalar op for initialization; update for vector if needed.
+ // Note that there are no scalar bitwise logical SSE/AVX instructions, so we
+ // generate a 16-byte vector constant and logic op even for the scalar case.
+ // Using a 16-byte mask allows folding the load of the mask with
+ // the logic op, so it can save ~4 bytes of code size.
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
+ // decide if we should generate a 16-byte constant mask when we only need 4 or
+ // 8 bytes for the scalar case.
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
}
- Constant *C;
- if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
- APInt(64, ~(1ULL << 63))));
- else
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
- APInt(32, ~(1U << 31))));
- C = ConstantVector::getSplat(NumElts, C);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- if (VT.isVector()) {
- MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::AND, dl, ANDVT,
- DAG.getNode(ISD::BITCAST, dl, ANDVT,
- Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
- }
- return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
-}
-static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+ unsigned EltBits = EltVT.getSizeInBits();
LLVMContext *Context = DAG.getContext();
- SDLoc dl(Op);
- MVT VT = Op.getSimpleValueType();
- MVT EltVT = VT;
- unsigned NumElts = VT == MVT::f64 ? 2 : 4;
- if (VT.isVector()) {
- EltVT = VT.getVectorElementType();
- NumElts = VT.getVectorNumElements();
- }
- Constant *C;
- if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
- APInt(64, 1ULL << 63)));
- else
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
- APInt(32, 1U << 31)));
+ // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
+ APInt MaskElt =
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+ Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
@@ -11434,16 +14564,24 @@ static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
+
if (VT.isVector()) {
- MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
+ // For a vector, cast operands to a vector type, perform the logic op,
+ // and cast the result back to the original value type.
+ MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
+ SDValue Operand = IsFNABS ?
+ DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
+ DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
+ unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, XORVT,
- DAG.getNode(ISD::BITCAST, dl, XORVT,
- Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
+ DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}
- return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ // If not vector, then scalar.
+ unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+ SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
+ return DAG.getNode(BitOp, dl, VT, Operand, Mask);
}
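
// Editor's sketch (not part of this patch): a scalar model of the three
// bitwise forms selected above, assuming IEEE-754 single precision. FABS
// clears the sign bit (AND), FNEG flips it (XOR), FNABS sets it (OR); helper
// names are illustrative only.
#include <cstdint>
#include <cstring>
static float FAbsBits(float X) {
  uint32_t B; std::memcpy(&B, &X, 4);
  B &= 0x7fffffffU;                    // AND: clear the sign bit
  std::memcpy(&X, &B, 4); return X;
}
static float FNegBits(float X) {
  uint32_t B; std::memcpy(&B, &X, 4);
  B ^= 0x80000000U;                    // XOR: flip the sign bit
  std::memcpy(&X, &B, 4); return X;
}
static float FNAbsBits(float X) {
  uint32_t B; std::memcpy(&B, &X, 4);
  B |= 0x80000000U;                    // OR: set the sign bit
  std::memcpy(&X, &B, 4); return X;
}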
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -11469,19 +14607,17 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
- // First get the sign bit of second operand.
- SmallVector<Constant*,4> CV;
- if (SrcVT == MVT::f64) {
- const fltSemantics &Sem = APFloat::IEEEdouble;
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
- } else {
- const fltSemantics &Sem = APFloat::IEEEsingle;
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
- }
+ const fltSemantics &Sem =
+ VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
+ const unsigned SizeInBits = VT.getSizeInBits();
+
+ SmallVector<Constant *, 4> CV(
+ VT == MVT::f64 ? 2 : 4,
+ ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
+
+ // First, clear all bits but the sign bit from the second operand (sign).
+ CV[0] = ConstantFP::get(*Context,
+ APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1)));
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
@@ -11489,40 +14625,30 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
- // Shift sign bit right or left if the two operands have different types.
- if (SrcVT.bitsGT(VT)) {
- // Op0 is MVT::f32, Op1 is MVT::f64.
- SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
- SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
- DAG.getConstant(32, MVT::i32));
- SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
- SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
- DAG.getIntPtrConstant(0));
- }
-
- // Clear first operand sign bit.
- CV.clear();
- if (VT == MVT::f64) {
- const fltSemantics &Sem = APFloat::IEEEdouble;
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
- APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
+ // Next, clear the sign bit from the first operand (magnitude).
+ // If it's a constant, we can clear it here.
+ if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Op0)) {
+ APFloat APF = Op0CN->getValueAPF();
+ // If the magnitude is a positive zero, the sign bit alone is enough.
+ if (APF.isPosZero())
+ return SignBit;
+ APF.clearSign();
+ CV[0] = ConstantFP::get(*Context, APF);
} else {
- const fltSemantics &Sem = APFloat::IEEEsingle;
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
- APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV[0] = ConstantFP::get(
+ *Context,
+ APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1)));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
- SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
- SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
-
- // Or the value with the sign bit.
+ SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+ // If the magnitude operand wasn't a constant, we need to AND out the sign.
+ if (!isa<ConstantFPSDNode>(Op0))
+ Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Val);
+
+ // OR the magnitude value with the sign bit.
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
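
// Editor's sketch (not part of this patch): a scalar model of the mask/merge
// sequence above, assuming IEEE-754 single precision. One AND keeps only the
// sign bit of the sign operand, another clears the sign of the magnitude, and
// an OR merges the two.
#include <cstdint>
#include <cstring>
static float CopySignBits(float Mag, float Sgn) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, 4);
  std::memcpy(&S, &Sgn, 4);
  uint32_t R = (M & 0x7fffffffU)   // FAND: drop the old sign
             | (S & 0x80000000U);  // FAND + FOR: install the new sign
  float Out;
  std::memcpy(&Out, &R, 4);
  return Out;
}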
@@ -11537,8 +14663,7 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
}
-// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
-//
+// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
@@ -11897,12 +15022,12 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
if (Op0.getValueType() == MVT::i1)
llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
}
-
+
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
- // Do the comparison at i32 if it's smaller, besides the Atom case.
- // This avoids subregister aliasing issues. Keep the smaller reference
- // if we're optimizing for size, however, as that'll allow better folding
+ // Do the comparison at i32 if it's smaller, besides the Atom case.
+ // This avoids subregister aliasing issues. Keep the smaller reference
+ // if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
!DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
@@ -11946,6 +15071,66 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
+/// The minimum architected relative accuracy is 2^-12. We need one
+/// Newton-Raphson step to have a good float result (24 bits of precision).
+SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor and/or sqrt operand.
+ if (!Subtarget->useSqrtEst())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // SSE1 has rsqrtss and rsqrtps.
+ // TODO: Add support for AVX512 (v16f32).
+ // It is likely not profitable to do this for f64 because a double-precision
+ // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
+ // instructions: convert to single, rsqrtss, convert back to double, refine
+ // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
+ // along with FMA, this could be a throughput win.
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = 1;
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
+}
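
// Editor's sketch (not part of this patch): an SSE1 intrinsics illustration of
// the single Newton-Raphson step the comment above refers to. With x0 accurate
// to about 2^-12, one refinement x1 = 0.5 * x0 * (3 - a * x0^2) roughly
// squares the relative error, giving ~24 good bits.
#include <xmmintrin.h>
static __m128 RsqrtWithNR(__m128 A) {
  __m128 X0 = _mm_rsqrt_ps(A);                      // estimate, ~2^-12 accurate
  __m128 Half = _mm_set1_ps(0.5f), Three = _mm_set1_ps(3.0f);
  __m128 AX0X0 = _mm_mul_ps(A, _mm_mul_ps(X0, X0)); // a * x0^2
  return _mm_mul_ps(_mm_mul_ps(Half, X0),
                    _mm_sub_ps(Three, AX0X0));      // one refinement step
}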
+
+/// The minimum architected relative accuracy is 2^-12. We need one
+/// Newton-Raphson step to have a good float result (24 bits of precision).
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor.
+ if (!Subtarget->useReciprocalEst())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
+ // TODO: Add support for AVX512 (v16f32).
+ // It is likely not profitable to do this for f64 because a double-precision
+ // reciprocal estimate with refinement on x86 prior to FMA requires
+ // 15 instructions: convert to single, rcpss, convert back to double, refine
+ // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
+ // along with FMA, this could be a throughput win.
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = ReciprocalEstimateRefinementSteps;
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
+}
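
// Editor's sketch (not part of this patch): the matching SSE1 illustration for
// the reciprocal case -- one Newton-Raphson step x1 = x0 * (2 - a * x0)
// applied to the ~2^-12-accurate rcpps estimate.
#include <xmmintrin.h>
static __m128 RecipWithNR(__m128 A) {
  __m128 X0 = _mm_rcp_ps(A);                        // estimate, ~2^-12 accurate
  __m128 Two = _mm_set1_ps(2.0f);
  return _mm_mul_ps(X0, _mm_sub_ps(Two, _mm_mul_ps(A, X0)));
}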
+
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
@@ -12105,7 +15290,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
+ assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
Op.getValueType().getScalarType() == MVT::i1 &&
"Cannot set masked compare for this operation");
@@ -12219,11 +15404,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
EVT OpVT = Op1.getValueType();
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
+ (Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// In AVX-512 architecture setcc returns mask with i1 elements,
- // But there is no compare instruction for i8 and i16 elements.
+ // but there is no compare instruction for i8 and i16 elements in KNL.
// We are not talking about 512-bit operands in this case, these
// types are illegal.
if (MaskResult &&
@@ -12426,8 +15612,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode())
+ if (NewSetCC.getNode()) {
+ if (VT == MVT::i1)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
return NewSetCC;
+ }
}
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
@@ -12729,18 +15918,40 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
+ MVT VTElt = VT.getVectorElementType();
+ MVT InVTElt = InVT.getVectorElementType();
SDLoc dl(Op);
+ // SKX: with BWI/DQI and VLX, sign-extension from i1 lowers directly to VSEXT.
+ if ((InVTElt == MVT::i1) &&
+ (((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+ VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
+
+ ((Subtarget->hasBWI() && VT.is512BitVector() &&
+ VTElt.getSizeInBits() <= 16)) ||
+
+ ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
+ VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
+
+ ((Subtarget->hasDQI() && VT.is512BitVector() &&
+ VTElt.getSizeInBits() >= 32))))
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
unsigned int NumElts = VT.getVectorNumElements();
+
if (NumElts != 8 && NumElts != 16)
return SDValue();
- if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
+ if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
+ return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ }
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
@@ -12768,7 +15979,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
- return LowerSIGN_EXTEND_AVX512(Op, DAG);
+ return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
@@ -12811,6 +16022,208 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+// Lower vector extended loads using a shuffle. If SSSE3 is not available we
+// may emit an illegal shuffle but the expansion is still better than scalar
+// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
+// we'll emit a shuffle and an arithmetic shift.
+// TODO: It is possible to support ZExt by zeroing the undef values during
+// the shuffle phase or after the shuffle.
+static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT RegVT = Op.getSimpleValueType();
+ assert(RegVT.isVector() && "We only custom lower vector sext loads.");
+ assert(RegVT.isInteger() &&
+ "We only custom lower integer vector sext loads.");
+
+ // Nothing useful we can do without SSE2 shuffles.
+ assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2.");
+
+ LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
+ SDLoc dl(Ld);
+ EVT MemVT = Ld->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
+
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+
+ assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
+ && "Only anyext and sext are currently implemented.");
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) {
+ // The only way in which we have a legal 256-bit vector result but not the
+ // integer 256-bit operations needed to directly lower a sextload is if we
+ // have AVX1 but not AVX2. In that case, we can always emit a sextload to
+ // a 128-bit vector and a normal sign_extend to 256-bits that should get
+ // correctly legalized. We do this late to allow the canonical form of
+ // sextload to persist throughout the rest of the DAG combiner -- it wants
+ // to fold together any extensions it can, and so will fuse a sign_extend
+ // of an sextload into a sextload targeting a wider value.
+ SDValue Load;
+ if (MemSz == 128) {
+ // Just switch this to a normal load.
+ assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
+ "it must be a legal 128-bit vector "
+ "type!");
+ Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ } else {
+ assert(MemSz < 128 &&
+ "Can't extend a type wider than 128 bits to a 256 bit vector!");
+ // Do an sext load to a 128-bit vector type. We want to use the same
+ // number of elements, but elements half as wide. This will end up being
+ // recursively lowered by this routine, but will succeed as we definitely
+ // have all the necessary features if we're using AVX1.
+ EVT HalfEltVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
+ EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
+ Load =
+ DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), MemVT, Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
+ }
+
+ // Replace chain users with the new chain.
+ assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+
+ // Finally, do a normal sign-extend to the desired register.
+ return DAG.getSExtOrTrunc(Load, dl, RegVT);
+ }
+
+ // All sizes must be a power of two.
+ assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
+ "Non-power-of-two elements are not custom lowered!");
+
+ // Attempt to load the original value using scalar loads.
+ // Find the largest scalar type that divides the total loaded size.
+ MVT SclrLoadTy = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
+ SclrLoadTy = Tp;
+ }
+ }
+
+ // On 32-bit systems, we can't use 64-bit integer loads. Try bitcasting to f64.
+ if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
+ (64 <= MemSz))
+ SclrLoadTy = MVT::f64;
+
+ // Calculate the number of scalar loads that we need to perform
+ // in order to load our vector from memory.
+ unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+
+ assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
+ "Can only lower sext loads with a single scalar load!");
+
+ unsigned loadRegSize = RegSz;
+ if (Ext == ISD::SEXTLOAD && RegSz == 256)
+ loadRegSize /= 2;
+
+ // Represent our vector as a sequence of elements which are the
+ // largest scalar that we can load.
+ EVT LoadUnitVecVT = EVT::getVectorVT(
+ *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
+
+ // Represent the data using the same element type that is stored in
+ // memory. In practice, we "widen" MemVT.
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ loadRegSize / MemVT.getScalarType().getSizeInBits());
+
+ assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
+ "Invalid vector type");
+
+ // We can't shuffle using an illegal type.
+ assert(TLI.isTypeLegal(WideVecVT) &&
+ "We only lower types that form legal widened vector types");
+
+ SmallVector<SDValue, 8> Chains;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment =
+ DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, TLI.getPointerTy());
+ SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ // Perform a single load.
+ SDValue ScalarLoad =
+ DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
+ Chains.push_back(ScalarLoad.getValue(1));
+ // Create the first element with SCALAR_TO_VECTOR in order to avoid
+ // another round of DAG combining.
+ if (i == 0)
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
+ else
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
+ ScalarLoad, DAG.getIntPtrConstant(i));
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
+ unsigned SizeRatio = RegSz / MemSz;
+
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1, we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Sext;
+ }
+
+ // Otherwise we'll shuffle the small elements into the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) &&
+ "We can't implement a sext load without an arithmetic right shift!");
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio + SizeRatio - 1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(
+ WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ Shuff =
+ DAG.getNode(ISD::SRA, dl, RegVT, Shuff, DAG.getConstant(Amt, RegVT));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Shuff;
+ }
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Shuff;
+}
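
// Editor's sketch (not part of this patch): a concrete instance of the
// shuffle-plus-shift strategy above. Sext-loading v4i8 into v4i32 without
// SSE4.1 does one i32 scalar load, shuffles each byte into the high byte of
// its i32 lane (SizeRatio == 4, so mask <-1,-1,-1,0, -1,-1,-1,1, ...>), and
// then arithmetic-shifts every lane right by 24. Per lane it is equivalent to:
#include <cstdint>
static int32_t SExt8ViaShift(uint8_t Byte) {
  int32_t Widened = (int32_t)((uint32_t)Byte << 24); // byte lands in the high bits
  return Widened >> 24;                              // SRA replicates the sign bit
}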
+
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -13116,7 +16529,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
-// Calls to _alloca is needed to probe the stack when allocating more than 4k
+// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
@@ -13125,7 +16538,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
- bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) ||
+ bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) ||
SplitStack;
SDLoc dl(Op);
@@ -13151,7 +16564,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *DAG.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *DAG.getSubtarget().getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
@@ -13174,7 +16587,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
- EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+ EVT SPTy = getPointerTy();
if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -13192,7 +16605,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ getRegClassFor(getPointerTy());
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -13201,7 +16614,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
- unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+ const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
@@ -13209,8 +16622,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -13451,7 +16864,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
SelectionDAG &DAG) {
- assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+ MVT SVT = ShAmt.getSimpleValueType();
+ assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
// Catch shift-by-constant.
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
@@ -13466,13 +16880,28 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
- // Need to build a vector containing shift amount
- // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
+ const X86Subtarget &Subtarget =
+ DAG.getTarget().getSubtarget<X86Subtarget>();
+ if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ // Let the shuffle legalizer expand this shift amount node.
+ SDValue Op0 = ShAmt.getOperand(0);
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
+ ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG);
+ } else {
+ // Need to build a vector containing shift amount.
+ // SSE/AVX packed shifts only use the lower 64 bits of the shift count.
+ SmallVector<SDValue, 4> ShOps;
+ ShOps.push_back(ShAmt);
+ if (SVT == MVT::i32) {
+ ShOps.push_back(DAG.getConstant(0, SVT));
+ ShOps.push_back(DAG.getUNDEF(SVT));
+ }
+ ShOps.push_back(DAG.getUNDEF(SVT));
+
+ MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
+ }
// The return type has to be a 128-bit type with the same element
// type as the input type.
@@ -13483,382 +16912,271 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
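
// Editor's sketch (not part of this patch): the intrinsics-level fact the code
// above relies on -- SSE/AVX per-register shifts read the count from the low
// 64 bits of an XMM register, so a scalar amount only needs to land in element
// 0 with the remaining bits zeroed.
#include <emmintrin.h>
static __m128i ShlAllLanes(__m128i V, int Amt) {
  __m128i Count = _mm_cvtsi32_si128(Amt);  // Amt in lane 0, upper bits zero
  return _mm_sll_epi32(V, Count);          // every i32 lane shifted by Amt
}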
-static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
- SDLoc dl(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
- // Comparison intrinsics.
- case Intrinsic::x86_sse_comieq_ss:
- case Intrinsic::x86_sse_comilt_ss:
- case Intrinsic::x86_sse_comile_ss:
- case Intrinsic::x86_sse_comigt_ss:
- case Intrinsic::x86_sse_comige_ss:
- case Intrinsic::x86_sse_comineq_ss:
- case Intrinsic::x86_sse_ucomieq_ss:
- case Intrinsic::x86_sse_ucomilt_ss:
- case Intrinsic::x86_sse_ucomile_ss:
- case Intrinsic::x86_sse_ucomigt_ss:
- case Intrinsic::x86_sse_ucomige_ss:
- case Intrinsic::x86_sse_ucomineq_ss:
- case Intrinsic::x86_sse2_comieq_sd:
- case Intrinsic::x86_sse2_comilt_sd:
- case Intrinsic::x86_sse2_comile_sd:
- case Intrinsic::x86_sse2_comigt_sd:
- case Intrinsic::x86_sse2_comige_sd:
- case Intrinsic::x86_sse2_comineq_sd:
- case Intrinsic::x86_sse2_ucomieq_sd:
- case Intrinsic::x86_sse2_ucomilt_sd:
- case Intrinsic::x86_sse2_ucomile_sd:
- case Intrinsic::x86_sse2_ucomigt_sd:
- case Intrinsic::x86_sse2_ucomige_sd:
- case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc;
- ISD::CondCode CC;
+/// \brief Return (and \p Op, \p Mask) for compare instructions or
+/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
+/// necessary casting for \p Mask when lowering masking intrinsics.
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MVT::i1, VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDLoc dl(Op);
+
+ assert(MaskVT.isSimple() && "invalid mask type");
+
+ if (isAllOnes(Mask))
+ return Op;
+
+ // In the case where MaskVT is v2i1 or v4i1, the lower 2 or 4 elements
+ // are extracted with EXTRACT_SUBVECTOR.
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+ }
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
+}
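
// Editor's sketch (not part of this patch): a scalar model of the masking
// semantics built above. Compare results are simply AND'ed with the mask,
// while any other masked op selects per element between the new result and
// PreservedSrc (which the code above zeroes when it is undef).
#include <cstddef>
static void ApplyMask(const bool *Mask, const int *Op, const int *PreservedSrc,
                      int *Res, size_t N, bool IsCompare) {
  for (size_t I = 0; I != N; ++I)
    Res[I] = IsCompare ? (Mask[I] ? Op[I] : 0)                // AND for CMPM et al.
                       : (Mask[I] ? Op[I] : PreservedSrc[I]); // VSELECT otherwise
}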
+
+/// \brief Creates an SDNode for a predicated scalar operation.
+/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
+/// The mask is coming in as MVT::i8 and it should be truncated
+/// to MVT::i1 while lowering masking intrinsics.
+/// The main difference between ScalarMaskingNode and VectorMaskingNode is
+/// that the former uses "X86select" instead of "vselect": we simply can't
+/// create a "vselect" node for a scalar instruction.
+static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ if (isAllOnes(Mask))
+ return Op;
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ // The mask should be of type MVT::i1
+ SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+}
+
+static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse_comieq_ss:
- case Intrinsic::x86_sse2_comieq_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETEQ;
- break;
- case Intrinsic::x86_sse_comilt_ss:
- case Intrinsic::x86_sse2_comilt_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETLT;
- break;
- case Intrinsic::x86_sse_comile_ss:
- case Intrinsic::x86_sse2_comile_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETLE;
- break;
- case Intrinsic::x86_sse_comigt_ss:
- case Intrinsic::x86_sse2_comigt_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETGT;
- break;
- case Intrinsic::x86_sse_comige_ss:
- case Intrinsic::x86_sse2_comige_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETGE;
- break;
- case Intrinsic::x86_sse_comineq_ss:
- case Intrinsic::x86_sse2_comineq_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETNE;
- break;
- case Intrinsic::x86_sse_ucomieq_ss:
- case Intrinsic::x86_sse2_ucomieq_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETEQ;
- break;
- case Intrinsic::x86_sse_ucomilt_ss:
- case Intrinsic::x86_sse2_ucomilt_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETLT;
- break;
- case Intrinsic::x86_sse_ucomile_ss:
- case Intrinsic::x86_sse2_ucomile_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETLE;
- break;
- case Intrinsic::x86_sse_ucomigt_ss:
- case Intrinsic::x86_sse2_ucomigt_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETGT;
- break;
- case Intrinsic::x86_sse_ucomige_ss:
- case Intrinsic::x86_sse2_ucomige_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETGE;
- break;
- case Intrinsic::x86_sse_ucomineq_ss:
- case Intrinsic::x86_sse2_ucomineq_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETNE;
- break;
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmadd_pd_512:
+ return X86ISD::FMADD;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsub_pd_512:
+ return X86ISD::FMSUB;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
+ return X86ISD::FNMADD;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
+ return X86ISD::FNMSUB;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
+ return X86ISD::FMADDSUB;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
+ return X86ISD::FMSUBADD;
}
+}
- SDValue LHS = Op.getOperand(1);
- SDValue RHS = Op.getOperand(2);
- unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
- assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
- SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
- }
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+ if (IntrData) {
+ switch(IntrData->Type) {
+ case INTR_TYPE_1OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
+ case INTR_TYPE_2OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ case INTR_TYPE_3OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case INTR_TYPE_1OP_MASK_RM: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Src0 = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue RoundingMode = Op.getOperand(4);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
+ RoundingMode),
+ Mask, Src0, Subtarget, DAG);
+ }
+ case INTR_TYPE_SCALAR_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src0 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ SDValue RoundingMode = Op.getOperand(5);
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
+ RoundingMode),
+ Mask, Src0, Subtarget, DAG);
+ }
+ case INTR_TYPE_2OP_MASK: {
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
+ Op.getOperand(2)),
+ Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
+ }
+ case CMP_MASK:
+ case CMP_MASK_CC: {
+ // Comparison intrinsics with masks.
+ // Example of transformation:
+ // (i8 (int_x86_avx512_mask_pcmpeq_q_128
+ // (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
+ // (i8 (bitcast
+ // (v8i1 (insert_subvector undef,
+ // (v2i1 (and (PCMPEQM %a, %b),
+ // (extract_subvector
+ // (v8i1 (bitcast %mask)), 0))), 0))))
+ EVT VT = Op.getOperand(1).getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDValue Cmp;
+ if (IntrData->Type == CMP_MASK_CC) {
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ } else {
+ assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2));
+ }
+ SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
+ DAG.getTargetConstant(0, MaskVT),
+ Subtarget, DAG);
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), CmpMask,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+ case COMI: { // Comparison intrinsics
+ ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+ case VSHIFT:
+ return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ case VSHIFT_MASK:
+ return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl,
+ Op.getSimpleValueType(),
+ Op.getOperand(1),
+ Op.getOperand(2), DAG),
+ Op.getOperand(4), Op.getOperand(3), Subtarget,
+ DAG);
+ case COMPRESS_EXPAND_IN_REG: {
+ SDValue Mask = Op.getOperand(3);
+ SDValue DataToCompress = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ if (isAllOnes(Mask)) // return data as is
+ return Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDLoc dl(Op);
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
- // Arithmetic intrinsics.
- case Intrinsic::x86_sse2_pmulu_dq:
- case Intrinsic::x86_avx2_pmulu_dq:
- return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse41_pmuldq:
- case Intrinsic::x86_avx2_pmul_dq:
- return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_pmulhu_w:
- case Intrinsic::x86_avx2_pmulhu_w:
- return DAG.getNode(ISD::MULHU, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_pmulh_w:
- case Intrinsic::x86_avx2_pmulh_w:
- return DAG.getNode(ISD::MULHS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- // SSE2/AVX2 sub with unsigned saturation intrinsics
- case Intrinsic::x86_sse2_psubus_b:
- case Intrinsic::x86_sse2_psubus_w:
- case Intrinsic::x86_avx2_psubus_b:
- case Intrinsic::x86_avx2_psubus_w:
- return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- // SSE3/AVX horizontal add/sub intrinsics
- case Intrinsic::x86_sse3_hadd_ps:
- case Intrinsic::x86_sse3_hadd_pd:
- case Intrinsic::x86_avx_hadd_ps_256:
- case Intrinsic::x86_avx_hadd_pd_256:
- case Intrinsic::x86_sse3_hsub_ps:
- case Intrinsic::x86_sse3_hsub_pd:
- case Intrinsic::x86_avx_hsub_ps_256:
- case Intrinsic::x86_avx_hsub_pd_256:
- case Intrinsic::x86_ssse3_phadd_w_128:
- case Intrinsic::x86_ssse3_phadd_d_128:
- case Intrinsic::x86_avx2_phadd_w:
- case Intrinsic::x86_avx2_phadd_d:
- case Intrinsic::x86_ssse3_phsub_w_128:
- case Intrinsic::x86_ssse3_phsub_d_128:
- case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse3_hadd_ps:
- case Intrinsic::x86_sse3_hadd_pd:
- case Intrinsic::x86_avx_hadd_ps_256:
- case Intrinsic::x86_avx_hadd_pd_256:
- Opcode = X86ISD::FHADD;
- break;
- case Intrinsic::x86_sse3_hsub_ps:
- case Intrinsic::x86_sse3_hsub_pd:
- case Intrinsic::x86_avx_hsub_ps_256:
- case Intrinsic::x86_avx_hsub_pd_256:
- Opcode = X86ISD::FHSUB;
- break;
- case Intrinsic::x86_ssse3_phadd_w_128:
- case Intrinsic::x86_ssse3_phadd_d_128:
- case Intrinsic::x86_avx2_phadd_w:
- case Intrinsic::x86_avx2_phadd_d:
- Opcode = X86ISD::HADD;
- break;
- case Intrinsic::x86_ssse3_phsub_w_128:
- case Intrinsic::x86_ssse3_phsub_d_128:
- case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- Opcode = X86ISD::HSUB;
- break;
+ return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
+ PassThru);
}
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // SSE2/SSE41/AVX2 integer max/min intrinsics.
- case Intrinsic::x86_sse2_pmaxu_b:
- case Intrinsic::x86_sse41_pmaxuw:
- case Intrinsic::x86_sse41_pmaxud:
- case Intrinsic::x86_avx2_pmaxu_b:
- case Intrinsic::x86_avx2_pmaxu_w:
- case Intrinsic::x86_avx2_pmaxu_d:
- case Intrinsic::x86_sse2_pminu_b:
- case Intrinsic::x86_sse41_pminuw:
- case Intrinsic::x86_sse41_pminud:
- case Intrinsic::x86_avx2_pminu_b:
- case Intrinsic::x86_avx2_pminu_w:
- case Intrinsic::x86_avx2_pminu_d:
- case Intrinsic::x86_sse41_pmaxsb:
- case Intrinsic::x86_sse2_pmaxs_w:
- case Intrinsic::x86_sse41_pmaxsd:
- case Intrinsic::x86_avx2_pmaxs_b:
- case Intrinsic::x86_avx2_pmaxs_w:
- case Intrinsic::x86_avx2_pmaxs_d:
- case Intrinsic::x86_sse41_pminsb:
- case Intrinsic::x86_sse2_pmins_w:
- case Intrinsic::x86_sse41_pminsd:
- case Intrinsic::x86_avx2_pmins_b:
- case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_pmaxu_b:
- case Intrinsic::x86_sse41_pmaxuw:
- case Intrinsic::x86_sse41_pmaxud:
- case Intrinsic::x86_avx2_pmaxu_b:
- case Intrinsic::x86_avx2_pmaxu_w:
- case Intrinsic::x86_avx2_pmaxu_d:
- Opcode = X86ISD::UMAX;
- break;
- case Intrinsic::x86_sse2_pminu_b:
- case Intrinsic::x86_sse41_pminuw:
- case Intrinsic::x86_sse41_pminud:
- case Intrinsic::x86_avx2_pminu_b:
- case Intrinsic::x86_avx2_pminu_w:
- case Intrinsic::x86_avx2_pminu_d:
- Opcode = X86ISD::UMIN;
- break;
- case Intrinsic::x86_sse41_pmaxsb:
- case Intrinsic::x86_sse2_pmaxs_w:
- case Intrinsic::x86_sse41_pmaxsd:
- case Intrinsic::x86_avx2_pmaxs_b:
- case Intrinsic::x86_avx2_pmaxs_w:
- case Intrinsic::x86_avx2_pmaxs_d:
- Opcode = X86ISD::SMAX;
- break;
- case Intrinsic::x86_sse41_pminsb:
- case Intrinsic::x86_sse2_pmins_w:
- case Intrinsic::x86_sse41_pminsd:
- case Intrinsic::x86_avx2_pmins_b:
- case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d:
- Opcode = X86ISD::SMIN;
- break;
+ case BLEND: {
+ SDValue Mask = Op.getOperand(3);
+ EVT VT = Op.getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDLoc dl(Op);
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
+ Op.getOperand(2));
}
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // SSE/SSE2/AVX floating point max/min intrinsics.
- case Intrinsic::x86_sse_max_ps:
- case Intrinsic::x86_sse2_max_pd:
- case Intrinsic::x86_avx_max_ps_256:
- case Intrinsic::x86_avx_max_pd_256:
- case Intrinsic::x86_sse_min_ps:
- case Intrinsic::x86_sse2_min_pd:
- case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse_max_ps:
- case Intrinsic::x86_sse2_max_pd:
- case Intrinsic::x86_avx_max_ps_256:
- case Intrinsic::x86_avx_max_pd_256:
- Opcode = X86ISD::FMAX;
- break;
- case Intrinsic::x86_sse_min_ps:
- case Intrinsic::x86_sse2_min_pd:
- case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256:
- Opcode = X86ISD::FMIN;
- break;
+ case FMA_OP_MASK:
+ {
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3)),
+ Op.getOperand(4), Op.getOperand(1),
+ Subtarget, DAG);
}
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // AVX2 variable shift intrinsics
- case Intrinsic::x86_avx2_psllv_d:
- case Intrinsic::x86_avx2_psllv_q:
- case Intrinsic::x86_avx2_psllv_d_256:
- case Intrinsic::x86_avx2_psllv_q_256:
- case Intrinsic::x86_avx2_psrlv_d:
- case Intrinsic::x86_avx2_psrlv_q:
- case Intrinsic::x86_avx2_psrlv_d_256:
- case Intrinsic::x86_avx2_psrlv_q_256:
- case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_avx2_psllv_d:
- case Intrinsic::x86_avx2_psllv_q:
- case Intrinsic::x86_avx2_psllv_d_256:
- case Intrinsic::x86_avx2_psllv_q_256:
- Opcode = ISD::SHL;
- break;
- case Intrinsic::x86_avx2_psrlv_d:
- case Intrinsic::x86_avx2_psrlv_q:
- case Intrinsic::x86_avx2_psrlv_d_256:
- case Intrinsic::x86_avx2_psrlv_q_256:
- Opcode = ISD::SRL;
- break;
- case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- Opcode = ISD::SRA;
+ default:
break;
}
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- case Intrinsic::x86_sse2_packssdw_128:
- case Intrinsic::x86_sse2_packsswb_128:
- case Intrinsic::x86_avx2_packssdw:
- case Intrinsic::x86_avx2_packsswb:
- return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_packuswb_128:
- case Intrinsic::x86_sse41_packusdw:
- case Intrinsic::x86_avx2_packuswb:
- case Intrinsic::x86_avx2_packusdw:
- return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_ssse3_pshuf_b_128:
- case Intrinsic::x86_avx2_pshuf_b:
- return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_pshuf_d:
- return DAG.getNode(X86ISD::PSHUFD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_pshufl_w:
- return DAG.getNode(X86ISD::PSHUFLW, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse2_pshufh_w:
- return DAG.getNode(X86ISD::PSHUFHW, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_ssse3_psign_b_128:
- case Intrinsic::x86_ssse3_psign_w_128:
- case Intrinsic::x86_ssse3_psign_d_128:
- case Intrinsic::x86_avx2_psign_b:
- case Intrinsic::x86_avx2_psign_w:
- case Intrinsic::x86_avx2_psign_d:
- return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::x86_sse41_insertps:
- return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case Intrinsic::x86_avx_vperm2f128_ps_256:
- case Intrinsic::x86_avx_vperm2f128_pd_256:
- case Intrinsic::x86_avx_vperm2f128_si_256:
- case Intrinsic::x86_avx2_vperm2i128:
- return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ }
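
The per-intrinsic switches removed below give way to a data-driven dispatch: getIntrinsicWithoutChain returns an IntrinsicData record whose Type field selects a generic lowering strategy. A minimal sketch of the shape such a table might take — the IDs, enum values, and opcode numbers here are placeholders for illustration, not the real definitions introduced elsewhere in the patch:

#include <algorithm>
#include <iterator>

// Hypothetical shape of the lookup behind getIntrinsicWithoutChain: one row
// per intrinsic, kept sorted by ID so lookup can binary-search instead of
// switching over hundreds of cases.
enum IntrinsicType { INTR_TYPE_1OP, INTR_TYPE_2OP, CMP_MASK, FMA_OP_MASK };

struct IntrinsicData {
  unsigned Id;        // stand-in for an Intrinsic::ID value
  IntrinsicType Type; // selects the generic lowering strategy
  unsigned Opc0;      // primary ISD/X86ISD opcode
  unsigned Opc1;      // secondary opcode, 0 if unused
};

static const IntrinsicData IntrinsicsWithoutChain[] = {
    // Placeholder rows; the real table maps Intrinsic::x86_* IDs.
    {100, INTR_TYPE_2OP, 1, 0},
    {205, FMA_OP_MASK, 2, 0},
    {317, CMP_MASK, 3, 4},
};

const IntrinsicData *getIntrinsicWithoutChain(unsigned IntNo) {
  const IntrinsicData *B = std::begin(IntrinsicsWithoutChain);
  const IntrinsicData *E = std::end(IntrinsicsWithoutChain);
  const IntrinsicData *I = std::lower_bound(
      B, E, IntNo,
      [](const IntrinsicData &D, unsigned Id) { return D.Id < Id; });
  return (I != E && I->Id == IntNo) ? I : nullptr;
}

int main() { return getIntrinsicWithoutChain(205) ? 0 : 1; }
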
- case Intrinsic::x86_avx2_permd:
- case Intrinsic::x86_avx2_permps:
- // Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/instruction.
- return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(1));
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::x86_sse_sqrt_ps:
- case Intrinsic::x86_sse2_sqrt_pd:
- case Intrinsic::x86_avx_sqrt_ps_256:
- case Intrinsic::x86_avx_sqrt_pd_256:
- return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+ case Intrinsic::x86_avx512_mask_valign_q_512:
+ case Intrinsic::x86_avx512_mask_valign_d_512:
+ // Vector source operands are swapped.
+ return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl,
+ Op.getValueType(), Op.getOperand(2),
+ Op.getOperand(1),
+ Op.getOperand(3)),
+ Op.getOperand(5), Op.getOperand(4),
+ Subtarget, DAG);
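
In the masked cases above, getVectorMaskingNode presumably merges the freshly computed result with a pass-through operand under the k-mask, i.e. a per-lane vector select. A scalar model of that merge-masking semantics, with all names local to the example:

#include <array>
#include <cstdint>
#include <cstdio>

// Scalar model of AVX-512 merge masking: each result lane takes the newly
// computed value where the mask bit is set and the pass-through operand
// (the first source operand in the calls above) elsewhere.
template <size_t N>
std::array<int64_t, N> maskMerge(uint8_t Mask,
                                 const std::array<int64_t, N> &New,
                                 const std::array<int64_t, N> &PassThru) {
  std::array<int64_t, N> R{};
  for (size_t i = 0; i != N; ++i)
    R[i] = ((Mask >> i) & 1) ? New[i] : PassThru[i];
  return R;
}

int main() {
  std::array<int64_t, 4> New{10, 20, 30, 40}, Pass{1, 2, 3, 4};
  auto R = maskMerge<4>(0b0101, New, Pass); // lanes 0 and 2 take the new value
  for (int64_t V : R)
    std::printf("%lld ", (long long)V); // prints: 10 2 30 4
  return 0;
}
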
// ptest and testp intrinsics. The intrinsics these come from are designed to
// return an integer value, not just an instruction, so lower it to the ptest
@@ -13936,100 +17254,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // SSE/AVX shift intrinsics
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- Opcode = X86ISD::VSHL;
- break;
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- Opcode = X86ISD::VSRL;
- break;
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- Opcode = X86ISD::VSRA;
- break;
- }
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // SSE/AVX immediate shift intrinsics
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- Opcode = X86ISD::VSHLI;
- break;
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- Opcode = X86ISD::VSRLI;
- break;
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- Opcode = X86ISD::VSRAI;
- break;
- }
- return getTargetVShiftNode(Opcode, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
- }
-
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
@@ -14106,6 +17330,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
+
+ case Intrinsic::x86_fma_mask_vfmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmadd_pd_512:
+ case Intrinsic::x86_fma_mask_vfmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
+ auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
+ if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
+ return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
+ dl, Op.getValueType(),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3)),
+ Op.getOperand(4), Op.getOperand(1),
+ Subtarget, DAG);
+ else
+ return SDValue();
+ }
+
case Intrinsic::x86_fma_vfmadd_ps:
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmsub_ps:
@@ -14130,74 +17380,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
- case Intrinsic::x86_fma_vfmadd_ps_512:
- case Intrinsic::x86_fma_vfmadd_pd_512:
- case Intrinsic::x86_fma_vfmsub_ps_512:
- case Intrinsic::x86_fma_vfmsub_pd_512:
- case Intrinsic::x86_fma_vfnmadd_ps_512:
- case Intrinsic::x86_fma_vfnmadd_pd_512:
- case Intrinsic::x86_fma_vfnmsub_ps_512:
- case Intrinsic::x86_fma_vfnmsub_pd_512:
- case Intrinsic::x86_fma_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_vfmaddsub_pd_512:
- case Intrinsic::x86_fma_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_vfmsubadd_pd_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_fma_vfmadd_ps:
- case Intrinsic::x86_fma_vfmadd_pd:
- case Intrinsic::x86_fma_vfmadd_ps_256:
- case Intrinsic::x86_fma_vfmadd_pd_256:
- case Intrinsic::x86_fma_vfmadd_ps_512:
- case Intrinsic::x86_fma_vfmadd_pd_512:
- Opc = X86ISD::FMADD;
- break;
- case Intrinsic::x86_fma_vfmsub_ps:
- case Intrinsic::x86_fma_vfmsub_pd:
- case Intrinsic::x86_fma_vfmsub_ps_256:
- case Intrinsic::x86_fma_vfmsub_pd_256:
- case Intrinsic::x86_fma_vfmsub_ps_512:
- case Intrinsic::x86_fma_vfmsub_pd_512:
- Opc = X86ISD::FMSUB;
- break;
- case Intrinsic::x86_fma_vfnmadd_ps:
- case Intrinsic::x86_fma_vfnmadd_pd:
- case Intrinsic::x86_fma_vfnmadd_ps_256:
- case Intrinsic::x86_fma_vfnmadd_pd_256:
- case Intrinsic::x86_fma_vfnmadd_ps_512:
- case Intrinsic::x86_fma_vfnmadd_pd_512:
- Opc = X86ISD::FNMADD;
- break;
- case Intrinsic::x86_fma_vfnmsub_ps:
- case Intrinsic::x86_fma_vfnmsub_pd:
- case Intrinsic::x86_fma_vfnmsub_ps_256:
- case Intrinsic::x86_fma_vfnmsub_pd_256:
- case Intrinsic::x86_fma_vfnmsub_ps_512:
- case Intrinsic::x86_fma_vfnmsub_pd_512:
- Opc = X86ISD::FNMSUB;
- break;
- case Intrinsic::x86_fma_vfmaddsub_ps:
- case Intrinsic::x86_fma_vfmaddsub_pd:
- case Intrinsic::x86_fma_vfmaddsub_ps_256:
- case Intrinsic::x86_fma_vfmaddsub_pd_256:
- case Intrinsic::x86_fma_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_vfmaddsub_pd_512:
- Opc = X86ISD::FMADDSUB;
- break;
- case Intrinsic::x86_fma_vfmsubadd_ps:
- case Intrinsic::x86_fma_vfmsubadd_pd:
- case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256:
- case Intrinsic::x86_fma_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_vfmsubadd_pd_512:
- Opc = X86ISD::FMSUBADD;
- break;
- }
-
- return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
- }
+ return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
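
Both the masked 512-bit FMA cases and the unmasked cases now share getOpcodeForFMAIntrinsic. As a rough model of the mapping it performs — keyed on the intrinsic name suffix here purely for illustration, whereas the real helper switches on the intrinsic ID:

#include <cstdio>
#include <string>

// Every vfmadd/vfmsub/vfnmadd/vfnmsub/vfmaddsub/vfmsubadd intrinsic, whatever
// its vector width, maps to one of six node kinds. Longer suffixes must be
// tested first ("vfmaddsub" contains "vfmadd").
const char *opcodeForFMA(const std::string &Name) {
  if (Name.find("vfmaddsub") != std::string::npos) return "X86ISD::FMADDSUB";
  if (Name.find("vfmsubadd") != std::string::npos) return "X86ISD::FMSUBADD";
  if (Name.find("vfnmadd") != std::string::npos)   return "X86ISD::FNMADD";
  if (Name.find("vfnmsub") != std::string::npos)   return "X86ISD::FNMSUB";
  if (Name.find("vfmadd") != std::string::npos)    return "X86ISD::FMADD";
  if (Name.find("vfmsub") != std::string::npos)    return "X86ISD::FMSUB";
  return "<unknown>";
}

int main() {
  std::printf("%s\n", opcodeForFMA("x86_fma_mask_vfmaddsub_ps_512"));
  return 0; // prints X86ISD::FMADDSUB
}
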
@@ -14382,122 +17566,25 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-enum IntrinsicType {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
-};
-
-struct IntrinsicData {
- IntrinsicData(IntrinsicType IType, unsigned IOpc0, unsigned IOpc1)
- :Type(IType), Opc0(IOpc0), Opc1(IOpc1) {}
- IntrinsicType Type;
- unsigned Opc0;
- unsigned Opc1;
-};
-
-std::map < unsigned, IntrinsicData> IntrMap;
-static void InitIntinsicsMap() {
- static bool Initialized = false;
- if (Initialized)
- return;
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
- IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
- IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpd_512,
- IntrinsicData(GATHER, X86::VGATHERQPDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpd_512,
- IntrinsicData(GATHER, X86::VGATHERDPDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dps_512,
- IntrinsicData(GATHER, X86::VGATHERDPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
- IntrinsicData(GATHER, X86::VPGATHERQDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
- IntrinsicData(GATHER, X86::VPGATHERQQZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
- IntrinsicData(GATHER, X86::VPGATHERDDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
- IntrinsicData(GATHER, X86::VPGATHERDQZrm, 0)));
-
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qps_512,
- IntrinsicData(SCATTER, X86::VSCATTERQPSZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
- IntrinsicData(SCATTER, X86::VSCATTERQPDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
- IntrinsicData(SCATTER, X86::VSCATTERDPDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
- IntrinsicData(SCATTER, X86::VSCATTERDPSZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
- IntrinsicData(SCATTER, X86::VPSCATTERQDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
- IntrinsicData(SCATTER, X86::VPSCATTERQQZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
- IntrinsicData(SCATTER, X86::VPSCATTERDDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
- IntrinsicData(SCATTER, X86::VPSCATTERDQZmr, 0)));
-
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0QPSm,
- X86::VGATHERPF1QPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0QPDm,
- X86::VGATHERPF1QPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0DPDm,
- X86::VGATHERPF1DPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0DPSm,
- X86::VGATHERPF1DPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0QPSm,
- X86::VSCATTERPF1QPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0QPDm,
- X86::VSCATTERPF1QPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0DPDm,
- X86::VSCATTERPF1DPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0DPSm,
- X86::VSCATTERPF1DPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_16,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_32,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_64,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_16,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_32,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_64,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_xtest,
- IntrinsicData(XTEST, X86ISD::XTEST, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdtsc,
- IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
- IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
- IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
- Initialized = true;
-}
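
The deleted initializer (note the misspelled InitIntinsicsMap) built a std::map lazily at first use, which is neither thread-safe nor free, and it even inserted Intrinsic::x86_avx512_gather_qps_512 twice; std::map::insert silently ignores a duplicate key, so the second insertion was dead code rather than a functional bug. A small demonstration of why the duplicate went unnoticed:

#include <cstdio>
#include <map>

int main() {
  std::map<unsigned, int> M;
  auto First = M.insert({42, 1});  // succeeds
  auto Second = M.insert({42, 2}); // ignored: key already present
  std::printf("first=%d second=%d value=%d\n", (int)First.second,
              (int)Second.second, M[42]); // first=1 second=0 value=1
  return 0;
}

The replacement, getIntrinsicWithChain, reads from a constant table instead, avoiding both the lazy initialization and this class of mistake.
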
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- InitIntinsicsMap();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- std::map < unsigned, IntrinsicData>::const_iterator itr = IntrMap.find(IntNo);
- if (itr == IntrMap.end())
+
+ const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
+ if (!IntrData)
return SDValue();
SDLoc dl(Op);
- IntrinsicData Intr = itr->second;
- switch(Intr.Type) {
+ switch(IntrData->Type) {
+ default:
+ llvm_unreachable("Unknown Intrinsic Type");
+ break;
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, casted to i32.
@@ -14521,7 +17608,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
Subtarget);
}
case SCATTER: {
@@ -14532,7 +17619,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getScatterNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
@@ -14540,7 +17627,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
(HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
- unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0);
+ unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
@@ -14551,7 +17638,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
- getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
+ getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
@@ -14563,7 +17650,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
- SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_NE, MVT::i8),
InTrans);
@@ -14571,8 +17658,79 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SmallVector<SDValue, 2> Results;
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
+ DAG.getConstant(-1, MVT::i8));
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
+ Op.getOperand(4), GenCF.getValue(1));
+ SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
+ Op.getOperand(5), MachinePointerInfo(),
+ false, false, 0);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8),
+ Res.getValue(1));
+ Results.push_back(SetCC);
+ Results.push_back(Store);
+ return DAG.getMergeValues(Results, dl);
+ }
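
The ADX case regenerates CF from the i8 carry-in by adding -1 (0xFF): the byte addition carries exactly when the carry-in is nonzero, the chained add then consumes CF, and the SETCC with COND_B captures the carry-out. A scalar model, assuming the usual _addcarry_u32-style semantics:

#include <cstdint>
#include <cstdio>

// Rebuild CF from an i8 carry-in by adding 0xFF (only a nonzero carry-in
// overflows the byte), then perform the add-with-carry and report the new
// carry-out, as the SETCC(COND_B) above does.
uint8_t addcarry32(uint8_t CarryIn, uint32_t A, uint32_t B, uint32_t *Out) {
  unsigned CF = (unsigned)CarryIn + 0xFFu > 0xFFu; // carries iff CarryIn != 0
  uint64_t Sum = (uint64_t)A + B + CF;
  *Out = (uint32_t)Sum;
  return (uint8_t)(Sum >> 32); // carry-out
}

int main() {
  uint32_t Out;
  uint8_t C = addcarry32(1, 0xFFFFFFFFu, 0, &Out);
  std::printf("out=%u carry=%u\n", Out, C); // out=0 carry=1
  return 0;
}
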
+ case COMPRESS_TO_MEM: {
+ SDLoc dl(Op);
+ SDValue Mask = Op.getOperand(4);
+ SDValue DataToCompress = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
+
+ if (isAllOnes(Mask)) // return just a store
+ return DAG.getStore(Chain, dl, DataToCompress, Addr,
+ MachinePointerInfo(), false, false, 0);
+
+ EVT VT = DataToCompress.getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
+
+ SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
+ DataToCompress, DAG.getUNDEF(VT));
+ return DAG.getStore(Chain, dl, Compressed, Addr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ case EXPAND_FROM_MEM: {
+ SDLoc dl(Op);
+ SDValue Mask = Op.getOperand(4);
+ SDValue PathThru = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+
+ if (isAllOnes(Mask)) // return just a load
+ return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
+ false, 0);
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
+
+ SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ SmallVector<SDValue, 2> Results;
+ Results.push_back(DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand,
+ PathThru));
+ Results.push_back(Chain);
+ return DAG.getMergeValues(Results, dl);
+ }
}
- llvm_unreachable("Unknown Intrinsic Type");
}
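
For reference, the lane-level semantics the COMPRESS_TO_MEM and EXPAND_FROM_MEM cases implement, sketched with scalars. The actual lowering above performs a full-width load or store and lets the COMPRESS/EXPAND node rearrange lanes; this is only the observable behavior:

#include <cstdint>
#include <vector>

// Compress: write only the enabled lanes, packed contiguously in memory.
void compressStore(uint16_t Mask, const std::vector<int32_t> &V, int32_t *Mem) {
  size_t Out = 0;
  for (size_t i = 0; i != V.size(); ++i)
    if ((Mask >> i) & 1)
      Mem[Out++] = V[i];
}

// Expand: read contiguous values into the enabled lanes, keeping the
// pass-through value in the disabled ones.
std::vector<int32_t> expandLoad(uint16_t Mask, const int32_t *Mem,
                                const std::vector<int32_t> &PassThru) {
  std::vector<int32_t> R = PassThru;
  size_t In = 0;
  for (size_t i = 0; i != R.size(); ++i)
    if ((Mask >> i) & 1)
      R[i] = Mem[In++];
  return R;
}

int main() {
  int32_t Mem[4] = {};
  compressStore(0b1010, {1, 2, 3, 4}, Mem);       // Mem = {2, 4, _, _}
  auto R = expandLoad(0b0101, Mem, {9, 9, 9, 9}); // R = {2, 9, 4, 9}
  return (R[0] == 2 && R[2] == 4) ? 0 : 1;
}
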
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -14589,8 +17747,8 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -14611,9 +17769,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
- unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
+ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(
+ DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -14640,8 +17799,8 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName,
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
@@ -14652,8 +17811,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl (Op);
EVT PtrVT = getPointerTy();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -14700,7 +17859,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo* TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -14864,7 +18023,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -15198,29 +18357,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
- // The internal representation is big endian.
- // In other words, a i64 bitcasted to 2 x i32 has its high part at index 0
- // and its low part at index 1.
- // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh>
- // Vector index 0 1 ; 2 3
- // We want <ae|bf|cg|dh>
- // Vector index 0 2 1 3
- // Since each element is seen as 2 x i32, we get:
- // high_mask[i] = 2 x vector_index[i]
- // low_mask[i] = 2 x vector_index[i] + 1
- // where vector_index = {0, Size/2, 1, Size/2 + 1, ...,
- // Size/2 - 1, Size/2 + Size/2 - 1}
- // where Size is the number of element of the final vector.
SDValue Highs, Lows;
if (VT == MVT::v8i32) {
- const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
} else {
- const int HighMask[] = {0, 4, 2, 6};
+ const int HighMask[] = {1, 5, 3, 7};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {1, 5, 3, 7};
+ const int LowMask[] = {0, 4, 2, 6};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
}
@@ -15238,9 +18384,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
- // The low part of a MUL_LOHI is supposed to be the first value and the
- // high part the second value.
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Lows, Highs);
+ // The first result of MUL_LOHI is actually the low value, followed by the
+ // high value.
+ SDValue Ops[] = {Lows, Highs};
+ return DAG.getMergeValues(Ops, dl);
}
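
The mask swap above reflects little-endian lane order: each 64-bit product of the 32x32->64 multiplies keeps its low 32 bits at the even i32 index and its high 32 bits at the odd index, so the Lows shuffle gathers even indices and Highs the odd ones. A standalone check for the v4i32 case:

#include <cstdint>
#include <cstdio>

// Mul1 holds the 64-bit products of even input lanes (a*e, c*g), Mul2 of odd
// lanes (b*f, d*h). Viewed as i32 lanes on a little-endian target, LowMask
// {0,4,2,6} and HighMask {1,5,3,7} pick the low and high halves of the
// interleaved result <ae|bf|cg|dh>; indices >= 4 address Mul2.
int main() {
  uint32_t A[4] = {0xFFFFFFFFu, 2, 3, 4};
  uint32_t B[4] = {0xFFFFFFFFu, 6, 7, 8};
  uint64_t Mul1[2] = {(uint64_t)A[0] * B[0], (uint64_t)A[2] * B[2]}; // ae, cg
  uint64_t Mul2[2] = {(uint64_t)A[1] * B[1], (uint64_t)A[3] * B[3]}; // bf, dh

  // Reinterpret the two product vectors as one 8 x i32 shuffle source.
  uint32_t Src[8];
  for (int i = 0; i != 2; ++i) {
    Src[2 * i] = (uint32_t)Mul1[i];             // low half at even index
    Src[2 * i + 1] = (uint32_t)(Mul1[i] >> 32); // high half at odd index
    Src[4 + 2 * i] = (uint32_t)Mul2[i];
    Src[4 + 2 * i + 1] = (uint32_t)(Mul2[i] >> 32);
  }
  const int LowMask[4] = {0, 4, 2, 6}, HighMask[4] = {1, 5, 3, 7};
  for (int i = 0; i != 4; ++i)
    std::printf("lane %d: lo=%u hi=%u\n", i, Src[LowMask[i]], Src[HighMask[i]]);
  // lane 0: lo=1 hi=4294967294, since 0xFFFFFFFF^2 = 0xFFFFFFFE00000001
  return 0;
}
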
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
@@ -15430,55 +18577,43 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
- if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i, j;
- for (i = 0; i != NumElts; ++i) {
- if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
- continue;
- break;
- }
- for (j = i; j != NumElts; ++j) {
- SDValue Arg = Amt.getOperand(j);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != Amt.getOperand(i))
- break;
- }
- if (i != NumElts && j == NumElts)
- BaseShAmt = Amt.getOperand(i);
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
+ // Check if this build_vector node is doing a splat.
+ // If so, then set BaseShAmt equal to the splat value.
+ BaseShAmt = BV->getSplatValue();
+ if (BaseShAmt && BaseShAmt.getOpcode() == ISD::UNDEF)
+ BaseShAmt = SDValue();
} else {
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
Amt = Amt.getOperand(0);
- if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+
+ ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt);
+ if (SVN && SVN->isSplat()) {
+ unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
+ assert((SplatIdx < InVec.getValueType().getVectorNumElements()) &&
+ "Unexpected shuffle index found!");
+ BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx =
- cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
- if (!BaseShAmt.getNode())
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
- DAG.getIntPtrConstant(0));
+
+ if (!BaseShAmt)
+ // Avoid introducing an extract element from a shuffle.
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec,
+ DAG.getIntPtrConstant(SplatIdx));
}
}
if (BaseShAmt.getNode()) {
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
+ if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
@@ -15596,7 +18731,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
- if (Op.getOpcode() == ISD::SHL &&
+ if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget->hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
@@ -15688,15 +18823,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
CanBeSimplified = Amt2 == Amt->getOperand(j);
}
}
-
+
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD.
EVT CastVT = MVT::v4i32;
- SDValue Splat1 =
+ SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
- SDValue Splat2 =
+ SDValue Splat2 =
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
if (TargetOpcode == X86ISD::MOVSD)
@@ -15851,10 +18986,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
Cond = X86::COND_B;
break;
case ISD::SMULO:
- BaseOp = X86ISD::SMUL;
+ BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ if (N->getValueType(0) == MVT::i8) {
+ BaseOp = X86ISD::UMUL8;
+ Cond = X86::COND_O;
+ break;
+ }
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
@@ -15880,6 +19020,11 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
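
SMULO/UMULO on i8 get the dedicated SMUL8/UMUL8 nodes above because the 8-bit one-operand MUL/IMUL forms write the full 16-bit product into AX rather than a register pair. A scalar model of the overflow bit those nodes feed into SETCC(COND_O):

#include <cstdint>
#include <cstdio>

// The 8-bit IMUL produces a 16-bit result; OF is set when that result does
// not fit back into the signed 8-bit type.
bool smulo8(int8_t A, int8_t B, int8_t *Out) {
  int16_t Wide = (int16_t)A * (int16_t)B; // AX after IMUL
  *Out = (int8_t)Wide;
  return Wide != (int16_t)*Out; // OF: sign-extended low byte != full result
}

int main() {
  int8_t R;
  std::printf("64*2 overflows: %d\n", (int)smulo8(64, 2, &R));   // 1 (128 > 127)
  std::printf("-8*16 overflows: %d\n", (int)smulo8(-8, 16, &R)); // 0 (-128 fits)
  return 0;
}
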
+// Sign extension of the low part of vector elements. This may be used either
+// when sign-extend instructions are not available or when the vector element
+// sizes already match the sign-extended size. If the vector elements are in
+// their pre-extended size and sign extend instructions are available, that will
+// be handled by LowerSIGN_EXTEND.
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -15925,37 +19070,151 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
case MVT::v4i32:
case MVT::v8i16: {
SDValue Op0 = Op.getOperand(0);
- SDValue Op00 = Op0.getOperand(0);
- SDValue Tmp1;
- // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
- if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
- // (sext (vzext x)) -> (vsext x)
- Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
- if (Tmp1.getNode()) {
- EVT ExtraEltVT = ExtraVT.getVectorElementType();
- // This folding is only valid when the in-reg type is a vector of i8,
- // i16, or i32.
- if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
- ExtraEltVT == MVT::i32) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
- }
- Op0 = Tmp1;
- }
- }
- // If the above didn't work, then just use Shift-Left + Shift-Right.
- Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
- DAG);
- return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ // This is a sign extension of some low part of vector elements without
+ // changing the size of the vector elements themselves:
+ // Shift-Left + Shift-Right-Arithmetic.
+ SDValue Shl = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0,
+ BitsDiff, DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff,
DAG);
}
}
}
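
The VSHLI/VSRAI pair emitted above is the classic in-register sign extension; for i8 data held in i32 lanes, BitsDiff is 32 - 8 = 24. The scalar equivalent:

#include <cstdint>
#include <cstdio>

// Sign-extend the low 8 bits of a 32-bit lane in place: shift the sign bit of
// the low byte up to bit 31, then shift back arithmetically.
int32_t signExtendInReg8(int32_t V) {
  return (int32_t)((uint32_t)V << 24) >> 24; // shl 24, then sar 24
}

int main() {
  std::printf("%d\n", signExtendInReg8(0x000000FF)); // -1
  std::printf("%d\n", signExtendInReg8(0x1234007F)); // 127
  return 0;
}
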
+/// Returns true if the operand type is exactly twice the native width, and
+/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
+/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
+/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
+bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+
+ if (OpWidth == 64)
+ return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+ else if (OpWidth == 128)
+ return Subtarget.hasCmpxchg16b();
+ else
+ return false;
+}
+
+bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ return needsCmpXchgNb(SI->getValueOperand()->getType());
+}
+
+// Note: this turns large loads into lock cmpxchg8b/16b.
+// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+ return needsCmpXchgNb(PTy->getElementType());
+}
+
+bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+ const Type *MemType = AI->getType();
+
+ // If the operand is too big, we must see if cmpxchg8/16b is available
+ // and default to library calls otherwise.
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth)
+ return needsCmpXchgNb(MemType);
+
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Xchg:
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ // It's better to use xadd, xsub or xchg for these in all cases.
+ return false;
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Xor:
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ return !AI->use_empty();
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These always require a non-trivial set of data operations on x86. We must
+ // use a cmpxchg loop.
+ return true;
+ }
+}
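
Phrased at the C++ source level, the distinction this hook draws looks roughly like the following sketch (not the generated code itself):

#include <atomic>

// When the old value is unused, fetch_or can become a single `lock or`; when
// it is used, or for Nand/Max/Min/UMax/UMin, a cmpxchg loop is required.
void setFlag(std::atomic<unsigned> &Flags) {
  Flags.fetch_or(1u, std::memory_order_seq_cst); // result unused -> lock or
}

unsigned fetchNand(std::atomic<unsigned> &V, unsigned Mask) {
  unsigned Old = V.load(std::memory_order_relaxed);
  // The cmpxchg loop the lowering must emit for operations with no single
  // x86 instruction; Old is refreshed on each failed exchange.
  while (!V.compare_exchange_weak(Old, ~(Old & Mask),
                                  std::memory_order_seq_cst,
                                  std::memory_order_relaxed)) {
  }
  return Old;
}

int main() {
  std::atomic<unsigned> F{0};
  setFlag(F);
  return fetchNand(F, 1u) == 1u ? 0 : 1;
}
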
+
+static bool hasMFENCE(const X86Subtarget& Subtarget) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ return Subtarget.hasSSE2() || Subtarget.is64Bit();
+}
+
+LoadInst *
+X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+ const Type *MemType = AI->getType();
+ // Accesses larger than the native width are turned into cmpxchg/libcalls, so
+ // there is no benefit in turning such RMWs into loads, and it is actually
+ // harmful as it introduces an mfence.
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth)
+ return nullptr;
+
+ auto Builder = IRBuilder<>(AI);
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ auto SynchScope = AI->getSynchScope();
+ // We must restrict the ordering to avoid generating loads with Release or
+ // ReleaseAcquire orderings.
+ auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
+ auto Ptr = AI->getPointerOperand();
+
+ // Before the load we need a fence. Here is an example lifted from
+ // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
+ // is required:
+ // Thread 0:
+ // x.store(1, relaxed);
+ // r1 = y.fetch_add(0, release);
+ // Thread 1:
+ // y.fetch_add(42, acquire);
+ // r2 = x.load(relaxed);
+ // r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
+ // lowered to just a load without a fence. An mfence flushes the store buffer,
+ // making the optimization clearly correct.
+ // FIXME: it is required if isAtLeastRelease(Order) but it is not clear
+ // otherwise, we might be able to be more aggressive on relaxed idempotent
+ // rmw. In practice, they do not look useful, so we don't try to be
+ // especially clever.
+ if (SynchScope == SingleThread) {
+ // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
+ // the IR level, so we must wrap it in an intrinsic.
+ return nullptr;
+ } else if (hasMFENCE(Subtarget)) {
+ Function *MFence = llvm::Intrinsic::getDeclaration(M,
+ Intrinsic::x86_sse2_mfence);
+ Builder.CreateCall(MFence);
+ } else {
+ // FIXME: it might make sense to use a locked operation here but on a
+ // different cache-line to prevent cache-line bouncing. In practice it
+ // is probably a small win, and x86 processors without mfence are rare
+ // enough that we do not bother.
+ return nullptr;
+ }
+
+ // Finally we can emit the atomic load.
+ LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
+ AI->getType()->getPrimitiveSizeInBits());
+ Loaded->setAtomic(Order, SynchScope);
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+ return Loaded;
+}
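
In std::atomic terms, the rewrite this function performs looks roughly like the following (orderings simplified to seq_cst here; getStrongestFailureOrdering handles the release/acq_rel cases by clamping to a legal load ordering):

#include <atomic>

// An idempotent RMW such as fetch_add(0) becomes a full fence (lowered to
// MFENCE) followed by a plain atomic load.
int before(std::atomic<int> &X) {
  return X.fetch_add(0, std::memory_order_seq_cst);
}

int after(std::atomic<int> &X) {
  std::atomic_thread_fence(std::memory_order_seq_cst); // MFENCE
  return X.load(std::memory_order_seq_cst);
}

int main() {
  std::atomic<int> X{7};
  return before(X) == after(X) ? 0 : 1;
}
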
+
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -15967,10 +19226,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
- // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
- // no-sse2). There isn't any reason to disable it if the target processor
- // supports it.
- if (Subtarget->hasSSE2() || Subtarget->is64Bit())
+ if (hasMFENCE(*Subtarget))
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
@@ -16085,6 +19341,139 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
+static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ SDLoc dl(Node);
+
+ Op = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "CTPOP lowering only implemented for 128/256-bit wide vector types");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned Len = EltVT.getSizeInBits();
+
+ // This is the vectorized version of the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ // with a minor tweak to use a series of adds + shifts instead of vector
+ // multiplications. Implemented for the v2i64, v4i64, v4i32, v8i32 types:
+ //
+ // v2i64, v4i64, v4i32 => Only profitable w/ popcnt disabled
+ // v8i32 => Always profitable
+ //
+ // FIXME: There are a couple of possible improvements:
+ //
+ // 1) Support for i8 and i16 vectors (needs measurements if popcnt enabled).
+ // 2) Use strategies from http://wm.ite.pl/articles/sse-popcount.html
+ //
+ assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 &&
+ "CTPOP not implemented for this vector element type.");
+
+ // X86 canonicalizes ANDs to vXi64; generate the appropriate bitcasts to
+ // avoid extra legalization.
+ bool NeedsBitcast = EltVT == MVT::i32;
+ MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64;
+
+ SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), EltVT);
+ SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), EltVT);
+ SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), EltVT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, EltVT));
+ SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones);
+ SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV);
+ if (NeedsBitcast)
+ Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
+
+ SmallVector<SDValue, 8> Mask55(NumElts, Cst55);
+ SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55);
+ if (NeedsBitcast)
+ M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55);
+
+ SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55);
+ if (VT != And.getValueType())
+ And = DAG.getNode(ISD::BITCAST, dl, VT, And);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And);
+
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ SmallVector<SDValue, 8> Mask33(NumElts, Cst33);
+ SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33);
+ SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, EltVT));
+ SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos);
+
+ Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV);
+ if (NeedsBitcast) {
+ Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
+ M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33);
+ Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub);
+ }
+
+ SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33);
+ SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33);
+ if (VT != AndRHS.getValueType()) {
+ AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS);
+ AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS);
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS);
+
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, EltVT));
+ SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours);
+ Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV);
+ Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
+
+ SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F);
+ SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F);
+ if (NeedsBitcast) {
+ Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
+ M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F);
+ }
+ And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F);
+ if (VT != And.getValueType())
+ And = DAG.getNode(ISD::BITCAST, dl, VT, And);
+
+ // The algorithm mentioned above uses:
+ // v = (v * 0x01010101...) >> (Len - 8)
+ //
+ // Change it to use vector adds + vector shifts which yield faster results on
+ // Haswell than using vector integer multiplication.
+ //
+ // For i32 elements:
+ // v = v + (v >> 8)
+ // v = v + (v >> 16)
+ //
+ // For i64 elements:
+ // v = v + (v >> 8)
+ // v = v + (v >> 16)
+ // v = v + (v >> 32)
+ //
+ Add = And;
+ SmallVector<SDValue, 8> Csts;
+ for (unsigned i = 8; i <= Len/2; i *= 2) {
+ Csts.assign(NumElts, DAG.getConstant(i, EltVT));
+ SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts);
+ Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV);
+ Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
+ Csts.clear();
+ }
+
+ // The result is in the least significant 6 bits for i32 and 7 bits for i64.
+ SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), EltVT);
+ SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F);
+ SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV);
+ if (NeedsBitcast) {
+ Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
+ M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F);
+ }
+ And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F);
+ if (VT != And.getValueType())
+ And = DAG.getNode(ISD::BITCAST, dl, VT, And);
+
+ return And;
+}
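
A scalar reference for the lowering above, including the add+shift reduction used in place of the final multiply; handy for checking the constants:

#include <cassert>
#include <cstdint>

// Per-i32-lane popcount: the three classic masking steps, then the add+shift
// reduction the vector lowering uses instead of v * 0x01010101 >> 24, and the
// final mask of the low 6 bits.
uint32_t popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;
  V = V + (V >> 8);  // accumulate byte counts pairwise
  V = V + (V >> 16); // fold the remaining halves together
  return V & 0x3F;   // popcount of 32 bits fits in 6 bits
}

int main() {
  assert(popcount32(0) == 0);
  assert(popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0xF0F0F0F0u) == 16);
  return 0;
}
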
+
static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
@@ -16181,7 +19570,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
Type *RetTy = isF64
- ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
+ ? (Type*)StructType::get(ArgTy, ArgTy, nullptr)
: (Type*)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -16212,6 +19601,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -16240,8 +19630,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::FABS: return LowerFABS(Op, DAG);
- case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
+ case ISD::FABS:
+ case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
@@ -16251,7 +19642,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -16292,29 +19683,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
-static void ReplaceATOMIC_LOAD(SDNode *Node,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
- SDLoc dl(Node);
- EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
-
- // Convert wide load -> cmpxchg8b/cmpxchg16b
- // FIXME: On 32-bit, load -> fild or movq would be more efficient
- // (The only way to get a 16-byte load is cmpxchg16b)
- // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
- SDValue Zero = DAG.getConstant(0, VT);
- SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
- SDValue Swap =
- DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
- Node->getOperand(0), Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
- Results.push_back(Swap.getValue(0));
- Results.push_back(Swap.getValue(2));
-}
-
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@@ -16325,6 +19693,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
+ case X86ISD::FMINC:
+ case X86ISD::FMIN:
+ case X86ISD::FMAXC:
+ case X86ISD::FMAX: {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2f32)
+ llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
+ SDValue UNDEF = DAG.getUNDEF(VT);
+ SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ N->getOperand(0), UNDEF);
+ SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ N->getOperand(1), UNDEF);
+ Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
+ return;
+ }
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
@@ -16473,12 +19857,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
- // should have already been dealt with by X86AtomicExpand.cpp.
+ // should have already been dealt with by AtomicExpandPass.cpp.
break;
- case ISD::ATOMIC_LOAD: {
- ReplaceATOMIC_LOAD(N, Results, DAG);
- return;
}
case ISD::BITCAST: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
@@ -16561,8 +19943,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
- case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
@@ -16618,6 +20000,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::SMUL8: return "X86ISD::SMUL8";
+ case X86ISD::UMUL8: return "X86ISD::UMUL8";
+ case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
+ case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
@@ -16633,6 +20019,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
+ case X86ISD::VALIGN: return "X86ISD::VALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -16652,7 +20039,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
- case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
@@ -16678,6 +20065,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
+ case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
+ case X86ISD::EXPAND: return "X86ISD::EXPAND";
+ case X86ISD::SELECT: return "X86ISD::SELECT";
}
}
@@ -16868,6 +20258,14 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
if (VT.getSizeInBits() == 64)
return false;
+ // This is an experimental legality test that is tailored to match the
+ // legality test of the experimental lowering more closely. They are gated
+ // separately to ease testing of performance differences.
+ if (ExperimentalVectorShuffleLegality)
+ // We only care that the types being shuffled are legal. The lowering can
+ // handle any possible shuffle mask that results.
+ return isTypeLegal(SVT);
+
// If this is a single-input shuffle with no 128 bit lane crossings we can
// lower it into pshufb.
if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
@@ -16888,9 +20286,12 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
+ isCommutedMOVLMask(M, SVT) ||
isMOVHLPSMask(M, SVT) ||
isSHUFPMask(M, SVT) ||
+ isSHUFPMask(M, SVT, /* Commuted */ true) ||
isPSHUFDMask(M, SVT) ||
+ isPSHUFDMask(M, SVT, /* SecondOperand */ true) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, SVT, Subtarget) ||
@@ -16898,7 +20299,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
+ isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) ||
+ (Subtarget->hasSSE41() && isINSERTPSMask(M, SVT)));
}
bool
@@ -16908,6 +20310,14 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return false;
MVT SVT = VT.getSimpleVT();
+
+ // This is an experimental legality test that is tailored to match the
+ // legality test of the experimental lowering more closely. They are gated
+ // separately to ease testing of performance differences.
+ if (ExperimentalVectorShuffleLegality)
+ // The new vector shuffle lowering is very good at managing zero-inputs.
+ return isShuffleMaskLegal(Mask, VT);
+
unsigned NumElts = SVT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
@@ -16916,7 +20326,9 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
isSHUFPMask(Mask, SVT) ||
- isSHUFPMask(Mask, SVT, /* Commuted */ true));
+ isSHUFPMask(Mask, SVT, /* Commuted */ true) ||
+ isBlendMask(Mask, SVT, Subtarget->hasSSE41(),
+ Subtarget->hasInt256()));
}
return false;
}
@@ -17114,7 +20526,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -17266,7 +20678,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
.setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
- BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
@@ -17370,7 +20782,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
@@ -17380,7 +20792,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
if (!Subtarget->isTargetWin64()) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
- BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
+ BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
@@ -17453,7 +20865,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -17479,7 +20891,8 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo* TRI = BB->getParent()->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ BB->getParent()->getSubtarget().getRegisterInfo();
if (!MI->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -17518,17 +20931,20 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
- bool Is64Bit) const {
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
- unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
- unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+ const bool Is64Bit = Subtarget->is64Bit();
+ const bool IsLP64 = Subtarget->isTarget64BitLP64();
+
+ const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
// BB:
// ... [Till the alloca]
@@ -17552,14 +20968,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+ getRegClassFor(getPointerTy());
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
- physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+ physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = BB;
++MBBIter;
@@ -17575,12 +20991,12 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+ BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
- BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
+ BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
@@ -17588,12 +21004,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
- BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+ BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
- const uint32_t *RegMask =
- MF->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
- if (Is64Bit) {
+ const uint32_t *RegMask = MF->getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
+ if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
@@ -17601,6 +21019,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
+ } else if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EDI, RegState::Implicit)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
@@ -17616,8 +21042,8 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
- .addReg(Is64Bit ? X86::RAX : X86::EAX);
- BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+ .addReg(IsLP64 ? X86::RAX : X86::EAX);
+ BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
@@ -17641,10 +21067,10 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- assert(!Subtarget->isTargetMacho());
+ assert(!Subtarget->isTargetMachO());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
@@ -17674,8 +21100,10 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
.addReg(X86::RAX);
}
} else {
- const char *StackProbeSymbol =
- Subtarget->isTargetKnownWindowsMSVC() ? "_chkstk" : "_alloca";
+ const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
+ Subtarget->isTargetWindowsItanium())
+ ? "_chkstk"
+ : "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
@@ -17698,8 +21126,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
- const X86InstrInfo *TII
- = static_cast<const X86InstrInfo*>(F->getTarget().getInstrInfo());
+ const X86InstrInfo *TII =
+ static_cast<const X86InstrInfo *>(F->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
@@ -17708,8 +21136,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
- const uint32_t *RegMask =
- F->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ const uint32_t *RegMask = F->getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -17754,7 +21184,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -17795,6 +21225,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// v = phi(main, restore)
//
// restoreMBB:
+  // If the base pointer is being used, load it from the frame.
// v_restore = 1
MachineBasicBlock *thisMBB = MBB;
@@ -17860,8 +21291,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -17878,8 +21309,20 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
+ if (RegInfo->hasBasePointer(*MF)) {
+ const X86Subtarget &STI = MF->getTarget().getSubtarget<X86Subtarget>();
+ const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ X86FI->setRestoreBasePointer(MF);
+ unsigned FramePtr = RegInfo->getFrameRegister(*MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+ unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
+ addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
+ FramePtr, true, X86FI->getRestoreBasePointerOffset())
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
- BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
+ BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI->eraseFromParent();
@@ -17891,7 +21334,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -17906,8 +21349,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -17951,7 +21394,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
-// to remove extra copies in the loop.
+// to remove extra copies in the loop.
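The motivation for rewriting 213-form FMAs to 231-form shows up in a scalar model of the two operand orders (a sketch of the Intel FMA3 numbering, where dst is operand 1): the 231 form adds into the accumulator in place, so a loop-carried accumulator needs no extra copy.

    // Scalar model of the two FMA3 forms being swapped above.
    float fma213(float dst, float a, float b) { return a * dst + b; } // dst = a*dst + b
    float fma231(float dst, float a, float b) { return a * b + dst; } // dst = a*b + dst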
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -18006,6 +21449,11 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break;
case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break;
case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break;
+ case X86::VFMADDSUBPDr213r: NewFMAOpc = X86::VFMADDSUBPDr231r; break;
+ case X86::VFMADDSUBPSr213r: NewFMAOpc = X86::VFMADDSUBPSr231r; break;
+ case X86::VFMSUBADDPDr213r: NewFMAOpc = X86::VFMSUBADDPDr231r; break;
+ case X86::VFMSUBADDPSr213r: NewFMAOpc = X86::VFMSUBADDPSr231r; break;
+
case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break;
case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break;
case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break;
@@ -18014,10 +21462,14 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break;
case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break;
case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break;
+ case X86::VFMADDSUBPDr213rY: NewFMAOpc = X86::VFMADDSUBPDr231rY; break;
+ case X86::VFMADDSUBPSr213rY: NewFMAOpc = X86::VFMADDSUBPSr231rY; break;
+ case X86::VFMSUBADDPDr213rY: NewFMAOpc = X86::VFMSUBADDPDr231rY; break;
+ case X86::VFMSUBADDPSr213rY: NewFMAOpc = X86::VFMSUBADDPSr231rY; break;
default: llvm_unreachable("Unrecognized FMA variant.");
}
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
.addOperand(MI->getOperand(0))
@@ -18048,9 +21500,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::SEG_ALLOCA_32:
- return EmitLoweredSegAlloca(MI, BB, false);
case X86::SEG_ALLOCA_64:
- return EmitLoweredSegAlloca(MI, BB, true);
+ return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -18083,7 +21534,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
MachineFunction *F = BB->getParent();
- const TargetInstrInfo *TII = F->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
@@ -18167,7 +21618,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -18180,15 +21631,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB, BB->getParent()->getTarget().getInstrInfo(), Subtarget);
+ return EmitMonitor(MI, BB, BB->getParent()->getSubtarget().getInstrInfo(),
+ Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitXBegin(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -18204,6 +21656,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
+ case TargetOpcode::STATEPOINT:
+ // As an implementation detail, STATEPOINT shares the STACKMAP format at
+ // this point in the process. We diverge later.
+ return emitPatchPoint(MI, BB);
+
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
@@ -18224,6 +21681,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VFNMSUBPSr213r:
case X86::VFNMSUBSDr213r:
case X86::VFNMSUBSSr213r:
+ case X86::VFMADDSUBPDr213r:
+ case X86::VFMADDSUBPSr213r:
+ case X86::VFMSUBADDPDr213r:
+ case X86::VFMSUBADDPSr213r:
case X86::VFMADDPDr213rY:
case X86::VFMADDPSr213rY:
case X86::VFMSUBPDr213rY:
@@ -18232,6 +21693,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VFNMADDPSr213rY:
case X86::VFNMSUBPDr213rY:
case X86::VFNMSUBPSr213rY:
+ case X86::VFMADDSUBPDr213rY:
+ case X86::VFMADDSUBPSr213rY:
+ case X86::VFMSUBADDPDr213rY:
+ case X86::VFMSUBADDPSr213rY:
return emitFMA3Instr(MI, BB);
}
}
@@ -18461,6 +21926,329 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Combine an arbitrary chain of shuffles into a single instruction if
+/// possible.
+///
+/// This is the leaf of the recursive combine below. When we have found some
+/// chain of single-use x86 shuffle instructions and accumulated the combined
+/// shuffle mask represented by them, this will try to pattern match that mask
+/// into either a single instruction if there is a special purpose instruction
+/// for this operation, or into a PSHUFB instruction which is a fully general
+/// instruction but should only be used to replace chains over a certain depth.
+static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
+ int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
+
+ // Find the operand that enters the chain. Note that multiple uses are OK
+ // here, we're not going to remove the operand we find.
+ SDValue Input = Op.getOperand(0);
+ while (Input.getOpcode() == ISD::BITCAST)
+ Input = Input.getOperand(0);
+
+ MVT VT = Input.getSimpleValueType();
+ MVT RootVT = Root.getSimpleValueType();
+ SDLoc DL(Root);
+
+ // Just remove no-op shuffle masks.
+ if (Mask.size() == 1) {
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Use the float domain if the operand type is a floating point type.
+ bool FloatDomain = VT.isFloatingPoint();
+
+ // For floating point shuffles, we don't have free copies in the shuffle
+ // instructions or the ability to load as part of the instruction, so
+ // canonicalize their shuffles to UNPCK or MOV variants.
+ //
+ // Note that even with AVX we prefer the PSHUFD form of shuffle for integer
+ // vectors because it can have a load folded into it that UNPCK cannot. This
+ // doesn't preclude something switching to the shorter encoding post-RA.
+ if (FloatDomain) {
+ if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
+ bool Lo = Mask.equals(0, 0);
+ unsigned Shuffle;
+ MVT ShuffleVT;
+ // Check if we have SSE3 which will let us use MOVDDUP. That instruction
+      // is no slower than UNPCKLPD but can fold even an unaligned memory
+      // load into its input operand.
+ if (Lo && Subtarget->hasSSE3()) {
+ Shuffle = X86ISD::MOVDDUP;
+ ShuffleVT = MVT::v2f64;
+ } else {
+ // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller
+ // than the UNPCK variants.
+ Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
+ ShuffleVT = MVT::v4f32;
+ }
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ if (Shuffle == X86ISD::MOVDDUP)
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
+ else
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ if (Subtarget->hasSSE3() &&
+ (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
+ bool Lo = Mask.equals(0, 0, 2, 2);
+ unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
+ MVT ShuffleVT = MVT::v4f32;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
+ bool Lo = Mask.equals(0, 0, 1, 1);
+ unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ MVT ShuffleVT = MVT::v4f32;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
+ // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
+ // variants as none of these have single-instruction variants that are
+ // superior to the UNPCK formulation.
+ if (!FloatDomain &&
+ (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
+ Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
+ 15))) {
+ bool Lo = Mask[0] == 0;
+ unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ MVT ShuffleVT;
+ switch (Mask.size()) {
+ case 8:
+ ShuffleVT = MVT::v8i16;
+ break;
+ case 16:
+ ShuffleVT = MVT::v16i8;
+ break;
+ default:
+ llvm_unreachable("Impossible mask size!");
+ };
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Don't try to re-form single instruction chains under any circumstances now
+ // that we've done encoding canonicalization for them.
+ if (Depth < 2)
+ return false;
+
+ // If we have 3 or more shuffle instructions or a chain involving PSHUFB, we
+ // can replace them with a single PSHUFB instruction profitably. Intel's
+// manuals suggest only using PSHUFB if doing so replaces 5 instructions, but
+ // in practice PSHUFB tends to be *very* fast so we're more aggressive.
+ if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
+ SmallVector<SDValue, 16> PSHUFBMask;
+ assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
+ int Ratio = 16 / Mask.size();
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Mask[i / Ratio] == SM_SentinelUndef) {
+ PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
+ continue;
+ }
+ int M = Mask[i / Ratio] != SM_SentinelZero
+ ? Ratio * Mask[i / Ratio] + i % Ratio
+ : 255;
+ PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
+ }
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ DCI.AddToWorklist(Op.getNode());
+ SDValue PSHUFBMaskOp =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DCI.AddToWorklist(PSHUFBMaskOp.getNode());
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Failed to find any combines.
+ return false;
+}
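The Ratio loop above widens an element-level mask to the 16 byte selectors PSHUFB consumes. A self-contained sketch of the same expansion, assuming the sentinel values used in this file (SM_SentinelUndef == -1, SM_SentinelZero == -2):

    #include <vector>

    // Expand a 2/4/8/16-element shuffle mask to PSHUFB's 16 byte selectors;
    // a selector of 255 (high bit set) zeroes the byte, -1 marks it undef.
    std::vector<int> expandToPSHUFBMask(const std::vector<int> &Mask) {
      const int Ratio = 16 / static_cast<int>(Mask.size());
      std::vector<int> Bytes;
      for (int i = 0; i < 16; ++i) {
        const int M = Mask[i / Ratio];
        if (M == -1)
          Bytes.push_back(-1);   // undef lane: any byte will do
        else if (M == -2)
          Bytes.push_back(255);  // zero lane: set the high bit
        else
          Bytes.push_back(Ratio * M + i % Ratio);
      }
      return Bytes;
    }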
+
+/// \brief Fully generic combining of x86 shuffle instructions.
+///
+/// This should be the last combine run over the x86 shuffle instructions. Once
+/// they have been fully optimized, this will recursively consider all chains
+/// of single-use shuffle instructions, build a generic model of the cumulative
+/// shuffle operation, and check for simpler instructions which implement this
+/// operation. We use this primarily for two purposes:
+///
+/// 1) Collapse generic shuffles to specialized single instructions when
+/// equivalent. In most cases, this is just an encoding size win, but
+/// sometimes we will collapse multiple generic shuffles into a single
+/// special-purpose shuffle.
+/// 2) Look for sequences of shuffle instructions with 3 or more total
+/// instructions, and replace them with the slightly more expensive SSSE3
+/// PSHUFB instruction if available. We do this as the last combining step
+/// to ensure we avoid using PSHUFB if we can implement the shuffle with
+/// a suitable short sequence of other instructions. The PSHUFB will either
+/// use a register or have to read from memory and so is slightly (but only
+/// slightly) more expensive than the other shuffle instructions.
+///
+/// Because this is inherently a quadratic operation (for each shuffle in
+/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
+/// This should never be an issue in practice as the shuffle lowering doesn't
+/// produce sequences of more than 8 instructions.
+///
+/// FIXME: We will currently miss some cases where the redundant shuffling
+/// would simplify under the threshold for PSHUFB formation because of
+/// combine-ordering. To fix this, we should do the redundant instruction
+/// combining in this recursive walk.
+static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
+ ArrayRef<int> RootMask,
+ int Depth, bool HasPSHUFB,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // Bound the depth of our recursive combine because this is ultimately
+ // quadratic in nature.
+ if (Depth > 8)
+ return false;
+
+ // Directly rip through bitcasts to find the underlying operand.
+ while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse())
+ Op = Op.getOperand(0);
+
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isVector())
+ return false; // Bail if we hit a non-vector.
+ // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
+ // version should be added.
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ assert(Root.getSimpleValueType().isVector() &&
+ "Shuffles operate on vector types!");
+ assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
+ "Can only combine shuffles of the same vector register size.");
+
+ if (!isTargetShuffle(Op.getOpcode()))
+ return false;
+ SmallVector<int, 16> OpMask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+  // We can only combine unary shuffles whose mask we can decode.
+ if (!HaveMask || !IsUnary)
+ return false;
+
+ assert(VT.getVectorNumElements() == OpMask.size() &&
+ "Different mask size from vector size!");
+ assert(((RootMask.size() > OpMask.size() &&
+ RootMask.size() % OpMask.size() == 0) ||
+ (OpMask.size() > RootMask.size() &&
+ OpMask.size() % RootMask.size() == 0) ||
+ OpMask.size() == RootMask.size()) &&
+ "The smaller number of elements must divide the larger.");
+ int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size());
+ int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size());
+ assert(((RootRatio == 1 && OpRatio == 1) ||
+ (RootRatio == 1) != (OpRatio == 1)) &&
+ "Must not have a ratio for both incoming and op masks!");
+
+ SmallVector<int, 16> Mask;
+ Mask.reserve(std::max(OpMask.size(), RootMask.size()));
+
+ // Merge this shuffle operation's mask into our accumulated mask. Note that
+ // this shuffle's mask will be the first applied to the input, followed by the
+ // root mask to get us all the way to the root value arrangement. The reason
+ // for this order is that we are recursing up the operation chain.
+ for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
+ int RootIdx = i / RootRatio;
+ if (RootMask[RootIdx] < 0) {
+ // This is a zero or undef lane, we're done.
+ Mask.push_back(RootMask[RootIdx]);
+ continue;
+ }
+
+ int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
+ int OpIdx = RootMaskedIdx / OpRatio;
+ if (OpMask[OpIdx] < 0) {
+ // The incoming lanes are zero or undef, it doesn't matter which ones we
+ // are using.
+ Mask.push_back(OpMask[OpIdx]);
+ continue;
+ }
+
+ // Ok, we have non-zero lanes, map them through.
+ Mask.push_back(OpMask[OpIdx] * OpRatio +
+ RootMaskedIdx % OpRatio);
+ }
+
+ // See if we can recurse into the operand to combine more things.
+ switch (Op.getOpcode()) {
+ case X86ISD::PSHUFB:
+ HasPSHUFB = true;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ if (Op.getOperand(0).hasOneUse() &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+    assert(Op.getOperand(0) == Op.getOperand(1) &&
+           "We only combine unary shuffles!");
+    // We can't check for single use; we have to check that this shuffle is
+    // the only user.
+ if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
+ }
+
+ // Minor canonicalization of the accumulated shuffle mask to make it easier
+  // to match below. All this does is detect masks with sequential pairs of
+ // elements, and shrink them to the half-width mask. It does this in a loop
+ // so it will reduce the size of the mask to the minimal width mask which
+ // performs an equivalent shuffle.
+ SmallVector<int, 16> WidenedMask;
+ while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+ Mask = std::move(WidenedMask);
+ WidenedMask.clear();
+ }
+
+ return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+ Subtarget);
+}
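The merge loop in the middle of this function is plain mask composition, with the two ratios reconciling masks of different element counts. The same arithmetic as a standalone sketch (names hypothetical):

    #include <algorithm>
    #include <vector>

    // Compose OpMask (applied to the input first) with RootMask (applied
    // second); negative entries are undef/zero sentinels and pass through.
    std::vector<int> composeShuffleMasks(const std::vector<int> &OpMask,
                                         const std::vector<int> &RootMask) {
      const int RootRatio =
          std::max<int>(1, (int)(OpMask.size() / RootMask.size()));
      const int OpRatio =
          std::max<int>(1, (int)(RootMask.size() / OpMask.size()));
      const int E = (int)std::max(OpMask.size(), RootMask.size());
      std::vector<int> Mask;
      for (int i = 0; i < E; ++i) {
        const int RootIdx = i / RootRatio;
        if (RootMask[RootIdx] < 0) {
          Mask.push_back(RootMask[RootIdx]);
          continue;
        }
        const int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
        const int OpIdx = RootMaskedIdx / OpRatio;
        if (OpMask[OpIdx] < 0) {
          Mask.push_back(OpMask[OpIdx]);
          continue;
        }
        Mask.push_back(OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio);
      }
      return Mask;
    }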
+
/// \brief Get the PSHUF-style mask from PSHUF node.
///
 /// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
@@ -18493,19 +22281,23 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
/// We walk up the chain and look for a combinable shuffle, skipping over
/// shuffles that we could hoist this shuffle's transformation past without
/// altering anything.
-static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue
+combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
- // Walk up a single-use chain looking for a combinable shuffle.
+ // Walk up a single-use chain looking for a combinable shuffle. Keep a stack
+ // of the shuffles in the chain so that we can form a fresh chain to replace
+ // this one.
+ SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
- return false; // Nothing combined!
+ return SDValue(); // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
@@ -18521,8 +22313,9 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// dword shuffle, and the high words are self-contained.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
- return false;
+ return SDValue();
+ Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
@@ -18530,8 +22323,9 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// dword shuffle, and the low words are self-contained.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
- return false;
+ return SDValue();
+ Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
@@ -18539,25 +22333,28 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
- return false;
+ return SDValue();
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
- return false;
+ return SDValue();
+ Chain.push_back(V);
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
- return false; // Nothing to combine.
+ return SDValue(); // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
+ Chain.push_back(V);
+
// Fallthrough!
case ISD::BITCAST:
V = V.getOperand(0);
@@ -18573,10 +22370,7 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
- return false;
-
- // Record the old value to use in RAUW-ing.
- SDValue Old = V;
+ return SDValue();
// Merge this node's mask and our incoming mask.
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
@@ -18585,20 +22379,34 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
- // It is possible that one of the combinable shuffles was completely absorbed
- // by the other, just replace it and revisit all users in that case.
- if (Old.getNode() == V.getNode()) {
- DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
- return true;
- }
+ // Rebuild the chain around this new shuffle.
+ while (!Chain.empty()) {
+ SDValue W = Chain.pop_back_val();
- // Replace N with its operand as we're going to combine that shuffle away.
- DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+ if (V.getValueType() != W.getOperand(0).getValueType())
+ V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V);
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
- return true;
+ switch (W.getOpcode()) {
+ default:
+ llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
+ break;
+
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
+ break;
+ }
+ }
+ if (V.getValueType() != N.getValueType())
+ V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V);
+
+ // Return the new chain to replace N.
+ return V;
}
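The mask merge this function performs ("Merge this node's mask and our incoming mask") is the usual composition of two 4-lane PSHUF immediates. Conceptually, if V = pshufd(X, VMask) and N = pshufd(V, NMask), the pair collapses to a single pshufd (a sketch, not code from this file):

    // Lane i of the combined shuffle reads X[VMask[NMask[i]]].
    void composePSHUFD(const int VMask[4], const int NMask[4], int Out[4]) {
      for (int i = 0; i < 4; ++i)
        Out[i] = VMask[NMask[i]];
    }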
/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
@@ -18634,26 +22442,6 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
// Other-half shuffles are no-ops.
continue;
-
- case X86ISD::PSHUFD: {
- // We can only handle pshufd if the half we are combining either stays in
- // its half, or switches to the other half. Bail if one of these isn't
- // true.
- SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
- int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2;
- if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) ||
- (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2)))
- return false;
-
- // Map the mask through the pshufd and keep walking up the chain.
- for (int i = 0; i < 4; ++i)
- Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
-
- // Switch halves if the pshufd does.
- CombineOpcode =
- VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
- continue;
- }
}
// Break out of the loop if we break out of the switch.
break;
@@ -18663,7 +22451,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
// We fell out of the loop without finding a viable combining instruction.
return false;
- // Record the old value to use in RAUW-ing.
+ // Combine away the bottom node as its shuffle will be accumulated into
+ // a preceding shuffle.
+ DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
+
+ // Record the old value.
SDValue Old = V;
// Merge this node's mask and our incoming mask (adjusted to account for all
@@ -18674,12 +22466,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
- // Replace N with its operand as we're going to combine that shuffle away.
- DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+ // Check that the shuffles didn't cancel each other out. If not, we need to
+ // combine to the new one.
+ if (Old != V)
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
return true;
}
@@ -18720,13 +22513,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return SDValue(); // We combined away this shuffle, so we're done.
// See if this reduces to a PSHUFD which is no more expensive and can
- // combine with more operations.
- if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
- areAdjacentMasksSequential(Mask)) {
- int DMask[] = {-1, -1, -1, -1};
+ // combine with more operations. Note that it has to at least flip the
+ // dwords as otherwise it would have been removed as a no-op.
+ if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+ int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
- DMask[DOffset + 0] = DOffset + Mask[0] / 2;
- DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+ DMask[DOffset + 0] = DOffset + 1;
+ DMask[DOffset + 1] = DOffset + 0;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
@@ -18779,8 +22572,8 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFD:
- if (combineRedundantDWordShuffle(N, Mask, DAG, DCI))
- return SDValue(); // We combined away this shuffle.
+ if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ return NewN;
break;
}
@@ -18788,6 +22581,61 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Try to combine a shuffle into a target-specific add-sub node.
+///
+/// We combine this directly on the abstract vector shuffle nodes so it is
+/// easier to generically match. We also insert dummy vector shuffle nodes for
+/// the operands which explicitly discard the lanes which are unused by this
+/// operation, so that the fact that they're unused flows through the rest of
+/// the combiner.
+static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // We only handle target-independent shuffles.
+ // FIXME: It would be easy and harmless to use the target shuffle mask
+ // extraction tool to support more.
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return SDValue();
+
+ auto *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ // We require the first shuffle operand to be the SUB node, and the second to
+ // be the ADD node.
+ // FIXME: We should support the commuted patterns.
+ if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+ return SDValue();
+
+ // If there are other uses of these operations we can't fold them.
+ if (!V1->hasOneUse() || !V2->hasOneUse())
+ return SDValue();
+
+ // Ensure that both operations have the same operands. Note that we can
+ // commute the FADD operands.
+ SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
+ if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
+ (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
+ return SDValue();
+
+ // We're looking for blends between FADD and FSUB nodes. We insist on these
+ // nodes being lined up in a specific expected pattern.
+ if (!(isShuffleEquivalent(Mask, 0, 3) ||
+ isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
+ isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
+ return SDValue();
+
+ // Only specific types are legal at this point, assert so we notice if and
+ // when these change.
+ assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
+ VT == MVT::v4f64) &&
+ "Unknown vector type encountered!");
+
+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
+}
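All three accepted masks interleave even lanes of the FSUB with odd lanes of the FADD, which is exactly what the SSE3/AVX ADDSUB instructions compute. A scalar model for the v4f32 case (a sketch):

    // What (shuffle (fsub A,B), (fadd A,B), <0,5,2,7>) computes for v4f32,
    // i.e. the ADDSUBPS lane pattern.
    void addsub4(const float A[4], const float B[4], float R[4]) {
      R[0] = A[0] - B[0]; // mask index 0: lane 0 of the FSUB
      R[1] = A[1] + B[1]; // mask index 5: lane 1 of the FADD
      R[2] = A[2] - B[2]; // mask index 2: lane 2 of the FSUB
      R[3] = A[3] + B[3]; // mask index 7: lane 3 of the FADD
    }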
+
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -18797,54 +22645,17 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- // Canonicalize shuffles that perform 'addsub' on packed float vectors
- // according to the rule:
- // (shuffle (FADD A, B), (FSUB A, B), Mask) ->
- // (shuffle (FSUB A, -B), (FADD A, -B), Mask)
- //
- // Where 'Mask' is:
- // <0,5,2,7> -- for v4f32 and v4f64 shuffles;
- // <0,3> -- for v2f64 shuffles;
- // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles.
- //
- // This helps pattern-matching more SSE3/AVX ADDSUB instructions
- // during ISel stage.
- if (N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB &&
- // Operands to the FADD and FSUB must be the same.
- ((N0->getOperand(0) == N1->getOperand(0) &&
- N0->getOperand(1) == N1->getOperand(1)) ||
- // FADD is commutable. See if by commuting the operands of the FADD
- // we would still be able to match the operands of the FSUB dag node.
- (N0->getOperand(1) == N1->getOperand(0) &&
- N0->getOperand(0) == N1->getOperand(1))) &&
- N0->getOperand(0)->getOpcode() != ISD::UNDEF &&
- N0->getOperand(1)->getOpcode() != ISD::UNDEF) {
-
- ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
- unsigned NumElts = VT.getVectorNumElements();
- ArrayRef<int> Mask = SV->getMask();
- bool CanFold = true;
-
- for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i)
- CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i);
-
- if (CanFold) {
- SDValue Op0 = N1->getOperand(0);
- SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1));
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1);
- SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1);
- return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask);
- }
- }
-
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
+ // If we have legalized the vector types, look for blends of FADD and FSUB
+ // nodes that we can fuse into an ADDSUB node.
+ if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
+ if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
+ return AddSub;
+
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
@@ -18869,7 +22680,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
EVT SVT = BC0.getValueType();
unsigned Opcode = BC0.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
-
+
if (BC0.hasOneUse() && SVT.isVector() &&
SVT.getVectorNumElements() * 2 == NumElts &&
TLI.isOperationLegal(Opcode, VT)) {
@@ -18921,6 +22732,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
if (Shuffle.getNode())
return Shuffle;
+
+ // Try recursively combining arbitrary sequences of x86 shuffle
+ // instructions into higher-order shuffles. We do this after combining
+ // specific PSHUF instruction sequences into their minimal form so that we
+ // can evaluate how many specialized shuffle instructions are involved in
+ // a particular chain.
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask,
+ /*Depth*/ 1, /*HasPSHUFB*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
@@ -18938,7 +22761,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
/// specific shuffle of a load can be folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
-/// shuffles have been customed lowered so we need to handle those here.
+/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
@@ -18950,20 +22773,20 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
- EVT VT = InVec.getValueType();
+ EVT OriginalVT = InVec.getValueType();
- bool HasShuffleIntoBitcast = false;
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
- if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
- HasShuffleIntoBitcast = true;
}
+ EVT CurrentVT = InVec.getValueType();
+
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
@@ -18973,12 +22796,12 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
- UnaryShuffle))
+ if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
+ ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
@@ -19004,35 +22827,37 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
- if (HasShuffleIntoBitcast) {
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ EVT EltVT = N->getValueType(0);
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ EltVT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return SDValue();
- }
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
+ return SDValue();
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
SDLoc dl(N);
// Create shuffle node taking into account the case that its a unary shuffle
- SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
- Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT)
+ : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(CurrentVT, dl,
InVec.getOperand(0), Shuffle,
&ShuffleMask[0]);
- Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
-/// to a simple store and scalar loads to extract the elements.
+/// into a somewhat faster sequence. For i686, the best sequence is apparently
+/// storing the value and loading scalars back, while for x64 we should
+/// use 64-bit extracts and shifts.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
@@ -19091,36 +22916,61 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Ok, we've now decided to do the transformation.
+ // If 64-bit shifts are legal, use the extract-shift sequence,
+ // otherwise bounce the vector off the cache.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Vals[4];
SDLoc dl(InputVector);
- // Store the value to a temporary stack slot.
- SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
+ SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
+ EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+ SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
+ DAG.getConstant(0, VecIdxTy));
+ SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
+ DAG.getConstant(1, VecIdxTy));
+
+ SDValue ShAmt = DAG.getConstant(32,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
+ Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
+ Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
+ Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf);
+ Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
+ } else {
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
- // Replace each use (extract) with a load of the appropriate element.
- for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
- UE = Uses.end(); UI != UE; ++UI) {
- SDNode *Extract = *UI;
+ EVT ElementType = InputVector.getValueType().getVectorElementType();
+ unsigned EltSize = ElementType.getSizeInBits() / 8;
- // cOMpute the element's address.
- SDValue Idx = Extract->getOperand(1);
- unsigned EltSize =
- InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
- uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+ // Replace each use (extract) with a load of the appropriate element.
+ for (unsigned i = 0; i < 4; ++i) {
+ uint64_t Offset = EltSize * i;
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
- StackPtr, OffsetVal);
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ StackPtr, OffsetVal);
+
+ // Load the scalar.
+ Vals[i] = DAG.getLoad(ElementType, dl, Ch,
+ ScalarAddr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ }
+ }
- // Load the scalar.
- SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
- ScalarAddr, MachinePointerInfo(),
- false, false, false, 0);
+  // Replace the extracts.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
- // Replace the exact with the load.
- DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+ SDValue Idx = Extract->getOperand(1);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
// The replacement was made in place; don't return anything.
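On targets where 64-bit shifts are legal, the four i32 extracts above reduce to two 64-bit element extracts plus shifts and truncates. The scalar equivalent (a sketch; the DAG uses SRA, but after truncation the low 32 bits match a logical shift):

    #include <cstdint>

    // Pull four i32 lanes out of a v4i32 viewed as two i64 halves.
    void extractFourLanes(uint64_t Bottom, uint64_t Top, uint32_t Vals[4]) {
      Vals[0] = static_cast<uint32_t>(Bottom);
      Vals[1] = static_cast<uint32_t>(Bottom >> 32);
      Vals[2] = static_cast<uint32_t>(Top);
      Vals[3] = static_cast<uint32_t>(Top >> 32);
    }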
@@ -19137,6 +22987,21 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
bool NeedSplit = false;
switch (VT.getSimpleVT().SimpleTy) {
default: return std::make_pair(0, false);
+ case MVT::v4i64:
+ case MVT::v2i64:
+ if (!Subtarget->hasVLX())
+ return std::make_pair(0, false);
+ break;
+ case MVT::v64i8:
+ case MVT::v32i16:
+ if (!Subtarget->hasBWI())
+ return std::make_pair(0, false);
+ break;
+ case MVT::v16i32:
+ case MVT::v8i64:
+ if (!Subtarget->hasAVX512())
+ return std::make_pair(0, false);
+ break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -19203,7 +23068,7 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
}
static SDValue
-TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
+transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
SDValue Cond = N->getOperand(0);
@@ -19216,25 +23081,21 @@ TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
Cond = CondSrc->getOperand(0);
}
- MVT VT = N->getSimpleValueType(0);
- MVT EltVT = VT.getVectorElementType();
- unsigned NumElems = VT.getVectorNumElements();
- // There is no blend with immediate in AVX-512.
- if (VT.is512BitVector())
- return SDValue();
-
- if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
- return SDValue();
- if (!Subtarget->hasInt256() && VT == MVT::v16i16)
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
return SDValue();
unsigned MaskValue = 0;
if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
return SDValue();
+ MVT VT = N->getSimpleValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> ShuffleMask(NumElems, -1);
for (unsigned i = 0; i < NumElems; ++i) {
// Be sure we emit undef where we can.
@@ -19244,6 +23105,9 @@ TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, VT))
+ return SDValue();
return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
}
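The shuffle mask built above encodes, per element, which blend operand the immediate selects: bit i of MaskValue set means element i comes from RHS. A standalone sketch of the mapping (ignoring the undef refinement the code also applies):

    #include <vector>

    // Indices [0, N) select from LHS, [N, 2N) select from RHS.
    std::vector<int> blendMaskToShuffleMask(unsigned MaskValue,
                                            unsigned NumElems) {
      std::vector<int> M(NumElems);
      for (unsigned i = 0; i != NumElems; ++i)
        M[i] = (int)(i + NumElems * ((MaskValue >> i) & 1));
      return M;
    }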
@@ -19264,8 +23128,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
+ // We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && TLI.isTypeLegal(VT) &&
+ VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -19408,13 +23273,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
- // lowering on AVX-512. In this case we convert it to
+ // lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
- // The same situation for all 128 and 256-bit vectors of i8 and i16
+ // The same situation for all 128 and 256-bit vectors of i8 and i16.
+ // Since SKX these selects have a proper lowering.
EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
- OpVT.getVectorElementType() == MVT::i16)) {
+ OpVT.getVectorElementType() == MVT::i16) &&
+ !(Subtarget->hasBWI() && Subtarget->hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
@@ -19634,22 +23501,22 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
- // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // Check if SETCC has already been promoted
- TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT &&
- // Check that condition value type matches vselect operand type
- CondVT == VT) {
-
+ // Simplify vector selection if condition value type matches vselect
+ // operand type
+ if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
- if (!TValIsAllOnes && !FValIsAllZeros) {
- // Try invert the condition if true value is not all 1s and false value
- // is not all 0s.
+  // Try inverting the condition if the true value is not all 1s and the
+  // false value is not all 0s.
+ if (!TValIsAllOnes && !FValIsAllZeros &&
+ // Check if the selector will be produced by CMPP*/PCMP*
+ Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
@@ -19681,81 +23548,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Try to fold this VSELECT into a MOVSS/MOVSD
- if (N->getOpcode() == ISD::VSELECT &&
- Cond.getOpcode() == ISD::BUILD_VECTOR && !DCI.isBeforeLegalize()) {
- if (VT == MVT::v4i32 || VT == MVT::v4f32 ||
- (Subtarget->hasSSE2() && (VT == MVT::v2i64 || VT == MVT::v2f64))) {
- bool CanFold = false;
- unsigned NumElems = Cond.getNumOperands();
- SDValue A = LHS;
- SDValue B = RHS;
-
- if (isZero(Cond.getOperand(0))) {
- CanFold = true;
-
- // fold (vselect <0,-1,-1,-1>, A, B) -> (movss A, B)
- // fold (vselect <0,-1> -> (movsd A, B)
- for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
- CanFold = isAllOnes(Cond.getOperand(i));
- } else if (isAllOnes(Cond.getOperand(0))) {
- CanFold = true;
- std::swap(A, B);
-
- // fold (vselect <-1,0,0,0>, A, B) -> (movss B, A)
- // fold (vselect <-1,0> -> (movsd B, A)
- for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
- CanFold = isZero(Cond.getOperand(i));
- }
-
- if (CanFold) {
- if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
- return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
- }
-
- if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) {
- // fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
- // (v4i32 (bitcast (movsd (v2i64 (bitcast A)),
- // (v2i64 (bitcast B)))))
- //
- // fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
- // (v4f32 (bitcast (movsd (v2f64 (bitcast A)),
- // (v2f64 (bitcast B)))))
- //
- // fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
- // (v4i32 (bitcast (movsd (v2i64 (bitcast B)),
- // (v2i64 (bitcast A)))))
- //
- // fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
- // (v4f32 (bitcast (movsd (v2f64 (bitcast B)),
- // (v2f64 (bitcast A)))))
-
- CanFold = (isZero(Cond.getOperand(0)) &&
- isZero(Cond.getOperand(1)) &&
- isAllOnes(Cond.getOperand(2)) &&
- isAllOnes(Cond.getOperand(3)));
-
- if (!CanFold && isAllOnes(Cond.getOperand(0)) &&
- isAllOnes(Cond.getOperand(1)) &&
- isZero(Cond.getOperand(2)) &&
- isZero(Cond.getOperand(3))) {
- CanFold = true;
- std::swap(LHS, RHS);
- }
-
- if (CanFold) {
- EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64;
- SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS);
- SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS);
- SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA,
- NewB, DAG);
- return DAG.getNode(ISD::BITCAST, DL, VT, Select);
- }
- }
- }
- }
-
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -19767,22 +23559,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// build_vector of constants. This will be taken care in a later
// condition.
(TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
- VT != MVT::v8i16)) {
+ VT != MVT::v8i16) &&
+ // Don't optimize a vector of constants. Those are handled by
+ // the generic code, and all the bits must be properly set for
+ // the generic optimizer.
+ !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
return SDValue();
- // Check all uses of that condition operand to check whether it will be
- // consumed by non-BLEND instructions, which may depend on all bits are set
- // properly.
- for (SDNode::use_iterator I = Cond->use_begin(),
- E = Cond->use_end(); I != E; ++I)
- if (I->getOpcode() != ISD::VSELECT)
- // TODO: Add other opcodes eventually lowered into BLEND.
- return SDValue();
-
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -19790,8 +23577,45 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
- TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
- DCI.CommitTargetLoweringOpt(TLO);
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
+ TLO)) {
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Update all those users so that we no longer
+ // use the generic VSELECT; otherwise, we may perform incorrect
+ // optimizations because we have invalidated the expected contents of
+ // the vector boolean values.
+ if (Cond != TLO.Old) {
+ // Check all uses of the condition operand to see whether it will be
+ // consumed by non-BLEND instructions, which may depend on all bits
+ // being set properly.
+ for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+ I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
+ // Update all the users of the condition, before committing the change,
+ // so that the VSELECT optimizations that expect the correct vector
+ // boolean value will not be triggered.
+ for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+ I != E; ++I)
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(*I, 0),
+ DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
+ Cond, I->getOperand(1), I->getOperand(2)));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return SDValue();
+ }
+ // At this point, only Cond has changed. Change the condition just for
+ // N, so that all other users keep the opportunity to be optimized in
+ // their own way.
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(N, 0),
+ DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
+ TLO.New, N->getOperand(1), N->getOperand(2)));
+ return SDValue();
+ }
}
// We should generate an X86ISD::BLENDI from a vselect if its argument
@@ -19805,8 +23629,10 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Iff we find this pattern and the build_vectors are built from
// constants, we translate the vselect into a shuffle_vector that we
// know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
- SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
+ if ((N->getOpcode() == ISD::VSELECT ||
+ N->getOpcode() == X86ISD::SHRUNKBLEND) &&
+ !DCI.isBeforeLegalize()) {
+ SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
if (Shuffle.getNode())
return Shuffle;
}
@@ -20163,7 +23989,7 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
// fold (blend A, B, allOnes) -> B
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return Op1;
-
+
// Simplify the case where the mask is a constant i32 value.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
if (C->isNullValue())
@@ -20871,13 +24697,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned RegSz = RegVT.getSizeInBits();
- // On Sandybridge unaligned 256bit loads are inefficient.
+ // For chips with slow 32-byte unaligned loads, break the 32-byte operation
+ // into two 16-byte operations.
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Alignment = Ld->getAlignment();
bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
- if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
!DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
@@ -20907,153 +24733,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, NewVec, TF, true);
}
- // If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
- // expansion is still better than scalar code.
- // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
- // emit a shuffle and a arithmetic shift.
- // TODO: It is possible to support ZExt by zeroing the undef values
- // during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
- (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
- assert(MemVT != RegVT && "Cannot extend to the same type");
- assert(MemVT.isVector() && "Must load a vector from memory");
-
- unsigned NumElems = RegVT.getVectorNumElements();
- unsigned MemSz = MemVT.getSizeInBits();
- assert(RegSz > MemSz && "Register size must be greater than the mem size");
-
- if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
- return SDValue();
-
- // All sizes must be a power of two.
- if (!isPowerOf2_32(RegSz * MemSz * NumElems))
- return SDValue();
-
- // Attempt to load the original value using scalar loads.
- // Find the largest scalar type that divides the total loaded size.
- MVT SclrLoadTy = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
- SclrLoadTy = Tp;
- }
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
- (64 <= MemSz))
- SclrLoadTy = MVT::f64;
-
- // Calculate the number of scalar loads that we need to perform
- // in order to load our vector from memory.
- unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
- if (Ext == ISD::SEXTLOAD && NumLoads > 1)
- return SDValue();
-
- unsigned loadRegZize = RegSz;
- if (Ext == ISD::SEXTLOAD && RegSz == 256)
- loadRegZize /= 2;
-
- // Represent our vector as a sequence of elements which are the
- // largest scalar that we can load.
- EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
- loadRegZize/SclrLoadTy.getSizeInBits());
-
- // Represent the data using the same element type that is stored in
- // memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize/MemVT.getScalarType().getSizeInBits());
-
- assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
- "Invalid vector type");
-
- // We can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT))
- return SDValue();
-
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
-
- for (unsigned i = 0; i < NumLoads; ++i) {
- // Perform a single load.
- SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
- Ptr, Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
- Chains.push_back(ScalarLoad.getValue(1));
- // Create the first element type using SCALAR_TO_VECTOR in order to avoid
- // another round of DAGCombining.
- if (i == 0)
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
- else
- Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
- ScalarLoad, DAG.getIntPtrConstant(i));
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-
- // Bitcast the loaded value to a vector of the original element type, in
- // the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
- unsigned SizeRatio = RegSz/MemSz;
-
- if (Ext == ISD::SEXTLOAD) {
- // If we have SSE4.1 we can directly emit a VSEXT node.
- if (Subtarget->hasSSE41()) {
- SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
- return DCI.CombineTo(N, Sext, TF, true);
- }
-
- // Otherwise we'll shuffle the small elements in the high bits of the
- // larger type and perform an arithmetic shift. If the shift is not legal
- // it's better to scalarize.
- if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
- return SDValue();
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
-
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
-
- // Build the arithmetic shift.
- unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
- MemVT.getVectorElementType().getSizeInBits();
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
- DAG.getConstant(Amt, RegVT));
-
- return DCI.CombineTo(N, Shuff, TF, true);
- }
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i*SizeRatio] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
-
- // Bitcast to the requested type.
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
- // Replace the original load with the new sequence
- // and return the new chain.
- return DCI.CombineTo(N, Shuff, TF, true);
- }
-
return SDValue();
}
@@ -21067,13 +24746,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatenation of two XMM registers, perform two stores.
- // On Sandy Bridge, 256-bit memory operations are executed by two
- // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
- // memory operation.
+ // If we are saving a concatenation of two XMM registers and 32-byte stores
+ // are slow, such as on Sandy Bridge, perform two 16-byte stores.
unsigned Alignment = St->getAlignment();
bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
- if (VT.is256BitVector() && !Subtarget->hasInt256() &&
+ if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
StVT == VT && !IsAligned) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
@@ -21139,9 +24816,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Find the largest store unit
MVT StoreType = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
+ for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
@@ -21287,7 +24962,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
/// of its first operand, then on successive elements of its second operand,
@@ -21413,7 +25088,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
return true;
}
-/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+/// Do target-specific dag combines on floating point adds.
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
@@ -21428,7 +25103,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+/// Do target-specific dag combines on floating point subs.
static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
@@ -21443,8 +25118,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
-/// X86ISD::FXOR nodes.
+/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
@@ -21458,8 +25132,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
-/// X86ISD::FMAX nodes.
+/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
@@ -21480,7 +25153,7 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
-/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
// FAND(x, 0.0) -> 0.0
@@ -21493,7 +25166,7 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// PerformFANDNCombine - Do target-specific dag combines on X86ISD::FANDN nodes
+/// Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
// FANDN(x, 0.0) -> 0.0
// FANDN(0.0, x) -> x
@@ -21576,13 +25249,29 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
+ // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
+ // This exposes the sext to the sdivrem lowering, so that it directly extends
+ // from AH (which we otherwise need to do contortions to access).
+ if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
+ N0.getValueType() == MVT::i8 && VT == MVT::i32) {
+ SDLoc dl(N);
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
+ N0.getOperand(0), N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+ }
+
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT.isVector() && VT.getSizeInBits() == 256) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
@@ -21675,6 +25364,20 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
return R;
}
+ // (i8,i32 zext (udivrem (i8 x, i8 y)) ->
+ // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
+ // This exposes the zext to the udivrem lowering, so that it directly extends
+ // from AH (which we otherwise need to do contortions to access).
+ if (N0.getOpcode() == ISD::UDIVREM &&
+ N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
+ (VT == MVT::i32 || VT == MVT::i64)) {
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
+ N0.getOperand(0), N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+ }
+
return SDValue();
}
@@ -21863,14 +25566,14 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
- // Now check that the other operand of the AND is a constant splat. We could
+ // Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
- // Bail out if the vector isn't a constant splat.
- if (!BV->getConstantSplatNode())
+ // Bail out if the vector isn't a constant.
+ if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
@@ -22044,18 +25747,68 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
/// performVZEXTCombine - Performs VZEXT DAG combines.
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Op = N->getOperand(0);
+ MVT OpVT = Op.getSimpleValueType();
+ MVT OpEltVT = OpVT.getVectorElementType();
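+ // InputBits is the number of bits of the input that the vzext actually
+ // consumes: one OpEltVT-sized element per result element.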
+ unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
+
// (vzext (bitcast (vzext (x)) -> (vzext x)
- SDValue In = N->getOperand(0);
- while (In.getOpcode() == ISD::BITCAST)
- In = In.getOperand(0);
+ SDValue V = Op;
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
- if (In.getOpcode() != X86ISD::VZEXT)
- return SDValue();
+ if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
+ MVT InnerVT = V.getSimpleValueType();
+ MVT InnerEltVT = InnerVT.getVectorElementType();
- return DAG.getNode(X86ISD::VZEXT, SDLoc(N), N->getValueType(0),
- In.getOperand(0));
+ // If the element sizes match exactly, we can just do one larger vzext. This
+ // is always an exact type match as vzext operates on integer types.
+ if (OpEltVT == InnerEltVT) {
+ assert(OpVT == InnerVT && "Types must match for vzext!");
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
+ }
+
+ // The only other way we can combine them is if only a single element of the
+ // inner vzext is used in the input to the outer vzext.
+ if (InnerEltVT.getSizeInBits() < InputBits)
+ return SDValue();
+
+ // In this case, the inner vzext is completely dead because we're going to
+ // only look at bits inside of the low element. Just do the outer vzext on
+ // a bitcast of the input to the inner.
+ return DAG.getNode(X86ISD::VZEXT, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, OpVT, V));
+ }
+
+ // Check if we can bypass extracting and re-inserting an element of an input
+ // vector. Essentially:
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
+ SDValue ExtractedV = V.getOperand(0);
+ SDValue OrigV = ExtractedV.getOperand(0);
+ if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
+ if (ExtractIdx->getZExtValue() == 0) {
+ MVT OrigVT = OrigV.getSimpleValueType();
+ // Extract a subvector if necessary...
+ if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
+ int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
+ OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
+ OrigVT.getVectorNumElements() / Ratio);
+ OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
+ DAG.getIntPtrConstant(0));
+ }
+ Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
+ }
+ }
+
+ return SDValue();
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -22066,7 +25819,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
- case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ case X86ISD::SHRUNKBLEND:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
@@ -22107,12 +25862,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
@@ -22545,6 +26301,23 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
return;
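+ // 'L' matches the zero-extending mask constants 0xff and 0xffff, plus
+ // 0xffffffff in 64-bit mode.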
+ case 'L':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
+ (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
+ Result = DAG.getTargetConstant(C->getSExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
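+ // 'M' matches integer constants in the range 0-3.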
+ case 'M':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 3) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
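+ // 'N' matches unsigned 8-bit integer constants (0-255).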
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
@@ -22553,6 +26326,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
return;
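+ // 'O' matches integer constants in the range 0-127.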
+ case 'O':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 127) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
@@ -22762,14 +26543,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Constraint[5] == ')' &&
Constraint[6] == '}') {
- Res.first = X86::ST0+Constraint[4]-'0';
+ Res.first = X86::FP0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
- Res.first = X86::ST0;
+ Res.first = X86::FP0;
Res.second = &X86::RFP80RegClass;
return Res;
}
@@ -22897,7 +26678,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
// "load" ports instead of the dedicated "store" port.
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
- // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
+ // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
if (isLegalAddressingMode(AM, Ty))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index c8cdce7c7664..ba077a8a0b46 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86ISELLOWERING_H
-#define X86ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -187,12 +187,17 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLENDV - Blend where the selector is a register.
- BLENDV,
-
/// BLENDI - Blend where the selector is an immediate.
BLENDI,
+ /// SHRUNKBLEND - Blend where the condition has been shrunk.
+ /// This is used to emphasize that the condition mask is
+ /// no longer valid for generic VSELECT optimizations.
+ SHRUNKBLEND,
+
+ /// ADDSUB - Combined add and sub on an FP vector.
+ ADDSUB,
+
// SUBUS - Integer sub with unsigned saturation.
SUBUS,
@@ -301,6 +306,13 @@ namespace llvm {
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+ // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
+ SMUL8, UMUL8,
+
+ // 8-bit divrem that zero-extend the high result (AH).
+ UDIVREM8_ZEXT_HREG,
+ SDIVREM8_SEXT_HREG,
+
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
@@ -320,7 +332,10 @@ namespace llvm {
// Several flavors of instructions with vector shuffle behaviors.
PACKSS,
PACKUS,
+ // Intra-lane alignr
PALIGNR,
+ // AVX512 inter-lane alignr
+ VALIGN,
PSHUFD,
PSHUFHW,
PSHUFLW,
@@ -337,7 +352,8 @@ namespace llvm {
MOVSS,
UNPCKL,
UNPCKH,
- VPERMILP,
+ VPERMILPV,
+ VPERMILPI,
VPERMV,
VPERMV3,
VPERMIV3,
@@ -350,9 +366,9 @@ namespace llvm {
VINSERT,
VEXTRACT,
- // PMULUDQ - Vector multiply packed unsigned doubleword integers
+ // Vector multiply packed unsigned doubleword integers
PMULUDQ,
- // PMULUDQ - Vector multiply packed signed doubleword integers
+ // Vector multiply packed signed doubleword integers
PMULDQ,
// FMA nodes
@@ -363,20 +379,23 @@ namespace llvm {
FMADDSUB,
FMSUBADD,
- // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
- // according to %al. An operator is needed so that this can be expanded
- // with control flow.
+ // Compress and expand
+ COMPRESS,
+ EXPAND,
+
+ // Save xmm argument registers to the stack, according to %al. An operator
+ // is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
- // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ // Windows's _chkstk call to do stack probing.
WIN_ALLOCA,
- // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // For allocating variable amounts of stack space when using
// segmented stacks. Check if the current stacklet has enough space, and
// falls back to heap allocation if not.
SEG_ALLOCA,
- // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+ // Windows's _ftol2 runtime routine to do fptoui.
WIN_FTOL,
// Memory barrier
@@ -385,38 +404,40 @@ namespace llvm {
SFENCE,
LFENCE,
- // FNSTSW16r - Store FP status word into i16 register.
+ // Store FP status word into i16 register.
FNSTSW16r,
- // SAHF - Store contents of %ah into %eflags.
+ // Store contents of %ah into %eflags.
SAHF,
- // RDRAND - Get a random integer and indicate whether it is valid in CF.
+ // Get a random integer and indicate whether it is valid in CF.
RDRAND,
- // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // Get a NIST SP800-90B & C compliant random integer and
// indicate whether it is valid in CF.
RDSEED,
- // PCMP*STRI
PCMPISTRI,
PCMPESTRI,
- // XTEST - Test if in transactional execution.
+ // Test if in transactional execution.
XTEST,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
+ // AVX-512ER exponential and reciprocal instructions
+ RSQRT28, RCP28, EXP2,
+
+ // Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
- // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ // Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
- // FNSTCW16m - Store FP control world into i16 memory.
+ // Store FP control word into i16 memory.
FNSTCW16m,
- /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
/// has two inputs (token chain and address) and two outputs (int value
@@ -425,7 +446,7 @@ namespace llvm {
FP_TO_INT32_IN_MEM,
FP_TO_INT64_IN_MEM,
- /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// This instruction implements SINT_TO_FP with the
/// integer source in memory and FP reg result. This corresponds to the
/// X86::FILD*m instructions. It has three inputs (token chain, address,
/// and source type) and two outputs (FP value and token chain). FILD_FLAG
@@ -433,19 +454,19 @@ namespace llvm {
FILD,
FILD_FLAG,
- /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This instruction implements an extending load to FP stack slots.
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
/// operand, ptr to load from, and a ValueType node indicating the type
/// to load to.
FLD,
- /// FST - This instruction implements a truncating store to FP stack
+ /// This instruction implements a truncating store to FP stack
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
/// chain operand, value to store, address, and a ValueType to store it
/// as.
FST,
- /// VAARG_64 - This instruction grabs the address of the next argument
+ /// This instruction grabs the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
VAARG_64
@@ -457,67 +478,76 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace X86 {
- /// isVEXTRACT128Index - Return true if the specified
+ /// Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
bool isVEXTRACT128Index(SDNode *N);
- /// isVINSERT128Index - Return true if the specified
+ /// Return true if the specified
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
/// suitable for input to VINSERTF128, VINSERTI128 instructions.
bool isVINSERT128Index(SDNode *N);
- /// isVEXTRACT256Index - Return true if the specified
+ /// Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
bool isVEXTRACT256Index(SDNode *N);
- /// isVINSERT256Index - Return true if the specified
+ /// Return true if the specified
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
/// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
bool isVINSERT256Index(SDNode *N);
- /// getExtractVEXTRACT128Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128, VEXTRACTI128 instructions.
unsigned getExtractVEXTRACT128Immediate(SDNode *N);
- /// getInsertVINSERT128Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to insert at the specified INSERT_SUBVECTOR index
/// with VINSERTF128, VINSERT128 instructions.
unsigned getInsertVINSERT128Immediate(SDNode *N);
- /// getExtractVEXTRACT256Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF64X4, VEXTRACTI64x4 instructions.
unsigned getExtractVEXTRACT256Immediate(SDNode *N);
- /// getInsertVINSERT256Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to insert at the specified INSERT_SUBVECTOR index
/// with VINSERTF64x4, VINSERTI64x4 instructions.
unsigned getInsertVINSERT256Immediate(SDNode *N);
- /// isZeroNode - Returns true if Elt is a constant zero or a floating point
- /// constant +0.0.
+ /// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);
- /// isOffsetSuitableForCodeModel - Returns true of the given offset can be
+ /// Returns true if the given offset can
/// fit into the displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement = true);
- /// isCalleePop - Determines whether the callee is required to pop its
+ /// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt);
+
+ /// AVX512 static rounding constants. These need to match the values in
+ /// avx512fintrin.h.
+ enum STATIC_ROUNDING {
+ TO_NEAREST_INT = 0,
+ TO_NEG_INF = 1,
+ TO_POS_INF = 2,
+ TO_ZERO = 3,
+ CUR_DIRECTION = 4
+ };
}
//===--------------------------------------------------------------------===//
- // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ // X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
- explicit X86TargetLowering(X86TargetMachine &TM);
+ explicit X86TargetLowering(const X86TargetMachine &TM);
unsigned getJumpTableEncoding() const override;
@@ -528,21 +558,20 @@ namespace llvm {
const MachineBasicBlock *MBB, unsigned uid,
MCContext &Ctx) const override;
- /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
- /// jumptable.
+ /// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const override;
const MCExpr *
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const override;
- /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
unsigned getByValTypeAlignment(Type *Ty) const override;
- /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means the destination alignment can
/// satisfy any constraint. Similarly if SrcAlign is zero it
@@ -557,7 +586,7 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const override;
- /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
/// for all types except for some special cases. For example, on X86
/// targets without SSE2 f64 load / store are done with fldl / fstpl which
@@ -565,17 +594,17 @@ namespace llvm {
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
bool *Fast) const override;
- /// LowerOperation - Provide custom lowering hooks for some operations.
+ /// Provide custom lowering hooks for some operations.
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
@@ -584,13 +613,13 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// isTypeDesirableForOp - Return true if the target has native support for
+ /// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
- /// isTypeDesirable - Return true if the target has native support for the
+ /// Return true if the target has native support for the
/// specified value type and it is 'desirable' to use the type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer
/// and some i16 instructions are slow.
@@ -601,24 +630,25 @@ namespace llvm {
MachineBasicBlock *MBB) const override;
- /// getTargetNodeName - This method returns the name of a target specific
- /// DAG node.
+ /// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- /// getSetCCResultType - Return the value type to use for ISD::SETCC.
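+ /// Return true if it is cheap to speculate calls to the cttz/ctlz
+ /// intrinsics.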
+ bool isCheapToSpeculateCttz() const override;
+
+ bool isCheapToSpeculateCtlz() const override;
+
+ /// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- /// computeKnownBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
+ /// Determine which of the bits specified in Mask are known to be either
+ /// zero or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
- // operation that are sign bits.
+ /// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -641,16 +671,15 @@ namespace llvm {
const char *LowerXConstraint(EVT ConstraintVT) const override;
- /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
- /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
- /// true it means one of the asm constraint of the inline asm instruction
- /// being processed is 'm'.
+ /// Lower the specified operand into the Ops vector. If it is invalid, don't
+ /// add anything to Ops. If hasMemory is true it means one of the asm
+ /// constraints of the inline asm instruction being processed is 'm'.
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
/// error, this returns a register number of 0.
@@ -658,17 +687,17 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const override;
- /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// Return true if the specified immediate is legal
/// add immediate, that is the target has add instructions which can
/// add a register and the immediate without having to materialize
/// the immediate into a register.
@@ -683,7 +712,7 @@ namespace llvm {
bool isVectorShiftByScalarCheap(Type *Ty) const override;
- /// isTruncateFree - Return true if it's free to truncate a value of
+ /// Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
@@ -691,7 +720,7 @@ namespace llvm {
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- /// isZExtFree - Return true if any actual instruction that defines a
+ /// Return true if any actual instruction that defines a
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
/// register. This does not necessarily include registers defined in
/// unknown ways, such as incoming arguments, or copies from unknown
@@ -703,37 +732,35 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+ /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
- /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
- /// isFPImmLegal - Returns true if the target can instruction select the
+ /// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- /// isShuffleMaskLegal - Targets can use this to indicate that they only
- /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
- /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
- /// values are assumed to be legal.
+ /// Targets can use this to indicate that they only support *some*
+ /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
+ /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
+ /// be legal.
bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const override;
- /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
- /// used by Targets can use this to indicate if there is a suitable
- /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
- /// pool entry.
+ /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
+ /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
+ /// with a constant pool entry.
bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const override;
- /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
bool ShouldShrinkFPConstant(EVT VT) const override {
@@ -743,23 +770,27 @@ namespace llvm {
return !X86ScalarSSEf64 || VT == MVT::f80;
}
+ /// Return true if we believe it is correct and profitable to reduce the
+ /// load node to a smaller type.
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override;
+
const X86Subtarget* getSubtarget() const {
return Subtarget;
}
- /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
- /// computed in an SSE register, not on the X87 floating point stack.
+ /// Return true if the specified scalar FP type is computed in an SSE
+ /// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
- /// for fptoui.
+ /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
bool isTargetFTOL() const;
- /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
- /// used for fptoui to the given type.
+ /// Return true if the MSVC _ftol2 routine should be used for fptoui to the
+ /// given type.
bool isIntegerTypeFTOL(EVT VT) const {
return isTargetFTOL() && VT == MVT::i64;
}
@@ -769,6 +800,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+ /// with this index.
+ bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
+
/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.
@@ -776,15 +811,14 @@ namespace llvm {
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
- /// createFastISel - This method returns a target specific FastISel object,
+ /// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
- /// getStackCookieLocation - Return true if the target stores stack
- /// protector cookies at a fixed offset in some non-standard address
- /// space, and populates the address space and offset as
- /// appropriate.
+ /// Return true if the target stores stack protector cookies at a fixed
+ /// offset in some non-standard address space, and populates the address
+ /// space and offset as appropriate.
bool getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const override;
@@ -796,6 +830,7 @@ namespace llvm {
/// \brief Reset the operation actions based on target options.
void resetOperationActions() override;
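+ /// Return true if the target emits a LOAD_STACK_GUARD node when lowering
+ /// stack protector intrinsics.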
+ bool useLoadStackGuardNode() const override;
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
@@ -804,7 +839,7 @@ namespace llvm {
findRepresentativeClass(MVT VT) const override;
private:
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
const DataLayout *TD;
@@ -813,17 +848,16 @@ namespace llvm {
/// the operation actions unless we have to.
TargetOptions TO;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
- /// floating point ops.
+ /// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
- /// LegalFPImmediates - A list of legal fp immediates.
+ /// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;
- /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// Indicate that this x86 target can instruction
/// select the specified FP immediate natively.
void addLegalFPImmediate(const APFloat& Imm) {
LegalFPImmediates.push_back(Imm);
@@ -847,9 +881,8 @@ namespace llvm {
// Call lowering helpers.
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Targets which want to do tail call
- /// optimization should implement this function.
+ /// Check whether the call is eligible for tail call optimization. Targets
+ /// that want to do tail call optimization should implement this function.
bool IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
@@ -936,7 +969,7 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
- MVT getTypeForExtArgOrReturn(MVT VT,
+ EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -946,6 +979,15 @@ namespace llvm {
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+ bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+ LoadInst *
+ lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
+
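+ /// Return true if atomic operations on MemType need to be expanded using
+ /// the double-width cmpxchg8b / cmpxchg16b instructions.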
+ bool needsCmpXchgNb(const Type *MemType) const;
+
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
/// expand, the associated machine basic block, and the associated X86
@@ -975,8 +1017,7 @@ namespace llvm {
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
- MachineBasicBlock *BB,
- bool Is64Bit) const;
+ MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
@@ -1005,6 +1046,15 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+
+ /// Use rsqrt* to speed up sqrt calculations.
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+
+ /// Use rcp* to speed up fdiv calculations.
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
};
namespace X86 {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index d2894088b80f..c4ec3df7b382 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1,3 +1,273 @@
+// Group template arguments that can be derived from the vector type (EltNum x
+// EltVT). These are things like the register class for the writemask, etc.
+// The idea is to pass one of these as the template argument rather than the
+// individual arguments.
+// The template is also used for scalar types; in this case NumElts is 1.
+class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
+ string suffix = ""> {
+ RegisterClass RC = rc;
+ ValueType EltVT = eltvt;
+ int NumElts = numelts;
+
+ // Corresponding mask register class.
+ RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
+
+ // Corresponding write-mask register class.
+ RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
+
+ // The GPR register class that can hold the write mask. Use GR8 for fewer
+ // than 8 elements. Use shift-right and equal to work around the lack of
+ // !lt in tablegen.
+ RegisterClass MRC =
+ !cast<RegisterClass>("GR" #
+ !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
+
+ // Suffix used in the instruction mnemonic.
+ string Suffix = suffix;
+
+ // VTName is a string name for the vector VT. For vector types it will be
+ // v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
+ // It is a little more complex for scalar types, where NumElts = 1:
+ // in that case we build v4f32 or v2f64.
+ string VTName = "v" # !if (!eq (NumElts, 1),
+ !if (!eq (EltVT.Size, 32), 4,
+ !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
+
+ // The vector VT.
+ ValueType VT = !cast<ValueType>(VTName);
+
+ string EltTypeName = !cast<string>(EltVT);
+ // Size of the element type in bits, e.g. 32 for v16i32.
+ string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
+ int EltSize = EltVT.Size;
+
+ // "i" for integer types and "f" for floating-point types
+ string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
+
+ // Size of RC in bits, e.g. 512 for VR512.
+ int Size = VT.Size;
+
+ // The corresponding memory operand, e.g. i512mem for VR512.
+ X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
+ X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
+
+ // Load patterns
+ // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
+ // due to load promotion during legalization
+ PatFrag LdFrag = !cast<PatFrag>("load" #
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ VTName)), VTName));
+ PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+
+ // Load patterns used for memory operands. We only have this defined in
+ // case of i64 element types for sub-512 integer vectors. For now, keep
+ // MemOpFrag undefined in these cases.
+ PatFrag MemOpFrag =
+ !if (!eq (NumElts#EltTypeName, "1f32"), !cast<PatFrag>("memopfsf32"),
+ !if (!eq (NumElts#EltTypeName, "1f64"), !cast<PatFrag>("memopfsf64"),
+ !if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)))));
+
+ // The corresponding float type, e.g. v16f32 for v16i32
+ // Note: For EltSize < 32, FloatVT is illegal and TableGen
+ // fails to compile, so we choose FloatVT = VT
+ ValueType FloatVT = !cast<ValueType>(
+ !if (!eq (!srl(EltSize,5),0),
+ VTName,
+ !if (!eq(TypeVariantName, "i"),
+ "v" # NumElts # "f" # EltSize,
+ VTName)));
+
+ // The string to specify embedded broadcast in assembly.
+ string BroadcastStr = "{1to" # NumElts # "}";
+
+ // 8-bit compressed displacement tuple/subvector format. This is only
+ // defined for NumElts <= 8.
+ CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
+ !cast<CD8VForm>("CD8VT" # NumElts), ?);
+
+ SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
+ !if (!eq (Size, 256), sub_ymm, ?));
+
+ Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
+ !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
+ SSEPackedInt));
+
+ // A vector type of the same width with element type i32. This is used to
+ // create the canonical constant zero node ImmAllZerosV.
+ ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
+ dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+}
+
+def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
+def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
+def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
+def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
+def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
+
+// "x" in v32i8x_info means RC = VR256X
+def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
+def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
+def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
+def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
+def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
+
+def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
+def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
+def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
+def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
+def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
+
+// We map scalar types to the smallest (128-bit) vector type
+// with the appropriate element type. This allows us to reuse the same masking
+// logic for the scalar forms.
+def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
+def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
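+// For example, f32x_info above gets VT = v4f32 via the NumElts == 1 case of
+// the VTName computation, so the scalar forms reuse the v4f32 masking logic.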
+
+class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
+ X86VectorVTInfo i128> {
+ X86VectorVTInfo info512 = i512;
+ X86VectorVTInfo info256 = i256;
+ X86VectorVTInfo info128 = i128;
+}
+
+def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
+ v16i8x_info>;
+def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
+ v8i16x_info>;
+def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
+ v4i32x_info>;
+def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
+ v2i64x_info>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+ v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+ v2f64x_info>;
+
+// This multiclass generates the masking variants from the non-masking
+// variant. It only provides the assembly pieces for the masking variants;
+// the custom ISel patterns for masking are expected to be supplied as
+// template arguments.
+multiclass AVX512_maskable_custom<bits<8> O, Format F,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern,
+ list<dag> MaskingPattern,
+ list<dag> ZeroMaskingPattern,
+ string Round = "",
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def NAME: AVX512<O, F, Outs, Ins,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
+ "$dst "#Round#", "#IntelSrcAsm#"}",
+ Pattern, itin>;
+
+ // Prefer over VMOV*rrk Pat<>
+ let AddedComplexity = 20 in
+ def NAME#k: AVX512<O, F, Outs, MaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
+ "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
+ MaskingPattern, itin>,
+ EVEX_K {
+ // In case of the 3src subclass this is overridden with a let.
+ string Constraints = MaskingConstraint;
+ }
+ let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}"#Round#"|"#
+ "$dst {${mask}} {z}"#Round#", "#IntelSrcAsm#"}",
+ ZeroMaskingPattern,
+ itin>,
+ EVEX_KZ;
+}
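+
+// For illustration, a hypothetical "defm VFOOZrr : AVX512_maskable_custom<...>"
+// yields three records: VFOOZrr (unmasked), VFOOZrrk (merge-masking, EVEX_K,
+// "$dst {${mask}}") and VFOOZrrkz (zero-masking, EVEX_KZ, "$dst {${mask}} {z}").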
+
+
+// Common base class of AVX512_maskable and AVX512_maskable_3src.
+multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS,
+ SDNode Select = vselect, string Round = "",
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst, MaskingRHS)],
+ [(set _.RC:$dst,
+ (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
+                         Round, MaskingConstraint, itin, IsCommutable>;
+
+// This multiclass generates the unconditional/non-masking, the masking and
+// the zero-masking variant of the vector instruction. In the masking case, the
+// preserved vector elements come from a new dummy input operand tied to $dst.
+multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, string Round = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_common<O, F, _, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect,
+ Round, "$src0 = $dst", itin, IsCommutable>;
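+
+// For illustration, a hypothetical instantiation (opcode and node invented)
+//   defm VFOOZrr : AVX512_maskable<0x00, MRMSrcReg, v16i32_info,
+//                    (outs VR512:$dst), (ins VR512:$src1), "vfoo",
+//                    "$src1", "$src1", (X86foo (v16i32 VR512:$src1))>;
+// gives VFOOZrrk the operands (ins VR512:$src0, VK16WM:$mask, VR512:$src1),
+// the tie "$src0 = $dst" and the merge-masking pattern
+//   (vselect VK16WM:$mask, (X86foo (v16i32 VR512:$src1)), VR512:$src0),
+// while VFOOZrrkz selects ImmAllZerosV as the fall-through value instead.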
+
+// This multiclass generates the unconditional/non-masking, the masking and
+// the zero-masking variant of the scalar instruction.
+multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, string Round = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_common<O, F, _, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select,
+ Round, "$src0 = $dst", itin, IsCommutable>;
+
+// Similar to AVX512_maskable, but here one of the source operands ($src1) is
+// already tied to $dst, so we use it for the preserved vector elements. Note
+// that NonTiedIns (the ins dag) must exclude $src1.
+multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, _, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+
+
+multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern> :
+ AVX512_maskable_custom<O, F, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], "",
+ "$src0 = $dst">;
+
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
@@ -17,6 +287,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
+ def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
@@ -116,119 +387,92 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-// -- 32x8 form --
-let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
-def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-}
-
-// -- 64x4 fp form --
-let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
-def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+
+multiclass vinsert_for_size_no_alt<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> {
+ let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src1, From.RC:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm)))]>,
+ EVEX_4V, EVEX_V512;
+
+ let mayLoad = 1 in
+ def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
+ (ins VR512:$src1, From.MemOp:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ []>,
+ EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ }
}
-// -- 32x4 integer form --
-let hasSideEffects = 0 in {
-def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
+
+multiclass vinsert_for_size<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> :
+ vinsert_for_size_no_alt<Opcode, From, To,
+ vinsert_insert, INSERT_get_vinsert_imm> {
+ // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
+ // vinserti32x4. Only add this if 64x2 and friends are not supported
+ // natively via AVX512DQ.
+ let Predicates = [NoDQI] in
+ def : Pat<(vinsert_insert:$ins
+ (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
+ (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
+ VR512:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm VR512:$ins)))>;
}
-let hasSideEffects = 0 in {
-// -- 64x4 form --
-def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
- (bc_v4i32 (loadv2i64 addr:$src2)),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
- (bc_v8i32 (loadv4i64 addr:$src2)),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
+multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
+ ValueType EltVT64, int Opcode256> {
+ defm NAME # "32x4" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>;
+ let Predicates = [HasDQI] in
+ defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>, VEX_W;
+ defm NAME # "64x4" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 8, EltVT32, VR256>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>, VEX_W;
+ let Predicates = [HasDQI] in
+ defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>;
+}
+
+defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
+defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
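+
+// For illustration, the two defms above recreate the names the hand-written
+// definitions used, e.g. VINSERTF32x4rr/rm and VINSERTI64x4rr/rm, and under
+// HasDQI additionally provide the VINSERT{F,I}64x2 and VINSERT{F,I}32x8 forms.
+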
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
@@ -237,106 +481,90 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
-let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
-// -- 32x4 form --
-def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512;
-def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
- (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
- "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-
-// -- 64x4 form --
-def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
- (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
- "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+
+multiclass vextract_for_size<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
+ PatFrag vextract_extract,
+ SDNodeXForm EXTRACT_get_vextract_imm> {
+ let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
+ (ins VR512:$src1, i8imm:$idx),
+ "vextract" # To.EltTypeName # "x4",
+ "$idx, $src1", "$src1, $idx",
+ [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
+ (iPTR imm)))]>,
+ AVX512AIi8Base, EVEX, EVEX_V512;
+ let mayStore = 1 in
+ def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
+ "vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
+ "$dst, $src1, $src2}",
+ []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
+ }
+
+ // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
+ // vextracti32x4
+ def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
+ (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+ VR512:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext)))>;
+
+ // A 128/256-bit subvector extract from the first 512-bit vector position is
+ // a subregister copy that needs no instruction.
+ def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
+ (To.VT
+ (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;
+
+ // And for the alternative types.
+ def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
+ (AltTo.VT
+ (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+
+ // Intrinsic call with masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call with zero-masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call without masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+ VR512:$src1, imm:$idx)>;
}
-let hasSideEffects = 0 in {
-// -- 32x4 form --
-def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512;
-def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
- (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
- "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-
-// -- 64x4 form --
-def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
- (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
- "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
- (v4f32 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)),
- (v4i32 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
- (v2f64 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
- (v2i64 (VEXTRACTI32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-
-def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
- (v8f32 (VEXTRACTF64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
- (v8i32 (VEXTRACTI64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
- (v4f64 (VEXTRACTF64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
- (v4i64 (VEXTRACTI64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-// A 256-bit subvector extract from the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
- (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
-def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
- (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
-def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
- (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
-def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
- (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
-
-// zmm -> xmm
-def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
- (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
- (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
- (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
- (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
+ ValueType EltVT64, int Opcode64> {
+ defm NAME # "32x4" : vextract_for_size<Opcode32,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>;
+ defm NAME # "64x4" : vextract_for_size<Opcode64,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 8, EltVT32, VR256>,
+ vextract256_extract,
+ EXTRACT_get_vextract256_imm>, VEX_W;
+}
+defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
+defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
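+
+// For illustration, these expand to VEXTRACT{F,I}32x4 and VEXTRACT{F,I}64x4
+// with rr/rrk/rrkz register forms (via AVX512_maskable_in_asm) and an rm store
+// form; the intrinsic patterns above select the rr/rrk/rrkz variants.
+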
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
@@ -368,13 +596,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
- (ins VR128X:$src1, u32u8imm:$src2),
+ (ins VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
+ (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
@@ -382,105 +610,175 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DestRC,
- RegisterClass SrcRC, X86MemOperand x86memop> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
+multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ ValueType svt, X86VectorVTInfo _> {
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                    (ins SrcRC:$src), "vbroadcast"##!subst("p", "s", _.Suffix),
+ "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
+ T8PD, EVEX;
+
+ let mayLoad = 1 in {
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
+ (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX;
+ }
+}
+
+multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+ EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
+ EVEX_V256;
+ }
}
+
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
- VR128X, f32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ let Predicates = [HasVLX] in {
+ defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
+ v4f32, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VT1>;
+ }
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
- VR128X, f64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
+
+// avx512_broadcast_pat introduces patterns for broadcast with a scalar argument.
+// Later, we could canonicalize broadcast instructions before the ISel phase
+// and eliminate the additional patterns during ISel.
+// SrcRC_v and SrcRC_s are the RegisterClasses for the vector and scalar
+// representations of the source.
+multiclass avx512_broadcast_pat<string InstName, SDNode OpNode,
+ X86VectorVTInfo _, RegisterClass SrcRC_v,
+ RegisterClass SrcRC_s> {
+ def : Pat<(_.VT (OpNode (_.EltVT SrcRC_s:$src))),
+ (!cast<Instruction>(InstName##"r")
+ (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;
+
+ let AddedComplexity = 30 in {
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (_.EltVT SrcRC_s:$src)), _.RC:$src0)),
+ (!cast<Instruction>(InstName##"rk") _.RC:$src0, _.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;
+
+    def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (_.EltVT SrcRC_s:$src)), _.ImmAllZerosV)),
+ (!cast<Instruction>(InstName##"rkz") _.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcRC_s:$src, SrcRC_v))>;
+ }
+}
+
+defm : avx512_broadcast_pat<"VBROADCASTSSZ", X86VBroadcast, v16f32_info,
+ VR128X, FR32X>;
+defm : avx512_broadcast_pat<"VBROADCASTSDZ", X86VBroadcast, v8f64_info,
+ VR128X, FR64X>;
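+
+// For illustration, the VBROADCASTSSZ instantiation above emits, among others,
+// the merge-masking pattern
+//   def : Pat<(v16f32 (vselect VK16WM:$mask,
+//                              (X86VBroadcast (f32 FR32X:$src)), VR512:$src0)),
+//             (VBROADCASTSSZrk VR512:$src0, VK16WM:$mask,
+//                              (COPY_TO_REGCLASS FR32X:$src, VR128X))>;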
+
+let Predicates = [HasVLX] in {
+ defm : avx512_broadcast_pat<"VBROADCASTSSZ256", X86VBroadcast,
+ v8f32x_info, VR128X, FR32X>;
+ defm : avx512_broadcast_pat<"VBROADCASTSSZ128", X86VBroadcast,
+ v4f32x_info, VR128X, FR32X>;
+ defm : avx512_broadcast_pat<"VBROADCASTSDZ256", X86VBroadcast,
+ v4f64x_info, VR128X, FR64X>;
}
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
- (VBROADCASTSDZrm addr:$src)>;
-
-multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
- RegisterClass SrcRC, RegisterClass KRC> {
- def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V512;
- def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
- (ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_V512, EVEX_KZ;
-}
-
-defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
-defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
- VEX_W;
-
+ (VBROADCASTSDZm addr:$src)>;
+
+multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
+ RegisterClass SrcRC> {
+ defm r : AVX512_maskable_in_asm<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src), "vpbroadcast"##_.Suffix,
+ "$src", "$src", []>, T8PD, EVEX;
+}
+
+multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
+ RegisterClass SrcRC, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_int_broadcast_reg<opc, _.info512, SrcRC>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_int_broadcast_reg<opc, _.info256, SrcRC>, EVEX_V256;
+ defm Z128 : avx512_int_broadcast_reg<opc, _.info128, SrcRC>, EVEX_V128;
+ }
+}
+
+defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR32,
+ HasBWI>;
+defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR32,
+ HasBWI>;
+defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
+ HasAVX512>;
+defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
+ HasAVX512>, VEX_W;
+
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
- (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
+ (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
- (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
+ (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
- (VPBROADCASTDrZrr GR32:$src)>;
+ (VPBROADCASTDrZr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
- (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
+ (VPBROADCASTDrZrkz VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
- (VPBROADCASTQrZrr GR64:$src)>;
+ (VPBROADCASTQrZr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
- (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
+ (VPBROADCASTQrZrkz VK8WM:$mask, GR64:$src)>;
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
- (VPBROADCASTDrZrr GR32:$src)>;
+ (VPBROADCASTDrZr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
- (VPBROADCASTQrZrr GR64:$src)>;
+ (VPBROADCASTQrZr GR64:$src)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
(v16i32 immAllZerosV), (i16 GR16:$mask))),
- (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
+ (VPBROADCASTDrZrkz (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
- (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
+ (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
RegisterClass KRC> {
def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
VR128X:$src),
- !strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
(OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
x86memop:$src),
- !strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
(ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
}
}
@@ -497,12 +795,12 @@ multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
RegisterClass KRC> {
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
x86memop:$src),
!strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
}
}
@@ -519,27 +817,32 @@ def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
(VPBROADCASTQZrr VR128X:$src)>;
-def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
-def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
+ (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
+ (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+
+def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
+ (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
+def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
+ (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
-
+ (VBROADCASTSDZr VR128X:$src)>;
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
- (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+ (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
- (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+ (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
let Predicates = [HasAVX512] in {
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
- (EXTRACT_SUBREG
+ (EXTRACT_SUBREG
(v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
addr:$src)), sub_ymm)>;
}
@@ -548,64 +851,107 @@ def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
//---
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DstRC, RegisterClass KRC,
- ValueType OpVT, ValueType SrcVT> {
-def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
+ RegisterClass KRC> {
+let Predicates = [HasCDI] in
+def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V512;
+
+let Predicates = [HasCDI, HasVLX] in {
+def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V128;
+def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V256;
+}
}
let Predicates = [HasCDI] in {
-defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
- VK16, v16i32, v16i1>, EVEX_V512;
-defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
- VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
+ VK16>;
+defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
+ VK8>, VEX_W;
}
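+
+// For illustration, these produce VPBROADCASTMW2DZrr (plus Z128rr/Z256rr under
+// HasVLX) for a VK16 mask source, and the corresponding VPBROADCASTMB2Q* forms
+// for VK8.
+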
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
//
// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT> {
- def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
+ def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
EVEX;
- def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, EVEX;
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (OpNode (_.MemOpFrag addr:$src1),
+ (i8 imm:$src2))))]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VF>;
+}
}
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
- i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
- f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
+ X86VectorVTInfo Ctrl> :
+ avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
+ let ExeDomain = _.ExeDomain in {
+ def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT Ctrl.RC:$src2))))]>,
+ EVEX_4V;
+ def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.MemOp:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT (Ctrl.MemOpFrag addr:$src2)))))]>,
+ EVEX_4V;
+ }
+}
+
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>,
+ EVEX_V512, VEX_W;
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>,
+ EVEX_V512, VEX_W;
+
+defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
+ EVEX_V512;
+defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
+ EVEX_V512, VEX_W;
+
+def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPDZri VR512:$src1, imm:$imm)>;
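+
+// For illustration, VPERMILPSZ provides both the immediate forms
+// VPERMILPSZri/mi (inherited from avx512_perm_imm) and the variable forms
+// VPERMILPSZrr/rm, whose control vector is a v16i32 register or memory operand.
+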
// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
EVEX_4V;
@@ -613,13 +959,13 @@ multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// -- VPERM2I - 3 source operands form --
@@ -630,7 +976,7 @@ let Constraints = "$src1 = $dst" in {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
EVEX_4V;
@@ -638,7 +984,7 @@ let Constraints = "$src1 = $dst" in {
def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst {${mask}}|"
+ "\t{$src3, $src2, $dst {${mask}}|"
"$dst {${mask}}, $src2, $src3}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode RC:$src1, RC:$src2,
@@ -650,7 +996,7 @@ let Constraints = "$src1 = $dst" in {
def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst {${mask}} {z} |",
+                    "\t{$src3, $src2, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src2, $src3}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode RC:$src1, RC:$src2,
@@ -662,7 +1008,7 @@ let Constraints = "$src1 = $dst" in {
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3))))]>, EVEX_4V;
@@ -670,7 +1016,7 @@ let Constraints = "$src1 = $dst" in {
def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst {${mask}}|"
+ "\t{$src3, $src2, $dst {${mask}}|"
"$dst {${mask}}, $src2, $src3}"),
[(set RC:$dst,
(OpVT (vselect KRC:$mask,
@@ -683,7 +1029,7 @@ let Constraints = "$src1 = $dst" in {
def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst {${mask}} {z}|"
+ "\t{$src3, $src2, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src2, $src3}"),
[(set RC:$dst,
(OpVT (vselect KRC:$mask,
@@ -739,77 +1085,110 @@ defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
-multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
- RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, PatFrag mem_frag,
- SDNode OpNode, ValueType vt> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
+multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
- (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
- let mayLoad = 1 in
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
+                    "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
+ []>, EVEX_4V;
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2)))]>, EVEX_4V, EVEX_K;
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_KZ;
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_K;
+                    "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
+ }
+ }
}
+multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr,
+                                X86VectorVTInfo _> {
+ def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst,(X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
+ EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+
+ def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ []>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+
+}
+
+multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo> {
+ defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>,
+ avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
+ avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
+ defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
+ avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ }
+}
+
+multiclass blendmask_bw <bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasBWI] in
+ defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [HasBWI, HasVLX] in {
+ defm Z256 : avx512_blendmask <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
+ defm Z128 : avx512_blendmask <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ }
+}
+
+
+defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>;
+defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
+defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>;
+defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
+defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
+defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
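+
+// For illustration, VPBLENDMD expands to VPBLENDMDZ{rr,rrk,rrkz,rm,rmk,rmkz}
+// plus the broadcast rmb/rmbk forms, with Z256/Z128 variants under HasVLX; the
+// 256-bit patterns below select the merge-masking VPBLENDMDZrrk form.
+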
-let ExeDomain = SSEPackedSingle in
-defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
- VK16WM, VR512, f512mem,
- memopv16f32, vselect, v16f32>,
- EVEX_CD8<32, CD8VF>, EVEX_V512;
-let ExeDomain = SSEPackedDouble in
-defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
- VK8WM, VR512, f512mem,
- memopv8f64, vselect, v8f64>,
- VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), (i16 GR16:$mask))),
- (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
- VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), (i8 GR8:$mask))),
- (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
- VR512:$src1, VR512:$src2)>;
-
-defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
- VK16WM, VR512, f512mem,
- memopv16i32, vselect, v16i32>,
- EVEX_CD8<32, CD8VF>, EVEX_V512;
-
-defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
- VK8WM, VR512, f512mem,
- memopv8i64, vselect, v8i64>,
- VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (i16 GR16:$mask))),
- (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16),
- VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (i8 GR8:$mask))),
- (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8),
- VR512:$src1, VR512:$src2)>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src2))),
- (EXTRACT_SUBREG
- (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (EXTRACT_SUBREG
+ (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src2))),
- (EXTRACT_SUBREG
- (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (EXTRACT_SUBREG
+ (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
@@ -850,98 +1229,295 @@ defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
XD, VEX_W;
}
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt> {
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
def rr : AVX512BI<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rm : AVX512BI<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rrk : AVX512BI<opc, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ let mayLoad = 1 in
+ def rmk : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
-defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
- memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
- memopv8i64, X86pcmpeqm, v8i64>, T8PD, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
+ let mayLoad = 1 in {
+ def rmb : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
+ "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmbk : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+}
-defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
- memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
- memopv8i64, X86pcmpgtm, v8i64>, T8PD, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
+ Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
+ avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
+ avx512vl_i16_info, HasBWI>,
+ EVEX_CD8<16, CD8VF>;
+
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
+ avx512vl_i64_info, HasAVX512>,
+ T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
+ avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
+ avx512vl_i16_info, HasBWI>,
+ EVEX_CD8<16, CD8VF>;
+
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
+ avx512vl_i64_info, HasAVX512>,
+ T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
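+
+// For illustration, VPCMPEQD expands to VPCMPEQDZrr/rm, the masked rrk/rmk
+// forms and the broadcast rmb/rmbk forms, plus Z256/Z128 variants under HasVLX;
+// the patterns below use VPCMPEQDZrr and VPCMPGTDZrr directly.
+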
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPGTDZrr
+ (COPY_TO_REGCLASS (VPCMPGTDZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPEQDZrr
+ (COPY_TO_REGCLASS (VPCMPEQDZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
+multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
+ X86VectorVTInfo _> {
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
- imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rrik : AVX512AIi8<opc, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ let mayLoad = 1 in
+ def rmik : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+
// Accept explicit immediate argument form instead of comparison code.
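+  // e.g. "vpcmpd $0, %zmm2, %zmm1, %k1" is accepted and means
+  // "vpcmpeqd %zmm2, %zmm1, %k1" (immediate 0 encodes EQ).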
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
+ "$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
+ "$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
+ i8imm:$cc),
!strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
- def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
+ i8imm:$cc),
!strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
- X86cmpm, v16i32, AVXCC, "d">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
- X86cmpmu, v16i32, AVXCC, "ud">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_icmp_cc<opc, Suffix, OpNode, _> {
+ let mayLoad = 1 in {
+ def rmib : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmibk : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2, AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
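+  // e.g. "vpcmpd $2, (%rdi){1to16}, %zmm1, %k1" is accepted for
+  // "vpcmpled (%rdi){1to16}, %zmm1, %k1" (immediate 2 encodes LE).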
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
+ i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+}
+
+multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
+ }
+}
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
- X86cmpm, v8i64, AVXCC, "q">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
- X86cmpmu, v8i64, AVXCC, "uq">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
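+// Opcodes 0x3F/0x3E encode the signed/unsigned byte and word compares
+// (BWI, width selected by VEX_W); 0x1F/0x1E the dword/qword ones
+// (AVX512F), which also get embedded-broadcast (_rmb) forms.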
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
+ HasBWI>, EVEX_CD8<8, CD8VF>;
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
+ HasBWI>, EVEX_CD8<8, CD8VF>;
+
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
+ HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
+ HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_cmp_packed - compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
@@ -950,17 +1526,17 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
def rri : AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
def rrib: AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
- " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
+ "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
[], d>, EVEX_B;
def rmi : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set KRC:$dst,
(X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
@@ -969,11 +1545,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
- " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
- " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
}
}
@@ -1001,25 +1577,25 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
imm:$cc), VK8)>;
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), imm:$cc, (i16 -1),
+ (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR16)>;
-
+
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), imm:$cc, (i8 -1),
+ (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR8)>;
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), imm:$cc, (i16 -1),
+ (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
FROUND_CURRENT)),
(COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR16)>;
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), imm:$cc, (i8 -1),
+ (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
FROUND_CURRENT)),
(COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR8)>;
@@ -1031,17 +1607,17 @@ def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
- ValueType vt, X86MemOperand x86memop> {
+ ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
let mayLoad = 1 in
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vt (load addr:$src)))]>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
let mayStore = 1 in
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
}
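+// The km load pattern goes through an integer load plus bitconvert
+// (e.g. v16i1 <- (i16 (load ...))) so one multiclass serves kmovb/w/d/q,
+// whose memory operands are plain i8/i16/i32/i64.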
@@ -1050,38 +1626,88 @@ multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
}
-let Predicates = [HasAVX512] in {
- defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
- VEX, PS;
- defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
+let Predicates = [HasDQI] in
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
+ i8mem>,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
+ VEX, PD;
+
+let Predicates = [HasAVX512] in
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
+ i16mem>,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
+
+let Predicates = [HasBWI] in {
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
+ i32mem>, VEX, PD, VEX_W;
+ defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
+ VEX, XD;
+
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
+ i64mem>, VEX, PS, VEX_W;
+ defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
+ VEX, XD, VEX_W;
}
+// GR from/to mask register
+let Predicates = [HasDQI] in {
+ def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
+ (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
+ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
+ (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
+}
let Predicates = [HasAVX512] in {
- // GR16 from/to 16-bit mask
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
+ def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
+ def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
+ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
+}
- // Store kreg in memory
- def : Pat<(store (v16i1 VK16:$src), addr:$dst),
+// Load/store kreg
+let Predicates = [HasDQI] in {
+ def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
+ (KMOVBmk addr:$dst, VK8:$src)>;
+}
+let Predicates = [HasAVX512] in {
+ def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
-
- def : Pat<(store VK8:$src, addr:$dst),
+ def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
-
def : Pat<(i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
-
- def : Pat<(v8i1 (load addr:$src)),
+ def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
+ (KMOVDmk addr:$dst, VK32:$src)>;
+ def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
+ (KMOVQmk addr:$dst, VK64:$src)>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(i1 (trunc (i64 GR64:$src))),
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
(COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
@@ -1094,7 +1720,7 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
VK1)>;
-
+
def : Pat<(i32 (zext VK1:$src)),
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i8 (zext VK1:$src)),
@@ -1113,6 +1739,14 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
+let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK32)>;
+ def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK64)>;
+}
+
// With plain AVX-512 (no DQI), an 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
// GR from/to 8-bit mask without native support
@@ -1129,26 +1763,38 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS VK16:$src, VK1)>;
def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK1)>;
-
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK32:$src, VK1)>;
+ def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
- RegisterClass KRC, SDPatternOperator OpNode> {
- let Predicates = [HasAVX512] in
+ RegisterClass KRC, SDPatternOperator OpNode,
+ Predicate prd> {
+ let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>;
}
-multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
- defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, PS;
+multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode> {
+ defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ HasDQI>, VEX, PD;
+ defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ HasAVX512>, VEX, PS;
+ defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ HasBWI>, VEX, PD, VEX_W;
+ defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ HasBWI>, VEX, PS, VEX_W;
}
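+// One instruction per mask width: the B form requires DQI, W only needs
+// AVX-512, and the D/Q forms require BWI.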
-defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
multiclass avx512_mask_unop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -1159,43 +1805,60 @@ multiclass avx512_mask_unop_int<string IntName, string InstName> {
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
+let Predicates = [HasDQI] in
+def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
+let Predicates = [HasAVX512] in
def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
+let Predicates = [HasBWI] in
+def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
+let Predicates = [HasBWI] in
+def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
+
+// KNL does not support KMOVB; the 8-bit mask is promoted to a 16-bit mask.
+let Predicates = [HasAVX512] in {
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
def : Pat<(not VK8:$src),
(COPY_TO_REGCLASS
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+}
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
- RegisterClass KRC, SDPatternOperator OpNode> {
- let Predicates = [HasAVX512] in
+ RegisterClass KRC, SDPatternOperator OpNode,
+ Predicate prd> {
+ let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
-multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
- defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX_4V, VEX_L, PS;
+multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode> {
+ defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ HasDQI>, VEX_4V, VEX_L, PD;
+ defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ HasAVX512>, VEX_4V, VEX_L, PS;
+ defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
+ defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
let isCommutable = 1 in {
- defm KAND : avx512_mask_binop_w<0x41, "kand", and>;
- let isCommutable = 0 in
- defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
- defm KOR : avx512_mask_binop_w<0x45, "kor", or>;
- defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
- defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
+ defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
+ defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
+ defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
+ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
}
+let isCommutable = 0 in
+ defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
def : Pat<(xor VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
@@ -1245,7 +1908,7 @@ multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX512] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
@@ -1274,7 +1937,7 @@ multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> {
let Predicates = [HasAVX512], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
@@ -1295,7 +1958,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
!strconcat(OpcodeStr,
- " \t{$imm, $src, $dst|$dst, $src, $imm}"),
+ "\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
}
@@ -1341,6 +2004,17 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
+ def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
+ def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
+}
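+// A sub-mask at index 0 already occupies the low bits of the wider mask
+// register, so these insert/extract patterns are pure register-class copies.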
+
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
@@ -1350,104 +2024,176 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
// AVX-512 - Aligned and unaligned load and store
//
-multiclass avx512_load<bits<8> opc, RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d,
- ValueType vt, bit IsReMaterializable = 1> {
+multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
+ RegisterClass KRC, RegisterClass RC,
+ ValueType vt, ValueType zvt, X86MemOperand memop,
+ Domain d, bit IsReMaterializable = 1> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
- EVEX;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ d>, EVEX;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
}
- let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
- def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
- [(set (vt RC:$dst), (ld_frag addr:$src))], d>, EVEX;
- let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
- def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
- EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86memop:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [], d>, EVEX, EVEX_K;
+ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
+ SchedRW = [WriteLoad] in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
+ d>, EVEX;
+
+ let AddedComplexity = 20 in {
+    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set RC:$dst, (vt (vselect KRC:$mask,
+ (vt RC:$src1),
+ (vt RC:$src0))))],
+ d>, EVEX, EVEX_K;
+ let mayLoad = 1, SchedRW = [WriteLoad] in
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, memop:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set RC:$dst, (vt
+ (vselect KRC:$mask,
+ (vt (bitconvert (ld_frag addr:$src1))),
+ (vt RC:$src0))))],
+ d>, EVEX, EVEX_K;
+ }
+ let mayLoad = 1, SchedRW = [WriteLoad] in
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (vt
+ (vselect KRC:$mask,
+ (vt (bitconvert (ld_frag addr:$src))),
+ (vt (bitconvert (zvt immAllZerosV))))))],
+ d>, EVEX, EVEX_KZ;
+ }
+}
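+// rrk/rmk implement merge masking (vselect of the result over the tied
+// $src0); rmkz implements zero masking (vselect over an all-zeros vector).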
+
+multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
+ string elty, string elsz, string vsz512,
+ string vsz256, string vsz128, Domain d,
+ Predicate prd, bit IsReMaterializable = 1> {
+ let Predicates = [prd] in
+ defm Z : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
+ !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
+ !cast<X86MemOperand>(elty##"512mem"), d,
+ IsReMaterializable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
+ "v"##vsz256##elty##elsz, "v4i64")),
+ !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
+ !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
+ !cast<X86MemOperand>(elty##"256mem"), d,
+ IsReMaterializable>, EVEX_V256;
+
+ defm Z128 : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
+ "v"##vsz128##elty##elsz, "v2i64")),
+ !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
+ !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
+ !cast<X86MemOperand>(elty##"128mem"), d,
+ IsReMaterializable>, EVEX_V128;
}
- let mayLoad = 1 in
- def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src2}"),
- [], d>, EVEX, EVEX_KZ;
}
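+// avx512_load_vl assembles names from strings: e.g. elty="f", elsz="32",
+// vsz512="16" yields the loadv16f32 fragment, VK16WM mask class, v16f32
+// type and f512mem operand for the 512-bit (Z) variant.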
-multiclass avx512_store<bits<8> opc, RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop, PatFrag store_frag,
- string asm, Domain d, ValueType vt> {
+
+multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ ValueType OpVT, RegisterClass KRC, RegisterClass RC,
+ X86MemOperand memop, Domain d> {
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
EVEX;
let Constraints = "$src1 = $dst" in
- def alt_rrk : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
+ def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
EVEX, EVEX_K;
- def alt_rrkz : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
+ def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[], d>, EVEX, EVEX_KZ;
}
let mayStore = 1 in {
- def mr : AVX512PI<opc, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
- [(store_frag (vt RC:$src), addr:$dst)], d>, EVEX;
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ (ins memop:$dst, KRC:$mask, RC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[], d>, EVEX, EVEX_K;
- def mrkz : AVX512PI<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
}
}
-defm VMOVAPSZ : avx512_load<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
- "vmovaps", SSEPackedSingle, v16f32>,
- avx512_store<0x29, VR512, VK16WM, f512mem, alignedstore512,
- "vmovaps", SSEPackedSingle, v16f32>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVAPDZ : avx512_load<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
- "vmovapd", SSEPackedDouble, v8f64>,
- avx512_store<0x29, VR512, VK8WM, f512mem, alignedstore512,
- "vmovapd", SSEPackedDouble, v8f64>,
- PD, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-defm VMOVUPSZ : avx512_load<0x10, VR512, VK16WM, f512mem, loadv16f32,
- "vmovups", SSEPackedSingle, v16f32>,
- avx512_store<0x11, VR512, VK16WM, f512mem, store,
- "vmovups", SSEPackedSingle, v16f32>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVUPDZ : avx512_load<0x10, VR512, VK8WM, f512mem, loadv8f64,
- "vmovupd", SSEPackedDouble, v8f64, 0>,
- avx512_store<0x11, VR512, VK8WM, f512mem, store,
- "vmovupd", SSEPackedDouble, v8f64>,
- PD, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
+ string st_suff_512, string st_suff_256,
+ string st_suff_128, string elty, string elsz,
+ string vsz512, string vsz256, string vsz128,
+ Domain d, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
+ !cast<ValueType>("v"##vsz512##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
+ !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
+ !cast<ValueType>("v"##vsz256##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
+ !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
+
+ defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
+ !cast<ValueType>("v"##vsz128##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
+ !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
+ }
+}
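+// The st_suff_* strings let each length pick its store fragment: aligned
+// moves pass "512"/"256"/"" to select alignedstore512, alignedstore256 and
+// the plain 128-bit alignedstore; unaligned moves pass "" and use "store".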
+
+defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ avx512_store_vl<0x29, "vmovaps", "alignedstore",
+ "512", "256", "", "f", "32", "16", "8", "4",
+ SSEPackedSingle, HasAVX512>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512>,
+ avx512_store_vl<0x29, "vmovapd", "alignedstore",
+ "512", "256", "", "f", "64", "8", "4", "2",
+ SSEPackedDouble, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
+ avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
(VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
@@ -1463,75 +2209,160 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
-defm VMOVDQA32: avx512_load<0x6F, VR512, VK16WM, i512mem, alignedloadv16i32,
- "vmovdqa32", SSEPackedInt, v16i32>,
- avx512_store<0x7F, VR512, VK16WM, i512mem, alignedstore512,
- "vmovdqa32", SSEPackedInt, v16i32>,
- PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQA64: avx512_load<0x6F, VR512, VK8WM, i512mem, alignedloadv8i64,
- "vmovdqa64", SSEPackedInt, v8i64>,
- avx512_store<0x7F, VR512, VK8WM, i512mem, alignedstore512,
- "vmovdqa64", SSEPackedInt, v8i64>,
- PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VMOVDQU32: avx512_load<0x6F, VR512, VK16WM, i512mem, load,
- "vmovdqu32", SSEPackedInt, v16i32>,
- avx512_store<0x7F, VR512, VK16WM, i512mem, store,
- "vmovdqu32", SSEPackedInt, v16i32>,
- XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
- "vmovdqu64", SSEPackedInt, v8i64>,
- avx512_store<0x7F, VR512, VK8WM, i512mem, store,
- "vmovdqu64", SSEPackedInt, v8i64>,
- XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
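+// Select the generic masked_load/masked_store nodes onto the masked
+// VMOVUPS/VMOVUPD (and below, VMOVDQU32/64) forms; 256-bit ops are widened
+// to 512 bits with the mask promoted to v16i1, as KNL has no 256-bit
+// masked moves.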
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
+ (VMOVUPSZmrk addr:$ptr,
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+
+def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)),
+ (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
+
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)),
+ (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
+ (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask,
+ (bc_v16f32 (v16i32 immAllZerosV)))),
+ (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))),
+ (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask,
+ (bc_v8f64 (v16i32 immAllZerosV)))),
+ (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))),
+ (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
+ (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+
+defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
+ "16", "8", "4", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
+ "512", "256", "", "i", "32", "16", "8", "4",
+ SSEPackedInt, HasAVX512>,
+ PD, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
+ "8", "4", "2", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
+ "512", "256", "", "i", "64", "8", "4", "2",
+ SSEPackedInt, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
+ "64", "32", "16", SSEPackedInt, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
+ "i", "8", "64", "32", "16", SSEPackedInt,
+ HasBWI>, XD, EVEX_CD8<8, CD8VF>;
+
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
+ "32", "16", "8", SSEPackedInt, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
+ "i", "16", "32", "16", "8", SSEPackedInt,
+ HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
+ "16", "8", "4", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
+ "i", "32", "16", "8", "4", SSEPackedInt,
+ HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
+ "8", "4", "2", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
+ "i", "64", "8", "4", "2", SSEPackedInt,
+ HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
(v16i32 immAllZerosV), GR16:$mask)),
- (VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+ (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
- (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+ (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
- GR16:$mask),
- (VMOVDQU32mrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
+ GR16:$mask),
+ (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
VR512:$src)>;
def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
- GR8:$mask),
- (VMOVDQU64mrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
+ GR8:$mask),
+ (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
- (v8i64 VR512:$src))),
- (VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ (v8i64 VR512:$src))),
+ (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
(v16i32 immAllZerosV))),
- (VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
+ (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
- (v16i32 VR512:$src))),
- (VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
-
-def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
- (v16f32 VR512:$src2))),
- (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
- (v8f64 VR512:$src2))),
- (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
- (v16i32 VR512:$src2))),
- (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
- (v8i64 VR512:$src2))),
- (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+ (v16i32 VR512:$src))),
+ (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
+
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))),
+ (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
+ (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))),
+ (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask,
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))),
+ (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)),
+ (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
+
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)),
+ (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
+
+// SKX replacement: with VLX the 256-bit masked store is available natively.
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
+ (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>;
+
+// KNL replacement: without VLX, widen to 512 bits and promote the mask to
+// v16i1.
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
+ (VMOVDQU32Zmrk addr:$ptr,
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+
// Move Int Doubleword to Packed Double Int
//
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
@@ -1638,12 +2469,12 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, RegisterClass RC,
+multiclass avx512_move_scalar <string asm, RegisterClass RC,
SDNode OpNode, ValueType vt,
X86MemOperand x86memop, PatFrag mem_pat> {
let hasSideEffects = 0 in {
- def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
- !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128X:$dst, (vt (OpNode VR128X:$src1,
(scalar_to_vector RC:$src2))))],
IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
@@ -1651,16 +2482,22 @@ multiclass avx512_move_scalar <string asm, RegisterClass RC,
def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
!strconcat(asm,
- " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
+ "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
[], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
EVEX, VEX_LIG;
+ let mayStore = 1 in {
def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
EVEX, VEX_LIG;
+ def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ [], IIC_SSE_MOV_S_MR>,
+ EVEX, VEX_LIG, EVEX_K;
+ } // mayStore
} //hasSideEffects = 0
}
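+// The new mrk form is a masked scalar store; it is selected below for
+// int_x86_avx512_mask_store_ss.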
@@ -1680,6 +2517,10 @@ def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
+ (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
@@ -1710,7 +2551,7 @@ let Predicates = [HasAVX512] in {
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (V_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
@@ -1839,7 +2680,7 @@ let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst, (v2i64 (X86vzmovl
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
@@ -1861,7 +2702,7 @@ let Predicates = [HasAVX512] in {
(VMOV64toPQIZrr GR64:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
-
+
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -1898,136 +2739,201 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
+ def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
+ (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V256,
+ EVEX_CD8<64, CD8VF>;
+
+ def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V128,
+ EVEX_CD8<64, CD8VF>;
+ }
+}
-def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
- (ins i512mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR512:$dst,
- (int_x86_avx512_movntdqa addr:$src))]>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// Prefer non-temporal over temporal versions
-let AddedComplexity = 400, SchedRW = [WriteStore] in {
-
-def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v16f32 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-
-def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
- (ins i512mem:$dst, VR512:$src),
- "vmovntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8i64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ ValueType OpVT, RegisterClass RC, X86MemOperand memop,
+ Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let SchedRW = [WriteStore], mayStore = 1,
+ AddedComplexity = 400 in
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
}
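+// AddedComplexity = 400 keeps the old "prefer non-temporal over temporal
+// versions" behaviour whenever both store patterns match.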
+multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ string elty, string elsz, string vsz512,
+ string vsz256, string vsz128, Domain d,
+ Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let Predicates = [prd] in
+ defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
+ !cast<X86MemOperand>(elty##"512mem"), d, itin>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
+ !cast<X86MemOperand>(elty##"256mem"), d, itin>,
+ EVEX_V256;
+
+ defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
+ !cast<X86MemOperand>(elty##"128mem"), d, itin>,
+ EVEX_V128;
+ }
+}
+
+defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
+ "i", "64", "8", "4", "2", SSEPackedInt,
+ HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
+ "f", "64", "8", "4", "2", SSEPackedDouble,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
+ "f", "32", "16", "8", "4", SSEPackedSingle,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass KRC,
- RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, PatFrag scalar_mfrag,
- X86MemOperand x86scalar_mop, string BrdcstStr,
- OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
- itins.rr>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- RC:$src0)))],
- itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- (OpVT immAllZerosV))))],
- itins.rr>, EVEX_4V, EVEX_KZ;
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ "", itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V;
+}
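+// AVX512_maskable expands each description into unmasked, merge-masked
+// ({k}) and zero-masked ({k}{z}) variants, replacing the hand-written
+// rrk/rrkz/rmk/rmkz/rmbk/rmbkz defs that this patch deletes.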
+
+multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> :
+ avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
+}
+
+multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
+}
- let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
- itins.rm>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_KZ;
- }
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm>, EVEX_4V, EVEX_B;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
- }
+multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
}
+multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ itins, prd, IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+ itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
+ itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+ bits<8> opc_d, bits<8> opc_q,
+ string OpcodeStr, SDNode OpNode,
+ OpndItins itins, bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+ itins, HasAVX512, IsCommutable>,
+ avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+ itins, HasBWI, IsCommutable>;
+}
+
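To see how the new multiclass tower expands, here is a minimal self-contained sketch of the same name-pasting scheme (hypothetical class names, not the real backend definitions; it can be checked with `llvm-tblgen`): one top-level defm fans out through element-size and vector-length layers, and a defm may inherit from two multiclasses at once, exactly as avx512_binop_rm_vl_all does. Note that only the d/q layers route through the broadcast-capable avx512_binop_rmb_vl, since EVEX embedded broadcast exists only for 32- and 64-bit elements.

    class Inst<string n> {
      string AsmName = n;
    }
    multiclass binop_sizes<string OpcodeStr> {
      def Z    : Inst<OpcodeStr>;    // 512-bit form, always defined
      def Z256 : Inst<OpcodeStr>;    // 256-bit form, HasVLX-gated above
      def Z128 : Inst<OpcodeStr>;    // 128-bit form, HasVLX-gated above
    }
    multiclass binop_dq<string OpcodeStr> {
      defm D : binop_sizes<OpcodeStr # "d">;
      defm Q : binop_sizes<OpcodeStr # "q">;
    }
    multiclass binop_bw<string OpcodeStr> {
      defm B : binop_sizes<OpcodeStr # "b">;
      defm W : binop_sizes<OpcodeStr # "w">;
    }
    multiclass binop_all<string OpcodeStr> {
      defm NAME : binop_dq<OpcodeStr>, binop_bw<OpcodeStr>;
    }
    defm VPADD : binop_all<"vpadd">;   // yields VPADDBZ ... VPADDQZ128
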
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -2037,73 +2943,64 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
{
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V;
def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[], itins.rr>, EVEX_4V, EVEX_K;
def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" ,
"|$dst {${mask}} {z}, $src1, $src2}"),
[], itins.rr>, EVEX_4V, EVEX_KZ;
}
let mayLoad = 1 in {
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V;
def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[], itins.rm>, EVEX_4V, EVEX_K;
def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
+ "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
[], itins.rm>, EVEX_4V, EVEX_KZ;
def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B;
def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
}
}
-defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
-
-defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
+ SSE_INTALU_ITINS_P, 1>;
+defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
+ SSE_INTALU_ITINS_P, 0>;
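The trailing bit here is IsCommutable: VPADD passes 1 and VPSUB passes 0, because marking an instruction commutable lets later passes swap $src1 and $src2, for example to fold the memory operand. A small hedged sketch of how the default argument threads through (dummy class names, checkable with `llvm-tblgen`):

    class BinOp<string n, bit Commutable> {
      string Name = n;
      bit isCommutable = Commutable;
    }
    multiclass binop<string n, bit IsCommutable = 0> {
      def rr : BinOp<n, IsCommutable>;
    }
    defm VPSUBD : binop<"vpsubd">;      // defaults to 0: subtraction
    defm VPADDD : binop<"vpaddd", 1>;   // addition commutes
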
+defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
@@ -2124,41 +3021,33 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPMULDQZrr VR512:$src1, VR512:$src2)>;
-defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
@@ -2223,12 +3112,12 @@ multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86MemOperand x86memop> {
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
IIC_SSE_UNPCK>, EVEX_4V;
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))],
IIC_SSE_UNPCK>, EVEX_4V;
@@ -2250,19 +3139,19 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
//
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
+ SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT> {
def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
EVEX;
def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(OpVT (OpNode (mem_frag addr:$src1),
(i8 imm:$src2))))]>, EVEX;
@@ -2271,48 +3160,18 @@ multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
- memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
- memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
- (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
- (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -2340,118 +3199,58 @@ defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass KRC,
- RegisterClass RC, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_frag,
- X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr,
- Domain d, OpndItins itins, bit commutable> {
- let isCommutable = commutable in {
- def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
- EVEX_4V;
-
- def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_K;
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))>,
+ EVEX_4V, EVEX_B;
+  } // let mayLoad = 1
+}
- def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_KZ;
- }
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0> {
+ defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
+ IsCommutable>, EVEX_V512, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
+ IsCommutable>, EVEX_V512, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
- let mayLoad = 1 in {
- def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>, EVEX_4V;
-
- def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm, d>, EVEX_4V, EVEX_B;
-
- def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_K;
-
- def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_KZ;
-
- def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
-
- def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
+  // Define only if the AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
+ IsCommutable>, EVEX_V128, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
+ IsCommutable>, EVEX_V256, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
+ IsCommutable>, EVEX_V128, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
+ IsCommutable>, EVEX_V256, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
}
}
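
The `let Predicates` wrapping above is what makes the 128/256-bit forms conditional on AVX512VL while the 512-bit form needs only the base feature. A self-contained sketch of the gating, under stand-in names:

    class Pred;
    def HasAVX512 : Pred;
    def HasVLX    : Pred;
    class Inst {
      list<Pred> Predicates = [];
    }
    multiclass fp_sizes {
      let Predicates = [HasAVX512] in
      def Z : Inst;                    // 512-bit: base AVX512F only
      let Predicates = [HasAVX512, HasVLX] in {
        def Z256 : Inst;               // shorter vectors need AVX512VL too
        def Z128 : Inst;
      }
    }
    defm VADDPS : fp_sizes;
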
-defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>;
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>;
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>;
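
Each of these defms now relies on AVX512_maskable (defined earlier in the file) to stamp out the unmasked, merge-masked ({k1}) and zero-masked ({k1}{z}) variants that the deleted code spelled out by hand as rr/rrk/rrkz. A minimal sketch of that idea, assuming dummy names:

    class I<string asm> {
      string AsmString = asm;
    }
    multiclass maskable<string asm, string ops> {
      def rr   : I<asm # "\t$dst, " # ops>;               // unmasked
      def rrk  : I<asm # "\t$dst {${mask}}, " # ops>;     // merge-masking
      def rrkz : I<asm # "\t$dst {${mask}} {z}, " # ops>; // zero-masking
    }
    defm VADDPSZ : maskable<"vaddps", "$src1, $src2">;
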
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
@@ -2476,18 +3275,18 @@ def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
SDNode OpNode, ValueType vt> {
def rr : AVX512PI<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
SSEPackedInt>, EVEX_4V;
def rm : AVX512PI<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1),
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1),
(bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}
@@ -2514,154 +3313,122 @@ def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
(v8i64 VR512:$src2), (i8 -1))),
(COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
+
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
- string OpcodeStr, SDNode OpNode, RegisterClass RC,
- ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
- RegisterClass KRC> {
- def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
- SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
- def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
- def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
- def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_.MemOpFrag addr:$src1), (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType vt, ValueType SrcVT,
- PatFrag bc_frag, RegisterClass KRC> {
- // src2 is always 128-bit
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, VR128X:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
- SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))],
- SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ // src2 is always 128-bit
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (bc_frag (memopv2i64 addr:$src2)))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+}
+
+multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
+}
+
+multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
+ SDNode OpNode> {
+ defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
+ v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
+ v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
}
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
-
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+ v16i32_info>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
-
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
-
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>;
+defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>;
+defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
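
A background note on the EVEX_CD8 mixins (not part of this patch): EVEX memory operands use compressed displacements, disp = disp8 * N, where N comes from the tuple type. Roughly:

    N(CD8VF)  = the full vector in bytes   (64 for a ZMM operand)
    N(CD8VQ)  = a quarter vector           (16 bytes: the xmm shift count)
    N(CD8VT1) = a single element           (4 bytes for <32, CD8VT1>)

So under CD8VQ a displacement of 128 encodes as disp8 = 128 / 16 = 8, which is why the immediate-shift forms keep CD8VF while the xmm-count forms switch to CD8VQ.
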
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_frag> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
- EVEX_4V;
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
- EVEX_4V;
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V;
}
-defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
- i512mem, memopv16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
- i512mem, memopv8i64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
- i512mem, memopv16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
- i512mem, memopv8i64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
- i512mem, memopv16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
- i512mem, memopv8i64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+}
+
+multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VQ>;
+ defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
+ avx512vl_i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+}
+
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
+multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
X86MemOperand x86memop, PatFrag memop_frag> {
def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst,
(VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
}
@@ -2678,11 +3445,11 @@ multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
ValueType vt, RegisterClass RC, PatFrag mem_frag,
X86MemOperand x86memop> {
def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
let mayLoad = 1 in
def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
}
@@ -2729,175 +3496,139 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
+
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT> {
- def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr," \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst, (OpVT(OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
+// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDPatternOperator OpNode = null_frag> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3))))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
- !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
- ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
-}
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
+           !strconcat("$src2, ${src3}", _.BroadcastStr),
+           (OpNode _.RC:$src1, _.RC:$src2,
+            (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ AVX512FMA3Base, EVEX_B;
+ }
} // Constraints = "$src1 = $dst"
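
The `Constraints = "$src1 = $dst"` tie is what makes the three-operand FMA forms destructive: the register allocator must place $src1 and $dst in the same register, and $src1 drops out of the assembly string. A hedged, self-contained illustration with dummy operand records:

    def outs; def ins;
    def RC;   // stand-in operand class
    class Inst<dag o, dag i, string asm> {
      dag OutOperandList = o;
      dag InOperandList  = i;
      string AsmString   = asm;
      string Constraints = "";
    }
    let Constraints = "$src1 = $dst" in
    def FMA213r : Inst<(outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3),
                       "vfmadd213ps\t$dst, $src2, $src3">;  // $src1 implied
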
-let ExeDomain = SSEPackedSingle in {
- defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
+multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, X86VectorVTInfo VTI,
+ SDPatternOperator OpNode> {
+ defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
+ VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+
+ defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
+ VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
}
+
+multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
+ string OpcodeStr,
+ SDPatternOperator OpNode> {
+let ExeDomain = SSEPackedSingle in {
+ defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v16f32_info, OpNode>, EVEX_V512;
+ defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v8f32x_info, OpNode>, EVEX_V256;
+ defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v4f32x_info, OpNode>, EVEX_V128;
+ }
let ExeDomain = SSEPackedDouble in {
- defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v8f64_info, OpNode>, EVEX_V512, VEX_W;
+ defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v4f64x_info, OpNode>, EVEX_V256, VEX_W;
+ defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
+ v2f64x_info, OpNode>, EVEX_V128, VEX_W;
+ }
}
+defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
+defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
+defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
+defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
+defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
+defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
+
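Per the comment above, only the 213 forms receive a real OpNode; the 231 forms default to null_frag, so they assemble and encode but are never produced by instruction selection. A sketch of the default-argument trick, with dummy stand-ins:

    def null_frag;              // stand-in for "no selection pattern"
    def set; def fmadd;         // dummy DAG operators
    class Inst<string asm, dag pattern> {
      string AsmString = asm;
      dag Pattern = pattern;
    }
    multiclass fma_form<string asm, dag P = (null_frag)> {
      def r : Inst<asm, P>;
    }
    defm VFMADD213PSZ : fma_form<"vfmadd213ps", (set (fmadd))>; // has a pattern
    defm VFMADD231PSZ : fma_form<"vfmadd231ps">;                // assembler-only
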
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2),
+ _.RC:$src3)))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
+ ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))),
+ _.RC:$src3))]>, EVEX_B;
}
} // Constraints = "$src1 = $dst"
+multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr,
+                               SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
- defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-}
+ defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
+                      OpNode, v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
+ OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
+ OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
+ }
let ExeDomain = SSEPackedDouble in {
- defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+  defm NAME##PDZ      : avx512_fma3p_m132<opc, OpcodeStr##pd,
+                        OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm NAME##PDZ256   : avx512_fma3p_m132<opc, OpcodeStr##pd,
+                        OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm NAME##PDZ128   : avx512_fma3p_m132<opc, OpcodeStr##pd,
+                        OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
}
+defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
+defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
+defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
+defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
+defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
+defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
+
// Scalar FMA
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop,
+multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop,
PatFrag mem_frag> {
let isCommutable = 1 in
def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
let mayLoad = 1 in
def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
@@ -2930,12 +3661,12 @@ multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let hasSideEffects = 0 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
} // hasSideEffects = 0
}
@@ -3003,12 +3734,12 @@ multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstR
string asm> {
let hasSideEffects = 0 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
Requires<[HasAVX512]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
Requires<[HasAVX512]>;
} // hasSideEffects = 0
}
@@ -3106,10 +3837,10 @@ multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
}
@@ -3182,21 +3913,21 @@ def : Pat<(extloadf32 addr:$src),
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
Requires<[HasAVX512]>;
-multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
+ RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
Domain d> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
+ !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[], d>, EVEX, EVEX_B, EVEX_RC;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
} // hasSideEffects = 0
@@ -3208,12 +3939,12 @@ multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
Domain d> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
} // hasSideEffects = 0
@@ -3230,7 +3961,7 @@ defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
-
+
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
(bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
(VCVTPD2PSZrr VR512:$src)>;
@@ -3259,7 +3990,7 @@ defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
- memopv8f64, f512mem, v8i32, v8f64,
+ memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
@@ -3277,7 +4008,7 @@ defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uin
memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
-
+
// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
(v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
@@ -3287,16 +4018,16 @@ defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
memopv4i64, f256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
-
+
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
memopv16i32, f512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, XD,
EVEX_CD8<32, CD8VF>;
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+ (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
@@ -3304,7 +4035,7 @@ def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
@@ -3331,14 +4062,14 @@ multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
X86MemOperand x86memop, Domain d> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[], d>, EVEX;
def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
+ !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[], d>, EVEX, EVEX_B, EVEX_RC;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm," \t{$src, $dst|$dst, $src}"),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[], d>, EVEX;
} // hasSideEffects = 0
}
@@ -3397,12 +4128,12 @@ multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop> {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
(ins srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX;
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
@@ -3449,7 +4180,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
-
+
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> {
@@ -3457,12 +4188,12 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
}
}
}
@@ -3498,26 +4229,49 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, ValueType OpVt> {
- def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
- EVEX;
- def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
- EVEX;
-}
-defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, T8PD, EVEX_B;
+ }
+}
+
+multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+  // Define only if the AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
+defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
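
The new mb variants cover EVEX embedded broadcast: one scalar is loaded and splatted across the vector, and BroadcastStr is just the "{1toN}" token glued onto the memory operand, e.g. `vrcp14ps zmm0, dword ptr [rax]{1to16}`. A minimal sketch of the string plumbing, assuming dummy names:

    class Inst<string asm> { string AsmString = asm; }
    multiclass fp14<string OpcodeStr, string BroadcastStr> {
      def r  : Inst<OpcodeStr # "\t$dst, $src">;
      def m  : Inst<OpcodeStr # "\t$dst, $src">;
      def mb : Inst<OpcodeStr # "\t$dst, ${src}" # BroadcastStr>;
    }
    defm VRCP14PSZ : fp14<"vrcp14ps", "{1to16}">;
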
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
@@ -3534,148 +4288,100 @@ def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
(VRCP14PDZr VR512:$src)>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
-multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- let hasSideEffects = 0, Predicates = [HasERI] in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
- def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
- []>, EVEX_4V, EVEX_B;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
- }
-}
+multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
+
+ defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT))>;
+
+ defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
+
+ defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT))>;
}
-defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-
-def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
-
-def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
+multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
+ EVEX_CD8<32, CD8VT1>;
+ defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
+ EVEX_CD8<64, CD8VT1>, VEX_W;
+}
+let hasSideEffects = 0, Predicates = [HasERI] in {
+ defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
+ defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
+}
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
-multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop> {
- let hasSideEffects = 0, Predicates = [HasERI] in {
- def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
- []>, EVEX, EVEX_B;
- def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- }
+
+multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
+
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "$src", "$src",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
+ "{sae}">, EVEX_B;
+
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))),
+ (i32 FROUND_CURRENT))>;
+
+ defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))),
+ (i32 FROUND_CURRENT))>, EVEX_B;
}
-defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
- (VRSQRT28PSZrb VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
- (VRSQRT28PDZrb VR512:$src)>;
-
-def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
- (VRCP28PSZrb VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
- (VRCP28PDZrb VR512:$src)>;
-
-multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic V16F32Int, Intrinsic V8F64Int,
- OpndItins itins_s, OpndItins itins_d> {
- def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
- EVEX, EVEX_V512;
- let mayLoad = 1 in
- def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
- itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+ EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+              VEX_W, EVEX_CD8<64, CD8VF>;
+}
- def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
- EVEX, EVEX_V512;
+let Predicates = [HasERI], hasSideEffects = 0 in {
- let mayLoad = 1 in
- def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (OpNode
- (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
- itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
+}
-let isCodeGenOnly = 1 in {
- def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V16F32Int VR512:$src))]>,
- EVEX, EVEX_V512;
- def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int VR512:$src))]>,
- EVEX, EVEX_V512, VEX_W;
- def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-} // isCodeGenOnly = 1
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX;
+
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, EVEX_B;
+ }
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
@@ -3691,7 +4397,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
(ins VR128X:$src1, VR128X:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F32Int VR128X:$src1, VR128X:$src2))],
itins_s.rr>, XS, EVEX_4V;
let mayLoad = 1 in {
@@ -3705,7 +4411,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
(ins VR128X:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F32Int VR128X:$src1, sse_load_f32:$src2))],
itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
}
@@ -3719,7 +4425,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
(ins VR128X:$src1, VR128X:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F64Int VR128X:$src1, VR128X:$src2))],
itins_s.rr>, XD, EVEX_4V, VEX_W;
let mayLoad = 1 in {
@@ -3733,21 +4439,51 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
(ins VR128X:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
+multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ v16f32_info>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ v8f64_info>,
+ EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+  // Define only if the AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
-defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
- int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
- SSE_SQRTSS, SSE_SQRTSD>,
- avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
- int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
- SSE_SQRTPS, SSE_SQRTPD>;
+defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
+ int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
+ SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
+ def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
+ (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
+ (VSQRTPSZr VR512:$src1)>;
+ def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
+ (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
+ (VSQRTPDZr VR512:$src1)>;
+
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3856,7 +4592,7 @@ let ExeDomain = GenericDomain in {
(ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst, (F32Int VR128X:$src1,
+ [(set VR128X:$dst, (F32Int VR128X:$src1,
sse_load_f32:$src2, imm:$src3))]>,
EVEX_CD8<32, CD8VT1>;
@@ -3897,14 +4633,14 @@ let ExeDomain = d in {
def r : AVX512AIi8<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
// Vector intrinsic operation, mem
def m : AVX512AIi8<opc, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
} // ExeDomain
}
@@ -3935,20 +4671,20 @@ let ExeDomain = d in {
def r : AVX512AIi8<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V;
def m : AVX512AIi8<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V;
} // ExeDomain
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
-
+
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
@@ -4004,32 +4740,32 @@ multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
RegisterClass KRC, X86MemOperand x86memop> {
def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins srcRC:$src),
- !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins KRC:$mask, srcRC:$src),
!strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins KRC:$mask, srcRC:$src),
!strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
}
-defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
+defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
@@ -4083,36 +4819,36 @@ multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins x86memop:$src),
- !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
EVEX;
def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
+ !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
[]>,
EVEX, EVEX_K;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>,
EVEX, EVEX_KZ;
}
@@ -4160,7 +4896,7 @@ let mayLoad = 1,
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
(ins RC:$src1, KRC:$mask, memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
}
@@ -4177,7 +4913,7 @@ defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-
+
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
@@ -4194,7 +4930,7 @@ let mayStore = 1, Constraints = "$mask = $mask_wb" in
def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
(ins memop:$dst, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
}
@@ -4227,7 +4963,7 @@ multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeSt
RegisterClass KRC, X86MemOperand memop> {
let Predicates = [HasPFI], hasSideEffects = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
- !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
[]>, EVEX, EVEX_K;
}
@@ -4242,7 +4978,7 @@ defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-
+
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
@@ -4287,14 +5023,14 @@ multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
(i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
EVEX_4V, Sched<[WriteShuffle]>;
@@ -4317,33 +5053,29 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
(memopv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_alignr<string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, EVEX_4V;
+multiclass avx512_valign<X86VectorVTInfo _> {
+ defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
+ "valign"##_.Suffix,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
+ (i8 imm:$src3)))>,
+ AVX512AIi8Base, EVEX_4V;
+
+ // Also match valign of packed floats.
+ def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+ (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
+
let mayLoad = 1 in
- def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
+ !strconcat("valign"##_.Suffix,
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}"),
[]>, EVEX_4V;
}
-defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
@@ -4354,43 +5086,43 @@ multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
X86MemOperand x86memop, X86MemOperand x86scalar_mop,
string BrdcstStr> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr,
- " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
- " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
- " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
", $dst|$dst, ${src}", BrdcstStr, "}"),
[]>, EVEX, EVEX_B;
def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
[]>, EVEX, EVEX_B, EVEX_K;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
BrdcstStr, "}"),
[]>, EVEX, EVEX_B, EVEX_KZ;
@@ -4420,54 +5152,54 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPABSQZrr VR512:$src)>;
-multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
+multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
X86MemOperand x86scalar_mop, string BrdcstStr> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src),
- !strconcat(OpcodeStr, " \t{$src, ${dst} |${dst}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
[]>, EVEX;
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
+ !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
[]>, EVEX;
def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
[]>, EVEX, EVEX_B;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
BrdcstStr, "}"),
[]>, EVEX, EVEX_KZ, EVEX_B;
-
+
let Constraints = "$src1 = $dst" in {
def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86memop:$src2),
!strconcat(OpcodeStr,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
[]>, EVEX, EVEX_K, EVEX_B;
}
@@ -4541,3 +5273,135 @@ def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
+def rr : AVX512XS8I<opc, MRMDestReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+ !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
+ [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
+}
+
+multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
+ string OpcodeStr, Predicate prd> {
+let Predicates = [prd] in
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
+multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
+ defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
+ HasBWI>;
+ defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
+ HasBWI>, VEX_W;
+ defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
+ HasDQI>;
+ defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
+ HasDQI>, VEX_W;
+}
+
+defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - COMPRESS and EXPAND
+//
+multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
+ string OpcodeStr> {
+ def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
+ _.ImmAllZerosV)))]>, EVEX_KZ;
+
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
+ _.RC:$src0)))]>, EVEX_K;
+
+ let mayStore = 1 in {
+ def mrk : AVX5128I<opc, MRMDestMem, (outs),
+ (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
+ addr:$dst)]>,
+ EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ }
+}
+
+multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo> {
+ defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
+defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
+ EVEX;
+defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
+ EVEX, VEX_W;
+defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
+ EVEX;
+defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
+ EVEX, VEX_W;
+
+// expand
+multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
+ string OpcodeStr> {
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
+ _.ImmAllZerosV)))]>, EVEX_KZ;
+
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
+ (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
+
+ let mayLoad = 1, Constraints = "$src0 = $dst" in
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src))),
+ _.RC:$src0)))]>,
+ EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+
+ let mayLoad = 1 in
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
+ (_.VT (bitconvert (_.LdFrag addr:$src))),
+ _.ImmAllZerosV)))]>,
+ EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
+
+}
+
+multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo> {
+ defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
+defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
+ EVEX;
+defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
+ EVEX, VEX_W;
+defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
+ EVEX;
+defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
+ EVEX, VEX_W;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f2574cc3700e..78efc4d57111 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -15,13 +15,13 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
let SchedRW = [WriteLEA] in {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
+ (outs GR16:$dst), (ins anymem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize16;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins i32mem:$src),
+ (outs GR32:$dst), (ins anymem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
[(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
OpSize32, Requires<[Not64BitMode]>;
@@ -65,18 +65,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
[(set AL, (mul AL, GR8:$src)),
(implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
[], IIC_MUL16_REG>, OpSize16, Sched<[WriteIMul]>;
// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
IIC_MUL32_REG>, OpSize32, Sched<[WriteIMul]>;
// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
"mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
@@ -91,7 +91,7 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
[(set AL, (mul AL, (loadi8 addr:$src))),
(implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
// AX,DX = AX*[mem16]
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
@@ -107,7 +107,7 @@ def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
"mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
@@ -145,7 +145,7 @@ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
"imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
-} // neverHasSideEffects
+} // hasSideEffects
let Defs = [EFLAGS] in {
@@ -456,64 +456,29 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))],
IIC_UNARY_REG>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
- OpSize16, Requires<[Not64BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
+ IIC_UNARY_REG>, OpSize16;
+def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
- IIC_UNARY_REG>,
- OpSize32, Requires<[Not64BitMode]>;
+ IIC_UNARY_REG>, OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))],
IIC_UNARY_REG>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 1
-
-
-// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
-// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
- IIC_UNARY_REG>,
- OpSize16, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
- IIC_UNARY_REG>,
- OpSize32, Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
- IIC_UNARY_REG>,
- OpSize16, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
- IIC_UNARY_REG>,
- OpSize32, Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- CodeSize = 2 in {
-def INC32_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst", [], IIC_UNARY_REG>,
- OpSize16, Requires<[Not64BitMode]>;
-def INC32_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst", [], IIC_UNARY_REG>,
- OpSize32, Requires<[Not64BitMode]>;
-def DEC32_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst", [], IIC_UNARY_REG>,
- OpSize16, Requires<[Not64BitMode]>;
-def DEC32_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst", [], IIC_UNARY_REG>,
- OpSize32, Requires<[Not64BitMode]>;
-} // isCodeGenOnly = 1, ForceDisassemble = 1, HasSideEffects = 0, CodeSize = 2
-
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+let CodeSize = 1, hasSideEffects = 0 in {
+def INC16r_alt : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize16, Requires<[Not64BitMode]>;
+def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst", [], IIC_UNARY_REG>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // CodeSize = 1, hasSideEffects = 0
} // Constraints = "$src1 = $dst", SchedRW
let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
@@ -522,35 +487,13 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
(implicit EFLAGS)], IIC_UNARY_MEM>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize16, Requires<[Not64BitMode]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize32, Requires<[Not64BitMode]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-
-// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
-// how to unfold them.
-// FIXME: What is this for??
-def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize16, Requires<[In64BitMode]>;
-def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize32, Requires<[In64BitMode]>;
-def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize16, Requires<[In64BitMode]>;
-def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize32, Requires<[In64BitMode]>;
} // CodeSize = 2, SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
@@ -559,21 +502,29 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
IIC_UNARY_REG>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
- IIC_UNARY_REG>,
- OpSize16, Requires<[Not64BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ IIC_UNARY_REG>, OpSize16;
+def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
- IIC_UNARY_REG>,
- OpSize32, Requires<[Not64BitMode]>;
+ IIC_UNARY_REG>, OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
-} // CodeSize = 2
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+let CodeSize = 1, hasSideEffects = 0 in {
+def DEC16r_alt : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize16, Requires<[Not64BitMode]>;
+def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst", [], IIC_UNARY_REG>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // CodeSize = 1, hasSideEffects = 0
} // Constraints = "$src1 = $dst", SchedRW
@@ -583,12 +534,10 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
(implicit EFLAGS)], IIC_UNARY_MEM>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize16, Requires<[Not64BitMode]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize16;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)], IIC_UNARY_MEM>,
- OpSize32, Requires<[Not64BitMode]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -710,15 +659,6 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALU]>;
-// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
-// just a regclass (no eflags) as a result.
-class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
- IIC_BIN_NONMEM>;
-
// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
// just a EFLAGS as a result.
class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -825,13 +765,6 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = typeinfo.ImmEncoding;
}
-// BinOpRI_R - Instructions like "add reg, reg, imm".
-class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
// BinOpRI_F - Instructions like "cmp reg, imm".
class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f>
@@ -864,30 +797,23 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = Imm8; // Always 8-bit immediate.
}
-// BinOpRI8_R - Instructions like "add reg, reg, imm8".
-class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
// BinOpRI8_F - Instructions like "cmp reg, imm8".
class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
@@ -923,8 +849,8 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
// BinOpMI - Instructions like "add [mem], imm".
-class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern, bits<8> opcode = 0x80,
+class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern,
InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
@@ -934,27 +860,26 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
}
// BinOpMI_RMW - Instructions like "add [mem], imm".
-class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+class BinOpMI_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<opcode, mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
-class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+class BinOpMI_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<opcode, mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>;
+ (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
// BinOpMI_F - Instructions like "cmp [mem], imm".
-class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
- : BinOpMI<mnemonic, typeinfo, f,
+class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
- typeinfo.ImmOperator:$src))],
- opcode>;
+ typeinfo.ImmOperator:$src))]>;
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
@@ -969,7 +894,7 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI8_RMW - Instructions like "add [mem], imm8".
class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpMI8<mnemonic, typeinfo, f,
[(store (opnode (load addr:$dst),
typeinfo.Imm8Operator:$src), addr:$dst),
@@ -977,7 +902,7 @@ class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpMI8<mnemonic, typeinfo, f,
[(store (opnode (load addr:$dst),
typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
@@ -985,7 +910,7 @@ class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI8_F - Instructions like "cmp [mem], imm8".
class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDPatternOperator opnode, Format f>
: BinOpMI8<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (load addr:$dst),
typeinfo.Imm8Operator:$src))]>;
@@ -1023,12 +948,13 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
bit CommutableRR, bit ConvertibleToThreeAddress> {
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR,
- isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isCommutable = CommutableRR in {
def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
- def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
- def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isConvertibleToThreeAddress
} // isCommutable
def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
@@ -1041,6 +967,8 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
@@ -1048,7 +976,6 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
@@ -1066,10 +993,20 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
- def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since opcode 0x82 behaves like 0x80 but
+  // is not valid in 64-bit mode.
+ let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
+ hasSideEffects = 0 in {
+ let Constraints = "$src1 = $dst" in
+ def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ let mayLoad = 1, mayStore = 1 in
+ def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, null_frag, MemMRM>;
+ }
} // Defs = [EFLAGS]
def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
@@ -1094,12 +1031,13 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
bit ConvertibleToThreeAddress> {
let Uses = [EFLAGS], Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR,
- isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isCommutable = CommutableRR in {
def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isConvertibleToThreeAddress
} // isCommutable
def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
@@ -1112,6 +1050,8 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
@@ -1119,7 +1059,6 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
- def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
@@ -1137,10 +1076,20 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
- def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#8mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since opcode 0x82 behaves like 0x80 but
+  // is not valid in 64-bit mode.
+ let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
+ hasSideEffects = 0 in {
+ let Constraints = "$src1 = $dst" in
+ def NAME#8ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ let mayLoad = 1, mayStore = 1 in
+ def NAME#8mi8 : BinOpMI8_RMW_FF<mnemonic, Xi8, null_frag, MemMRM>;
+ }
} // Uses = [EFLAGS], Defs = [EFLAGS]
def NAME#8i8 : BinOpAI_FF<BaseOpc4, mnemonic, Xi8 , AL,
@@ -1162,12 +1111,13 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
SDNode opnode,
bit CommutableRR, bit ConvertibleToThreeAddress> {
let Defs = [EFLAGS] in {
- let isCommutable = CommutableRR,
- isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isCommutable = CommutableRR in {
def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ }
} // isCommutable
def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
@@ -1180,6 +1130,8 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
@@ -1187,7 +1139,6 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
- def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
@@ -1204,10 +1155,19 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
- def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
- def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since the 0x82 opcode behaves like 0x80
+  // everywhere except 64-bit mode, where 0x82 is invalid.
+ let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
+ hasSideEffects = 0 in {
+ def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ let mayLoad = 1 in
+ def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, null_frag, MemMRM>;
+ }
} // Defs = [EFLAGS]
def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
@@ -1272,15 +1232,15 @@ let isCompare = 1 in {
def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
- def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
- def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
- def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
- def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+ def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;
+ def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>;
+ def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
+ def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;
// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
// register class is constrained to GR8_NOREX. This pseudo is explicitly
// marked side-effect free, since it doesn't have an isel pattern like
- // other test instructions.
+ // other test instructions.
let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
@@ -1332,7 +1292,7 @@ let Predicates = [HasBMI] in {
// MULX Instruction
//
multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
@@ -1355,49 +1315,54 @@ let Predicates = [HasBMI2] in {
//===----------------------------------------------------------------------===//
// ADCX Instruction
//
-let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
+ Constraints = "$src0 = $dst", AddedComplexity = 10 in {
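+  // Note: architecturally ADCX reads and writes only CF, but it is modeled
+  // here with the same X86adc_flag node as ADC, so using/defining all of
+  // EFLAGS is a conservative approximation.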
let SchedRW = [WriteALU] in {
- def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "adcx{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8PD;
-
- def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "adcx{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8PD, Requires<[In64BitMode]>;
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src0, GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS,
+ (X86adc_flag GR32:$src0, GR32:$src, EFLAGS))],
+ IIC_BIN_CARRY_NONMEM>, T8PD;
+ def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src0, GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS,
+ (X86adc_flag GR64:$src0, GR64:$src, EFLAGS))],
+ IIC_BIN_CARRY_NONMEM>, T8PD;
} // SchedRW
let mayLoad = 1, SchedRW = [WriteALULd] in {
- def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "adcx{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8PD;
-
- def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "adcx{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8PD, Requires<[In64BitMode]>;
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src0, i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS,
+ (X86adc_flag GR32:$src0, (loadi32 addr:$src), EFLAGS))],
+ IIC_BIN_CARRY_MEM>, T8PD;
+
+ def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src0, i64mem:$src), "adcx{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS,
+ (X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))],
+ IIC_BIN_CARRY_MEM>, T8PD;
}
}
//===----------------------------------------------------------------------===//
// ADOX Instruction
//
-let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+let Predicates = [HasADX], hasSideEffects = 0, Defs = [EFLAGS],
+ Uses = [EFLAGS] in {
let SchedRW = [WriteALU] in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "adox{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8XS;
+ "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "adox{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8XS, Requires<[In64BitMode]>;
+ "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
} // SchedRW
let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "adox{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8XS;
+ "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "adox{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8XS, Requires<[In64BitMode]>;
+ "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
}
}
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index e421f8c7b985..2056056d23a5 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -21,8 +21,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86INSTRBUILDER_H
-#define X86INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
+#define LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index ca4f608a6b80..ed0a6346929b 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -32,7 +32,7 @@ def GetLo8XForm : SDNodeXForm<imm, [{
// PIC base construction. This expands to code that looks like this:
// call $next_inst
//     popl %destreg
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
"", []>;
@@ -46,11 +46,11 @@ let Defs = [ESP, EFLAGS], Uses = [ESP] in {
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
}
// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
@@ -62,11 +62,11 @@ let Defs = [RSP, EFLAGS], Uses = [RSP] in {
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
}
@@ -118,7 +118,7 @@ def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca for segmented stacks",
[(set GR32:$dst,
(X86SegAlloca GR32:$size))]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
@@ -214,6 +214,8 @@ let isPseudo = 1 in {
"#SEH_PushFrame $mode", []>;
def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
"#SEH_EndPrologue", []>;
+ def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
+ "#SEH_Epilogue", []>;
}
//===----------------------------------------------------------------------===//
@@ -257,7 +259,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1, neverHasSideEffects = 1 in
+ isCodeGenOnly = 1, hasSideEffects = 0 in
def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
@@ -407,7 +409,8 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
@@ -426,7 +429,8 @@ def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
@@ -596,12 +600,12 @@ def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize32, LOCK;
-def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
+def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
@@ -747,18 +751,88 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
TB, LOCK;
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+/* The following multiclass tries to make sure that in code like
+ *     x.store (immediate op x.load(acquire), release)
+ * an operation directly on memory is generated instead of wasting a register.
+ * This is not automatic, as atomic_store/load are lowered to MOV instructions
+ * only extremely late, to prevent them from being accidentally reordered in
+ * the backend (see the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions below).
+ */
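+// For example,
+//     x.store(x.load(acquire) + 123, release)
+// can then be selected into a single add-to-memory instruction (e.g.
+// "addl $123, (mem)") rather than a separate load, add, and store.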
+multiclass RELEASE_BINOP_MI<string op> {
+ def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
+  // NAME#16 is not generated, as 16-bit arithmetic instructions are
+  // considered costly and are avoided by this backend wherever possible.
+ def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
+ def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
+}
+defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
+defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
+defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
+defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
+// Note: we don't deal with sub, because subtractions of constants are
+// optimized into additions before this code can run.
+
+multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
+ def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_8 addr:$dst, dag8)]>;
+ def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_16 addr:$dst, dag16)]>;
+ def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_32 addr:$dst, dag32)]>;
+ def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_64 addr:$dst, dag64)]>;
+}
+
+defm RELEASE_INC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 1)),
+ (add (atomic_load_16 addr:$dst), (i16 1)),
+ (add (atomic_load_32 addr:$dst), (i32 1)),
+ (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+defm RELEASE_DEC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 -1)),
+ (add (atomic_load_16 addr:$dst), (i16 -1)),
+ (add (atomic_load_32 addr:$dst), (i32 -1)),
+ (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
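+// Note: inc/dec are expressed above as "add ±1" patterns; the NotSlowIncDec
+// predicate keeps them off targets whose SlowIncDec feature marks INC/DEC as
+// slower than ADD/SUB.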
+/*
+TODO: These don't work because TableGen's type inference fails on them.
+TODO: Find a way to fix that.
+defm RELEASE_NEG : RELEASE_UNOP<
+ (ineg (atomic_load_8 addr:$dst)),
+ (ineg (atomic_load_16 addr:$dst)),
+ (ineg (atomic_load_32 addr:$dst)),
+ (ineg (atomic_load_64 addr:$dst))>;
+defm RELEASE_NOT : RELEASE_UNOP<
+ (not (atomic_load_8 addr:$dst)),
+ (not (atomic_load_16 addr:$dst)),
+ (not (atomic_load_32 addr:$dst)),
+ (not (atomic_load_64 addr:$dst))>;
+*/
+
+def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
+                       "#RELEASE_MOV PSEUDO!",
+                       [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
+def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
+                       "#RELEASE_MOV PSEUDO!",
+                       [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
+def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
+                       "#RELEASE_MOV PSEUDO!",
+                       [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
+def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
+                       "#RELEASE_MOV PSEUDO!",
+                       [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
"#RELEASE_MOV PSEUDO!",
@@ -773,11 +847,22 @@ def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//
-
// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
// instruction selection into a branch sequence.
let Uses = [EFLAGS], usesCustomInserter = 1 in {
@@ -925,6 +1010,9 @@ def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
+def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
+
// Calls
// tls has some funny stuff here...
@@ -1106,6 +1194,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != ISD::AssertSext &&
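+         // (AssertSext is only a pass-through assertion about sign bits, so
+         // the upper 32 bits of the value behind it may be non-zero.)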
N->getOpcode() != X86ISD::CMOV;
}]>;
@@ -1638,35 +1727,18 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-// Increment reg.
-// Do not make INC if it is slow
-def : Pat<(add GR8:$src, 1),
- (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
-def : Pat<(add GR16:$src, 1),
- (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
-def : Pat<(add GR16:$src, 1),
- (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
-def : Pat<(add GR32:$src, 1),
- (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
-def : Pat<(add GR32:$src, 1),
- (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
-def : Pat<(add GR64:$src, 1),
- (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
-
-// Decrement reg.
-// Do not make DEC if it is slow
-def : Pat<(add GR8:$src, -1),
- (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
-def : Pat<(add GR16:$src, -1),
- (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
-def : Pat<(add GR16:$src, -1),
- (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
-def : Pat<(add GR32:$src, -1),
- (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
-def : Pat<(add GR32:$src, -1),
- (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
-def : Pat<(add GR64:$src, -1),
- (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
+// Increment/Decrement reg.
+// Do not select INC/DEC if they are slow on the target.
+let Predicates = [NotSlowIncDec] in {
+ def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
+ def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
+ def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
+ def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+ def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+}
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 39ad3954af79..71415c6fde8e 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -57,33 +57,32 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
- def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>, OpSize32;
- def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>, OpSize16,
- Requires<[In16BitMode]>;
- let hasSideEffects = 0 in
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", [], IIC_JMP_REL>;
+ "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
+ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
+ "jmp\t$dst", [], IIC_JMP_REL>, OpSize16;
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
+ "jmp\t$dst", [], IIC_JMP_REL>, OpSize32;
+ }
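+  // With this arrangement JMP_1 is the form the compiler emits; the assembler
+  // can relax it to the 16/32-bit-offset encodings when the target is out of
+  // range, and those encodings are kept here (ForceDisassemble) so the
+  // disassembler can still decode them.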
}
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- let hasSideEffects = 0 in
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
- IIC_Jcc>;
- def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, OpSize16,
- TB, Requires<[In16BitMode]>;
- def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB,
- OpSize32;
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>;
+ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
+ [], IIC_Jcc>, OpSize16, TB;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
+ [], IIC_Jcc>, TB, OpSize32;
+ }
}
}
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
@@ -106,20 +105,14 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[Not64BitMode]>;
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize16;
let Uses = [ECX] in
- def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", [], IIC_JCXZ>, Requires<[Not64BitMode]>;
+ def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", [], IIC_JCXZ>, AdSize32;
- // J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
- // In 64-bit mode, the address size prefix is jecxz and the unprefixed version
- // is jrcxz.
- let Uses = [ECX] in
- def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>;
+ "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64;
}
// Indirect branches
@@ -145,14 +138,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
[(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
- def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", [],
- IIC_JMP_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
- def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", [],
- IIC_JMP_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
+ let Predicates = [Not64BitMode] in {
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t$seg, $off", [],
+ IIC_JMP_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t$seg, $off", [],
+ IIC_JMP_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
+ }
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
"ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
Sched<[WriteJump]>;
@@ -186,10 +181,11 @@ let isCall = 1 in
(outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", [], IIC_CALL_RI>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
- def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst),
- "call{w}\t$dst", [], IIC_CALL_RI>, OpSize16,
- Sched<[WriteJump]>;
+ let hasSideEffects = 0 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst),
+ "call{w}\t$dst", [], IIC_CALL_RI>, OpSize16,
+ Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)], IIC_CALL_RI>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
@@ -207,14 +203,16 @@ let isCall = 1 in
Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>;
- def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "lcall{w}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
- def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "lcall{l}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
+ let Predicates = [Not64BitMode] in {
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t$seg, $off", [],
+ IIC_CALL_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t$seg, $off", [],
+ IIC_CALL_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
+ }
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
"lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize16,
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 6be6a1fc6cb7..eea4f17db3f2 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let Defs = [AX], Uses = [AL] in
def CBW : I<0x98, RawFrm, (outs), (ins),
"{cbtw|cbw}", [], IIC_CBW>, OpSize16; // AX = signext(AL)
@@ -39,7 +39,7 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
TB, OpSize16, Sched<[WriteALU]>;
@@ -47,7 +47,7 @@ let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
TB, OpSize16, Sched<[WriteALULd]>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
@@ -65,7 +65,7 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
OpSize32, TB, Sched<[WriteALULd]>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
TB, OpSize16, Sched<[WriteALU]>;
@@ -73,7 +73,7 @@ let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
TB, OpSize16, Sched<[WriteALULd]>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
@@ -94,16 +94,26 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
-let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
+let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+
+def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ [], IIC_MOVSX>, TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ [], IIC_MOVSX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index c0a6864e2585..2993e42443d4 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -69,7 +69,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
/* IsRVariantCommutable */ 1,
/* IsMVariantCommutable */ 1,
Op>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
@@ -81,7 +81,7 @@ let neverHasSideEffects = 1 in {
MemFrag128, MemFrag256, OpTy128, OpTy256,
/* IsRVariantCommutable */ 1,
/* IsMVariantCommutable */ 0>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
}
// Fused Multiply-Add
@@ -155,7 +155,7 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
SDNode OpNode, RegisterClass RC, ValueType OpVT,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
// See the other defm of r231 for the explanation regarding the
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 4ad7b7e32a0a..e1abf26664bb 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -17,13 +17,13 @@
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
+ SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
+ SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
@@ -98,7 +98,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32"
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
-// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
@@ -107,16 +107,13 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// The second instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
-// The FP stackifier pass converts one to the other after register allocation
+// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
-// Pseudo Instruction for FP stack return values.
-def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
-
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
@@ -142,66 +139,66 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
// ST(0) = ST(0) + [mem]
-def _Fp32m : FpIf32<(outs RFP32:$dst),
+def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP32:$dst,
+ [(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
-def _Fp64m : FpIf64<(outs RFP64:$dst),
+def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
+ [(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
-def _Fp64m32: FpIf64<(outs RFP64:$dst),
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
+ [(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
-def _Fp80m32: FpI_<(outs RFP80:$dst),
+def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
+ [(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
-def _Fp80m64: FpI_<(outs RFP80:$dst),
+def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
+ [(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
-def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
}
-def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
}
// ST(0) = ST(0) + [memint]
-def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
}
-def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
}
}
@@ -285,7 +282,7 @@ defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
@@ -418,7 +415,7 @@ def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
@@ -427,7 +424,7 @@ def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
}
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
@@ -503,7 +500,7 @@ def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
IIC_FST>;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
IIC_FST>;
-def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
"fisttp{ll}\t$dst", IIC_FST>;
}
@@ -639,7 +636,7 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", [], IIC_FXSAVE>, TB;
def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
+ "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", [], IIC_FXRSTOR>, TB;
@@ -659,12 +656,12 @@ def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
// Required for CALL which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 8ef5f901c185..ba2387823f5e 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -36,20 +36,21 @@ def MRM6m : Format<30>; def MRM7m : Format<31>;
def MRM_C0 : Format<32>; def MRM_C1 : Format<33>; def MRM_C2 : Format<34>;
def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>;
def MRM_C9 : Format<38>; def MRM_CA : Format<39>; def MRM_CB : Format<40>;
-def MRM_D0 : Format<41>; def MRM_D1 : Format<42>; def MRM_D4 : Format<43>;
-def MRM_D5 : Format<44>; def MRM_D6 : Format<45>; def MRM_D8 : Format<46>;
-def MRM_D9 : Format<47>; def MRM_DA : Format<48>; def MRM_DB : Format<49>;
-def MRM_DC : Format<50>; def MRM_DD : Format<51>; def MRM_DE : Format<52>;
-def MRM_DF : Format<53>; def MRM_E0 : Format<54>; def MRM_E1 : Format<55>;
-def MRM_E2 : Format<56>; def MRM_E3 : Format<57>; def MRM_E4 : Format<58>;
-def MRM_E5 : Format<59>; def MRM_E8 : Format<60>; def MRM_E9 : Format<61>;
-def MRM_EA : Format<62>; def MRM_EB : Format<63>; def MRM_EC : Format<64>;
-def MRM_ED : Format<65>; def MRM_EE : Format<66>; def MRM_F0 : Format<67>;
-def MRM_F1 : Format<68>; def MRM_F2 : Format<69>; def MRM_F3 : Format<70>;
-def MRM_F4 : Format<71>; def MRM_F5 : Format<72>; def MRM_F6 : Format<73>;
-def MRM_F7 : Format<74>; def MRM_F8 : Format<75>; def MRM_F9 : Format<76>;
-def MRM_FA : Format<77>; def MRM_FB : Format<78>; def MRM_FC : Format<79>;
-def MRM_FD : Format<80>; def MRM_FE : Format<81>; def MRM_FF : Format<82>;
+def MRM_CF : Format<41>; def MRM_D0 : Format<42>; def MRM_D1 : Format<43>;
+def MRM_D4 : Format<44>; def MRM_D5 : Format<45>; def MRM_D6 : Format<46>;
+def MRM_D7 : Format<47>; def MRM_D8 : Format<48>; def MRM_D9 : Format<49>;
+def MRM_DA : Format<50>; def MRM_DB : Format<51>; def MRM_DC : Format<52>;
+def MRM_DD : Format<53>; def MRM_DE : Format<54>; def MRM_DF : Format<55>;
+def MRM_E0 : Format<56>; def MRM_E1 : Format<57>; def MRM_E2 : Format<58>;
+def MRM_E3 : Format<59>; def MRM_E4 : Format<60>; def MRM_E5 : Format<61>;
+def MRM_E8 : Format<62>; def MRM_E9 : Format<63>; def MRM_EA : Format<64>;
+def MRM_EB : Format<65>; def MRM_EC : Format<66>; def MRM_ED : Format<67>;
+def MRM_EE : Format<68>; def MRM_F0 : Format<69>; def MRM_F1 : Format<70>;
+def MRM_F2 : Format<71>; def MRM_F3 : Format<72>; def MRM_F4 : Format<73>;
+def MRM_F5 : Format<74>; def MRM_F6 : Format<75>; def MRM_F7 : Format<76>;
+def MRM_F8 : Format<77>; def MRM_F9 : Format<78>; def MRM_FA : Format<79>;
+def MRM_FB : Format<80>; def MRM_FC : Format<81>; def MRM_FD : Format<82>;
+def MRM_FE : Format<83>; def MRM_FF : Format<84>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -100,6 +101,7 @@ def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
+// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
@@ -144,11 +146,22 @@ def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
+// Address size for encodings that change based on mode.
+class AddressSize<bits<2> val> {
+ bits<2> Value = val;
+}
+def AdSizeX : AddressSize<0>; // Address size determined using addr operand.
+def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
+def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
+def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
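+// For example, jcxz/jecxz/jrcxz all share opcode 0xE3 and differ only in
+// effective address size, so they are tagged AdSize16/32/64 respectively
+// (see the J*CXZ changes in X86InstrControl.td earlier in this patch).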
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
-class AdSize { bit hasAdSizePrefix = 1; }
+class AdSize16 { AddressSize AdSize = AdSize16; }
+class AdSize32 { AddressSize AdSize = AdSize32; }
+class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
@@ -229,9 +242,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// AsmString from the parser, but still disassemble.
OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
- // based on operand size of the mode
+                                     // based on the operand size of the mode?
bits<2> OpSizeBits = OpSize.Value;
- bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+ AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
+                                // based on the address size of the mode?
+ bits<2> AdSizeBits = AdSize.Value;
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
@@ -282,35 +297,35 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
- // TSFlags layout should be kept in sync with X86InstrInfo.h.
+ // TSFlags layout should be kept in sync with X86BaseInfo.h.
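+  // Note: the 2-bit AdSizeBits field below replaces the old 1-bit
+  // hasAdSizePrefix, so every subsequent field shifts up by one bit.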
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
- let TSFlags{9} = hasAdSizePrefix;
- let TSFlags{12-10} = OpPrefixBits;
- let TSFlags{15-13} = OpMapBits;
- let TSFlags{16} = hasREX_WPrefix;
- let TSFlags{20-17} = ImmT.Value;
- let TSFlags{23-21} = FPForm.Value;
- let TSFlags{24} = hasLockPrefix;
- let TSFlags{25} = hasREPPrefix;
- let TSFlags{27-26} = ExeDomain.Value;
- let TSFlags{29-28} = OpEncBits;
- let TSFlags{37-30} = Opcode;
- let TSFlags{38} = hasVEX_WPrefix;
- let TSFlags{39} = hasVEX_4V;
- let TSFlags{40} = hasVEX_4VOp3;
- let TSFlags{41} = hasVEX_i8ImmReg;
- let TSFlags{42} = hasVEX_L;
- let TSFlags{43} = ignoresVEX_L;
- let TSFlags{44} = hasEVEX_K;
- let TSFlags{45} = hasEVEX_Z;
- let TSFlags{46} = hasEVEX_L2;
- let TSFlags{47} = hasEVEX_B;
+ let TSFlags{10-9} = AdSizeBits;
+ let TSFlags{13-11} = OpPrefixBits;
+ let TSFlags{16-14} = OpMapBits;
+ let TSFlags{17} = hasREX_WPrefix;
+ let TSFlags{21-18} = ImmT.Value;
+ let TSFlags{24-22} = FPForm.Value;
+ let TSFlags{25} = hasLockPrefix;
+ let TSFlags{26} = hasREPPrefix;
+ let TSFlags{28-27} = ExeDomain.Value;
+ let TSFlags{30-29} = OpEncBits;
+ let TSFlags{38-31} = Opcode;
+ let TSFlags{39} = hasVEX_WPrefix;
+ let TSFlags{40} = hasVEX_4V;
+ let TSFlags{41} = hasVEX_4VOp3;
+ let TSFlags{42} = hasVEX_i8ImmReg;
+ let TSFlags{43} = hasVEX_L;
+ let TSFlags{44} = ignoresVEX_L;
+ let TSFlags{45} = hasEVEX_K;
+ let TSFlags{46} = hasEVEX_Z;
+ let TSFlags{47} = hasEVEX_L2;
+ let TSFlags{48} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
- let TSFlags{54-48} = CD8_Scale;
- let TSFlags{55} = has3DNow0F0FOpcode;
- let TSFlags{56} = hasMemOp4Prefix;
- let TSFlags{57} = hasEVEX_RC;
+ let TSFlags{55-49} = CD8_Scale;
+ let TSFlags{56} = has3DNow0F0FOpcode;
+ let TSFlags{57} = hasMemOp4Prefix;
+ let TSFlags{58} = hasEVEX_RC;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -325,26 +340,26 @@ class I<bits<8> o, Format f, dag outs, dag ins, string asm,
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
@@ -357,14 +372,14 @@ class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
-class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
@@ -391,14 +406,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
-class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
@@ -476,7 +491,7 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
}
// SSE1 Instruction Templates:
-//
+//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
@@ -507,7 +522,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
-//
+//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
@@ -571,16 +586,16 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
-//
+//
// S3I - SSE3 instructions with PD prefixes.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
-class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
-class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
@@ -591,7 +606,7 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
// SSSE3 Instruction Templates:
-//
+//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
@@ -619,7 +634,7 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
-//
+//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
@@ -633,7 +648,7 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
-//
+//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
@@ -697,6 +712,9 @@ class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8PD,
Requires<[HasAVX512]>;
+class AVX5128IBase : T8PD {
+ Domain ExeDomain = SSEPackedInt;
+}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8XS,
@@ -713,14 +731,25 @@ class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
+class AVX512BIBase : PD {
+ Domain ExeDomain = SSEPackedInt;
+}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
+class AVX512BIi8Base : PD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
+class AVX512AIi8Base : TAPD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
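+// The *Base classes above only set prefix/domain/immediate defaults; they are
+// intended to be mixed into multiclasses that provide the full instruction
+// template arguments themselves.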
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>,
@@ -743,6 +772,11 @@ class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
+class AVX512FMA3Base : T8PD, EVEX_4V;
+
+class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
@@ -850,27 +884,27 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
-class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
-class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,Not64BitMode]>;
-class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,In64BitMode]>;
-class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, REX_W, Requires<[HasMMX]>;
-class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PD, Requires<[HasMMX]>;
-class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
-class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
-class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6f0fa9462770..76e8fad78de3 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -83,7 +83,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
@@ -188,6 +188,8 @@ def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
+def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -197,12 +199,17 @@ def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<2>]>;
+def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<3>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
+def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -231,10 +238,11 @@ def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
-def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
-def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
+def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
+def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
+def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
@@ -247,6 +255,9 @@ def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
[SDTCisVec<1>, SDTCisPtrTy<2>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
+
+def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
@@ -254,6 +265,13 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
+def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
+def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
+
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
+def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
+
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
SDTCisVT<4, i8>]>;
@@ -265,6 +283,13 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -311,6 +336,8 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// 512-bit load pattern fragments
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
+def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
@@ -401,16 +428,6 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 4;
-}]>;
-
-def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
@@ -427,10 +444,10 @@ def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
// 512-bit memop pattern fragments
-def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
-def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
-def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
+def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
+def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>;
+def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>;
+def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
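
Note: with memop4/memop8 gone, the 512-bit fragments below reuse the single `memop` predicate above. A minimal sketch of the surviving legality test, with a hypothetical helper name mirroring the TableGen predicate:

    // Sketch of the check inside the `memop` PatFrag; the helper name
    // is hypothetical, the logic mirrors the TableGen predicate above.
    bool memopLegal(bool HasVectorUAMem, unsigned Alignment) {
      return HasVectorUAMem || Alignment >= 16;
    }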
@@ -509,7 +526,9 @@ def I8Imm : SDNodeXForm<imm, [{
}]>;
def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
-def FROUND_CURRENT : ImmLeaf<i32, [{ return Imm == 4; }]>;
+def FROUND_CURRENT : ImmLeaf<i32, [{
+ return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION;
+}]>;
// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm<imm, [{
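
Note on the FROUND_CURRENT change above: the named constant matches the old literal, since X86::STATIC_ROUNDING::CUR_DIRECTION is the value 4 ("round using the current MXCSR direction"). A plain C++ sketch of the predicate, enum abbreviated:

    // CUR_DIRECTION == 4, matching the literal the old ImmLeaf tested.
    enum { CUR_DIRECTION = 4 };
    bool isFroundCurrent(int Imm) { return Imm == CUR_DIRECTION; }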
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0d3afc43c2bb..461569345a11 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -64,6 +65,7 @@ enum {
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
+ TB_INDEX_4 = 4,
TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
@@ -100,8 +102,8 @@ void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
- (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
- (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
Subtarget(STI), RI(STI) {
static const X86OpTblEntry OpTbl2Addr[] = {
@@ -144,14 +146,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::AND8rr, X86::AND8mr, 0 },
{ X86::DEC16r, X86::DEC16m, 0 },
{ X86::DEC32r, X86::DEC32m, 0 },
- { X86::DEC64_16r, X86::DEC64_16m, 0 },
- { X86::DEC64_32r, X86::DEC64_32m, 0 },
{ X86::DEC64r, X86::DEC64m, 0 },
{ X86::DEC8r, X86::DEC8m, 0 },
{ X86::INC16r, X86::INC16m, 0 },
{ X86::INC32r, X86::INC32m, 0 },
- { X86::INC64_16r, X86::INC64_16m, 0 },
- { X86::INC64_32r, X86::INC64_32m, 0 },
{ X86::INC64r, X86::INC64m, 0 },
{ X86::INC8r, X86::INC8m, 0 },
{ X86::NEG16r, X86::NEG16m, 0 },
@@ -377,7 +375,39 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
// AVX-512 foldable instructions
- { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -415,6 +445,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
+ { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
+ { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -493,6 +527,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+ { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
+ { X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
+ { X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
+ { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -526,6 +566,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
// AVX 256-bit foldable instructions
+ { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
+ { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
+ { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
+ { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
+ { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
+ { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
@@ -533,6 +579,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// AVX2 foldable instructions
{ X86::VPABSBrr256, X86::VPABSBrm256, 0 },
@@ -541,13 +594,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
@@ -601,18 +647,51 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
- { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
- { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
- { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
- { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
+ { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
{ X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
{ X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
- { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -869,8 +948,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
@@ -1249,6 +1326,19 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
{ X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
+ { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+
+ // AVX-512{F,VL} foldable instructions
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+
+ // AVX-512{F,VL} arithmetic instructions
+ { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
+ { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
+ { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
+ { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -1429,7 +1519,51 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
{ X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
{ X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
- { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 }
+ { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
+ { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ // AVX-512 arithmetic instructions
+ { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
+ { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
+ { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
+ // AVX-512{F,VL} arithmetic instructions 256-bit
+ { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
+ { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
+ { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
+ { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
+ { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
+ { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
+ { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
+ { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
+ { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
+ { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
+ { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
+ // AVX-512{F,VL} arithmetic instructions 128-bit
+ { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
+ { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
+ { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
+ { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
+ { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
+ { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
+ { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
+ { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
+ { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
+ { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
+ { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
+ { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1442,6 +1576,57 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
+ static const X86OpTblEntry OpTbl4[] = {
+ // AVX-512 foldable instructions
+ { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
+ { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
+ { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
+ // AVX-512{F,VL} foldable instructions 256-bit
+ { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
+ { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
+ { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
+ { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
+ { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
+ { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
+ { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
+ { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
+ { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
+ { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
+ { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
+ // AVX-512{F,VL} foldable instructions 128-bit
+ { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
+ { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
+ { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
+ { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
+ { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
+ { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
+ { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
+ { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
+ { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
+ { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
+ { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl4); i != e; ++i) {
+ unsigned RegOp = OpTbl4[i].RegOp;
+ unsigned MemOp = OpTbl4[i].MemOp;
+ unsigned Flags = OpTbl4[i].Flags;
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 4, folded load
+ Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
+ }
}
void
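
Note: the new OpTbl4 feeds a fourth map, RegOp2MemOpTable4, consulted when the folded operand sits at index 4 (the memory source of the masked three-source forms). A toy model of the registration and lookup, with made-up opcode and flag values standing in for the X86:: ones and std::unordered_map standing in for LLVM's DenseMap:

    #include <unordered_map>
    #include <utility>

    // Toy stand-ins; the real code uses DenseMap and X86:: opcodes.
    enum : unsigned { VADDPSZrrk = 1, VADDPSZrmk = 2 };
    enum : unsigned { TB_INDEX_4 = 4, TB_FOLDED_LOAD = 1u << 5 };

    int main() {
      std::unordered_map<unsigned, std::pair<unsigned, unsigned>> RegOp2MemOpTable4;
      RegOp2MemOpTable4[VADDPSZrrk] = {VADDPSZrmk, TB_INDEX_4 | TB_FOLDED_LOAD};
      auto I = RegOp2MemOpTable4.find(VADDPSZrrk);  // what the folder does
      return I != RegOp2MemOpTable4.end() ? 0 : 1;  // hit -> a memory form exists
    }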
@@ -1543,8 +1728,11 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVAPSrm:
case X86::VMOVAPDrm:
case X86::VMOVDQArm:
+ case X86::VMOVUPSYrm:
case X86::VMOVAPSYrm:
+ case X86::VMOVUPDYrm:
case X86::VMOVAPDYrm:
+ case X86::VMOVDQUYrm:
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
@@ -1572,8 +1760,11 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::VMOVAPSmr:
case X86::VMOVAPDmr:
case X86::VMOVDQAmr:
+ case X86::VMOVUPSYmr:
case X86::VMOVAPSYmr:
+ case X86::VMOVUPDYmr:
case X86::VMOVAPDYmr:
+ case X86::VMOVDQUYmr:
case X86::VMOVDQAYmr:
case X86::VMOVUPSZmr:
case X86::VMOVAPSZmr:
@@ -1980,11 +2171,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
break;
}
case X86::INC16r:
- case X86::INC64_16r:
addRegOffset(MIB, leaInReg, true, 1);
break;
case X86::DEC16r:
- case X86::DEC64_16r:
addRegOffset(MIB, leaInReg, true, -1);
break;
case X86::ADD16ri:
@@ -2078,34 +2267,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
- case X86::SHUFPSrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
- case X86::SHUFPDrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
-
- // Convert to PSHUFD mask.
- M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
-
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
+ default: return nullptr;
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
@@ -2160,185 +2322,175 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
- default: {
+ case X86::INC64r:
+ case X86::INC32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ bool isKill, isUndef;
+ unsigned SrcReg;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
+ SrcReg, isKill, isUndef, ImplicitOp))
+ return nullptr;
- switch (MIOpc) {
- default: return nullptr;
- case X86::INC64r:
- case X86::INC32r:
- case X86::INC64_32r: {
- assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
- unsigned SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp))
- return nullptr;
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest)
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
+ if (ImplicitOp.getReg() != 0)
+ MIB.addOperand(ImplicitOp);
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addOperand(Dest)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
- if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ NewMI = addOffset(MIB, 1);
+ break;
+ }
+ case X86::INC16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addOffset(MIB, 1);
- break;
- }
- case X86::INC16r:
- case X86::INC64_16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
- : nullptr;
- assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest).addOperand(Src), 1);
- break;
- case X86::DEC64r:
- case X86::DEC32r:
- case X86::DEC64_32r: {
- assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
- unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
-
- bool isKill, isUndef;
- unsigned SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp))
- return nullptr;
+ bool isKill, isUndef;
+ unsigned SrcReg;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
+ SrcReg, isKill, isUndef, ImplicitOp))
+ return nullptr;
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addOperand(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
- if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest)
+ .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
+ if (ImplicitOp.getReg() != 0)
+ MIB.addOperand(ImplicitOp);
- NewMI = addOffset(MIB, -1);
+ NewMI = addOffset(MIB, -1);
- break;
- }
- case X86::DEC16r:
- case X86::DEC64_16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
- : nullptr;
- assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest).addOperand(Src), -1);
- break;
- case X86::ADD64rr:
- case X86::ADD64rr_DB:
- case X86::ADD32rr:
- case X86::ADD32rr_DB: {
- assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc;
- if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
- Opc = X86::LEA64r;
- else
- Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ break;
+ }
+ case X86::DEC16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
+ Opc = X86::LEA64r;
+ else
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
- unsigned SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp))
- return nullptr;
+ bool isKill, isUndef;
+ unsigned SrcReg;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+ SrcReg, isKill, isUndef, ImplicitOp))
+ return nullptr;
- const MachineOperand &Src2 = MI->getOperand(2);
- bool isKill2, isUndef2;
- unsigned SrcReg2;
- MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, isUndef2, ImplicitOp2))
- return nullptr;
+ const MachineOperand &Src2 = MI->getOperand(2);
+ bool isKill2, isUndef2;
+ unsigned SrcReg2;
+ MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
+ SrcReg2, isKill2, isUndef2, ImplicitOp2))
+ return nullptr;
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addOperand(Dest);
- if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
- if (ImplicitOp2.getReg() != 0)
- MIB.addOperand(ImplicitOp2);
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest);
+ if (ImplicitOp.getReg() != 0)
+ MIB.addOperand(ImplicitOp);
+ if (ImplicitOp2.getReg() != 0)
+ MIB.addOperand(ImplicitOp2);
- NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
+ NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
- // Preserve undefness of the operands.
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
+ // Preserve undefness of the operands.
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
- if (LV && Src2.isKill())
- LV->replaceKillInstruction(SrcReg2, MI, NewMI);
- break;
- }
- case X86::ADD16rr:
- case X86::ADD16rr_DB: {
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
- : nullptr;
- assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Src2 = MI->getOperand(2).getReg();
- bool isKill2 = MI->getOperand(2).isKill();
- NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
-
- // Preserve undefness of the operands.
- bool isUndef = MI->getOperand(1).isUndef();
- bool isUndef2 = MI->getOperand(2).isUndef();
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
- if (LV && isKill2)
- LV->replaceKillInstruction(Src2, MI, NewMI);
- break;
- }
- case X86::ADD64ri32:
- case X86::ADD64ri8:
- case X86::ADD64ri32_DB:
- case X86::ADD64ri8_DB:
- assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addOperand(Dest).addOperand(Src),
- MI->getOperand(2).getImm());
- break;
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri_DB:
- case X86::ADD32ri8_DB: {
- assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-
- bool isKill, isUndef;
- unsigned SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp))
- return nullptr;
+ if (LV && Src2.isKill())
+ LV->replaceKillInstruction(SrcReg2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+
+ bool isKill, isUndef;
+ unsigned SrcReg;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+ SrcReg, isKill, isUndef, ImplicitOp))
+ return nullptr;
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addOperand(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
- if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest)
+ .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
+ if (ImplicitOp.getReg() != 0)
+ MIB.addOperand(ImplicitOp);
- NewMI = addOffset(MIB, MI->getOperand(2).getImm());
- break;
- }
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16ri_DB:
- case X86::ADD16ri8_DB:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
- : nullptr;
- assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest).addOperand(Src),
- MI->getOperand(2).getImm());
- break;
- }
+ NewMI = addOffset(MIB, MI->getOperand(2).getImm());
+ break;
}
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
+ break;
}
if (!NewMI) return nullptr;
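
Note: the flattened switch keeps the same rewrites; for instance INC64r/INC32r still become an LEA with displacement 1, which is a genuine three-address add. A semantics-only sketch of why that helps:

    #include <cassert>
    // INC updates its operand in place; LEA computes base+disp into any
    // destination, which is why the conversion frees the source register.
    unsigned long inc64r(unsigned long Reg) { return Reg + 1; }
    unsigned long lea64r(unsigned long Base, long Disp) { return Base + Disp; }
    int main() {
      unsigned long rax = 41;
      assert(inc64r(rax) == lea64r(rax, /*Disp=*/1));
      return 0;
    }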
@@ -2387,6 +2539,42 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWYrri: {
+ unsigned Mask;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::BLENDPDrri: Mask = 0x03; break;
+ case X86::BLENDPSrri: Mask = 0x0F; break;
+ case X86::PBLENDWrri: Mask = 0xFF; break;
+ case X86::VBLENDPDrri: Mask = 0x03; break;
+ case X86::VBLENDPSrri: Mask = 0x0F; break;
+ case X86::VBLENDPDYrri: Mask = 0x0F; break;
+ case X86::VBLENDPSYrri: Mask = 0xFF; break;
+ case X86::VPBLENDDrri: Mask = 0x0F; break;
+ case X86::VPBLENDWrri: Mask = 0xFF; break;
+ case X86::VPBLENDDYrri: Mask = 0xFF; break;
+ case X86::VPBLENDWYrri: Mask = 0xFF; break;
+ }
+ // Only the least significant bits of Imm are used.
+ unsigned Imm = MI->getOperand(3).getImm() & Mask;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->getOperand(3).setImm(Mask ^ Imm);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
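
Note on the Mask ^ Imm trick in the blend case above: each immediate bit picks the lane source, so commuting the two vector operands must flip exactly the lane-select bits the instruction actually reads. A worked example for BLENDPS (Mask = 0x0F):

    // BLENDPS dst, src1, src2, imm: bit i set -> lane i comes from src2,
    // so commuting the sources must invert exactly the used lane bits.
    int main() {
      unsigned Mask = 0x0F;            // BLENDPS reads 4 lane-select bits
      unsigned Imm = 0x05;             // lanes 0 and 2 from src2
      unsigned Commuted = Mask ^ Imm;  // 0x0A: lanes 1 and 3 from src2
      return Commuted == 0x0A ? 0 : 1;
    }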
@@ -2471,6 +2659,20 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDWYrri:
+ SrcOpIdx1 = 1;
+ SrcOpIdx2 = 2;
+ return true;
case X86::VFMADDPDr231r:
case X86::VFMADDPSr231r:
case X86::VFMADDSDr231r:
@@ -2506,22 +2708,22 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
- case X86::JE_4: return X86::COND_E;
- case X86::JNE_4: return X86::COND_NE;
- case X86::JL_4: return X86::COND_L;
- case X86::JLE_4: return X86::COND_LE;
- case X86::JG_4: return X86::COND_G;
- case X86::JGE_4: return X86::COND_GE;
- case X86::JB_4: return X86::COND_B;
- case X86::JBE_4: return X86::COND_BE;
- case X86::JA_4: return X86::COND_A;
- case X86::JAE_4: return X86::COND_AE;
- case X86::JS_4: return X86::COND_S;
- case X86::JNS_4: return X86::COND_NS;
- case X86::JP_4: return X86::COND_P;
- case X86::JNP_4: return X86::COND_NP;
- case X86::JO_4: return X86::COND_O;
- case X86::JNO_4: return X86::COND_NO;
+ case X86::JE_1: return X86::COND_E;
+ case X86::JNE_1: return X86::COND_NE;
+ case X86::JL_1: return X86::COND_L;
+ case X86::JLE_1: return X86::COND_LE;
+ case X86::JG_1: return X86::COND_G;
+ case X86::JGE_1: return X86::COND_GE;
+ case X86::JB_1: return X86::COND_B;
+ case X86::JBE_1: return X86::COND_BE;
+ case X86::JA_1: return X86::COND_A;
+ case X86::JAE_1: return X86::COND_AE;
+ case X86::JS_1: return X86::COND_S;
+ case X86::JNS_1: return X86::COND_NS;
+ case X86::JP_1: return X86::COND_P;
+ case X86::JNP_1: return X86::COND_NP;
+ case X86::JO_1: return X86::COND_O;
+ case X86::JNO_1: return X86::COND_NO;
}
}
@@ -2606,22 +2808,22 @@ X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
- case X86::COND_E: return X86::JE_4;
- case X86::COND_NE: return X86::JNE_4;
- case X86::COND_L: return X86::JL_4;
- case X86::COND_LE: return X86::JLE_4;
- case X86::COND_G: return X86::JG_4;
- case X86::COND_GE: return X86::JGE_4;
- case X86::COND_B: return X86::JB_4;
- case X86::COND_BE: return X86::JBE_4;
- case X86::COND_A: return X86::JA_4;
- case X86::COND_AE: return X86::JAE_4;
- case X86::COND_S: return X86::JS_4;
- case X86::COND_NS: return X86::JNS_4;
- case X86::COND_P: return X86::JP_4;
- case X86::COND_NP: return X86::JNP_4;
- case X86::COND_O: return X86::JO_4;
- case X86::COND_NO: return X86::JNO_4;
+ case X86::COND_E: return X86::JE_1;
+ case X86::COND_NE: return X86::JNE_1;
+ case X86::COND_L: return X86::JL_1;
+ case X86::COND_LE: return X86::JLE_1;
+ case X86::COND_G: return X86::JG_1;
+ case X86::COND_GE: return X86::JGE_1;
+ case X86::COND_B: return X86::JB_1;
+ case X86::COND_BE: return X86::JBE_1;
+ case X86::COND_A: return X86::JA_1;
+ case X86::COND_AE: return X86::JAE_1;
+ case X86::COND_S: return X86::JS_1;
+ case X86::COND_NS: return X86::JNS_1;
+ case X86::COND_P: return X86::JP_1;
+ case X86::COND_NP: return X86::JNP_1;
+ case X86::COND_O: return X86::JO_1;
+ case X86::COND_NO: return X86::JNO_1;
}
}
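
Note: the _4 to _1 rename throughout these hunks makes the 1-byte-displacement branch encodings the canonical forms, presumably leaving out-of-range targets to assembler relaxation. A sketch of the range test that makes a short form usable:

    #include <cstdint>
    #include <limits>
    // A short (rel8) jump encodes a signed 8-bit displacement; anything
    // wider needs the rel32 form the assembler relaxes to.
    bool fitsInShortJump(int64_t Disp) {
      return Disp >= std::numeric_limits<int8_t>::min() &&
             Disp <= std::numeric_limits<int8_t>::max();
    }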
@@ -2779,7 +2981,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
// Handle unconditional branches.
- if (I->getOpcode() == X86::JMP_4) {
+ if (I->getOpcode() == X86::JMP_1) {
UnCondBrIter = I;
if (!AllowModify) {
@@ -2841,7 +3043,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
.addMBB(UnCondBrIter->getOperand(0).getMBB());
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
.addMBB(TargetBB);
OldInst->eraseFromParent();
@@ -2906,7 +3108,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
--I;
if (I->isDebugValue())
continue;
- if (I->getOpcode() != X86::JMP_4 &&
+ if (I->getOpcode() != X86::JMP_1 &&
getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
@@ -2931,7 +3133,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
return 1;
}
@@ -2941,16 +3143,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
switch (CC) {
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
++Count;
- BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JE_1)).addMBB(TBB);
++Count;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
++Count;
- BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
++Count;
break;
default: {
@@ -2961,7 +3163,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
++Count;
}
return Count;
@@ -3067,6 +3269,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
inline static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
+ X86::VK32RegClass.contains(Reg) ||
+ X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
static
@@ -3143,7 +3347,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Moving EFLAGS to / from another register requires a push and a pop.
// Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
+ // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
@@ -3287,9 +3491,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -3324,9 +3530,11 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -3495,14 +3703,12 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::DEC64_32r: case X86::DEC64_16r:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::INC64_32r: case X86::INC64_16r:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@@ -3868,10 +4074,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
/// operand at the use. We fold the load instructions if load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
-MachineInstr* X86InstrInfo::
-optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
- MachineInstr *&DefMI) const {
+MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
@@ -3887,55 +4093,35 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
if (!DefMI->isSafeToMove(this, nullptr, SawStore))
return nullptr;
- // We try to commute MI if possible.
- unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
- for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
- // Collect information about virtual register operands of MI.
- unsigned SrcOperandId = 0;
- bool FoundSrcOperand = false;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != FoldAsLoadDefReg)
- continue;
- // Do not fold if we have a subreg use or a def or multiple uses.
- if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return nullptr;
-
- SrcOperandId = i;
- FoundSrcOperand = true;
- }
- if (!FoundSrcOperand) return nullptr;
-
- // Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
- if (FoldMI) {
- FoldAsLoadDefReg = 0;
- return FoldMI;
- }
-
- if (Idx == 1) {
- // MI was changed but it didn't help, commute it back!
- commuteInstruction(MI, false);
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
return nullptr;
- }
- // Check whether we can commute MI and enable folding.
- if (MI->isCommutable()) {
- MachineInstr *NewMI = commuteInstruction(MI, false);
- // Unable to commute.
- if (!NewMI) return nullptr;
- if (NewMI != MI) {
- // New instruction. It doesn't need to be kept.
- NewMI->eraseFromParent();
- return nullptr;
- }
- }
+ SrcOperandId = i;
+ FoundSrcOperand = true;
}
+ if (!FoundSrcOperand)
+ return nullptr;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
return nullptr;
}
@@ -3961,6 +4147,28 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
return true;
}
+// LoadStackGuard has so far only been implemented for 64-bit MachO. A
+// different code sequence is needed for other targets.
+static void expandLoadStackGuard(MachineInstrBuilder &MIB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->
+ getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
+ MachineBasicBlock::iterator I = MIB.getInstr();
+
+ BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
+ .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
+ .addMemOperand(MMO);
+ MIB->setDebugLoc(DL);
+ MIB->setDesc(TII.get(X86::MOV64rm));
+ MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
+}
+
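
Note: the expansion above emits two MOV64rm instructions, a RIP-relative GOT load of the guard's address followed by a load through that pointer. A rough C analogue, using the MachO guard symbol name:

    // C analogue of the emitted pair of loads (sketch only).
    extern void *__stack_chk_guard;      // guard symbol, reached via the GOT
    void *loadGuard() {
      void **slot = &__stack_chk_guard;  // first MOV64rm: the address
      return *slot;                      // second MOV64rm: the value
    }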
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
@@ -3991,10 +4199,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
- case X86::KSET0B:
+ case X86::KSET0B:
case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
case X86::KSET1B:
case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
+ case TargetOpcode::LOAD_STACK_GUARD:
+ expandLoadStackGuard(MIB, *this);
+ return true;
}
return false;
}
@@ -4070,7 +4281,8 @@ MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align) const {
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4117,6 +4329,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2;
} else if (i == 3) {
OpcodeTablePtr = &RegOp2MemOpTable3;
+ } else if (i == 4) {
+ OpcodeTablePtr = &RegOp2MemOpTable4;
}
// If table selected...
@@ -4138,8 +4352,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
- // a 32-bit load which is implicitly zero-extended. This likely is due
- // to liveintervalanalysis remat'ing a load from stack slot.
+ // a 32-bit load which is implicitly zero-extended. This likely is
+ // due to live interval analysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
return nullptr;
Opcode = X86::MOV32rm;
@@ -4158,8 +4372,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// to a 32-bit one.
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
- NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- X86::sub_32bit));
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
@@ -4167,6 +4380,65 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
}
+ // If the instruction and target operand are commutable, commute the
+ // instruction and try again.
+ if (AllowCommute) {
+ unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
+ if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
+ bool HasDef = MI->getDesc().getNumDefs();
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
+ unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
+ bool Tied0 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
+
+ // If either of the commutable operands is tied to the destination,
+ // then we cannot commute and fold.
+ if ((HasDef && Reg0 == Reg1 && Tied0) ||
+ (HasDef && Reg0 == Reg2 && Tied1))
+ return nullptr;
+
+ if ((CommuteOpIdx1 == OriginalOpIdx) ||
+ (CommuteOpIdx2 == OriginalOpIdx)) {
+ MachineInstr *CommutedMI = commuteInstruction(MI, false);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Attempt to fold with the commuted version of the instruction.
+ unsigned CommuteOp =
+ (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
+ /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
+
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI = commuteInstruction(MI, false);
+ if (!UncommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Return here to prevent a duplicate fuse failure report.
+ return nullptr;
+ }
+ }
+ }
+
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
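
Note: this block replaces the retry loop that used to live in optimizeLoadInstr: if the requested operand has no table entry but the instruction is commutable, the operands are swapped, the fold is attempted once on the other index, and the commute is undone on failure. A toy model of the control flow (not the LLVM API):

    #include <functional>
    // tryFold stands in for the table lookup; commuting is modeled as
    // simply retrying the other commutable index exactly once.
    bool foldWithCommute(unsigned i, unsigned Idx1, unsigned Idx2,
                         const std::function<bool(unsigned)> &tryFold) {
      if (tryFold(i))
        return true;
      if (i != Idx1 && i != Idx2)
        return false;                // operand i is not commutable
      unsigned Other = (i == Idx1) ? Idx2 : Idx1;
      return tryFold(Other);         // retry after the (notional) commute
    }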
@@ -4192,23 +4464,43 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
static bool hasPartialRegUpdate(unsigned Opcode) {
switch (Opcode) {
case X86::CVTSI2SSrr:
+ case X86::CVTSI2SSrm:
case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SS64rm:
case X86::CVTSI2SDrr:
+ case X86::CVTSI2SDrm:
case X86::CVTSI2SD64rr:
+ case X86::CVTSI2SD64rm:
case X86::CVTSD2SSrr:
+ case X86::CVTSD2SSrm:
case X86::Int_CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrm:
case X86::CVTSS2SDrr:
+ case X86::CVTSS2SDrm:
case X86::Int_CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrm:
case X86::RCPSSr:
+ case X86::RCPSSm:
case X86::RCPSSr_Int:
+ case X86::RCPSSm_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDm:
case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSm:
case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
+ case X86::RSQRTSSm:
case X86::RSQRTSSr_Int:
+ case X86::RSQRTSSm_Int:
case X86::SQRTSSr:
+ case X86::SQRTSSm:
case X86::SQRTSSr_Int:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDm:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSDm_Int:
return true;
}
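
Note: the new rm entries matter for the same reason as their rr twins: these instructions merge into their destination instead of redefining it, so the result carries a false dependency on the old register value. A small simulation of the merge behaviour:

    #include <array>
    // cvtsi2ss writes only lane 0 of its XMM destination; lanes 1-3
    // keep whatever was there, which is the partial update being tracked.
    using XMM = std::array<float, 4>;
    XMM cvtsi2ss(XMM Old, int Src) {
      Old[0] = static_cast<float>(Src);  // only the low lane is defined
      return Old;                        // upper lanes still depend on Old
    }
    int main() {
      XMM x{1, 2, 3, 4};
      return cvtsi2ss(x, 7)[3] == 4 ? 0 : 1;  // upper lane survived
    }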
@@ -4245,28 +4537,52 @@ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
static bool hasUndefRegUpdate(unsigned Opcode) {
switch (Opcode) {
case X86::VCVTSI2SSrr:
+ case X86::VCVTSI2SSrm:
case X86::Int_VCVTSI2SSrr:
+ case X86::Int_VCVTSI2SSrm:
case X86::VCVTSI2SS64rr:
+ case X86::VCVTSI2SS64rm:
case X86::Int_VCVTSI2SS64rr:
+ case X86::Int_VCVTSI2SS64rm:
case X86::VCVTSI2SDrr:
+ case X86::VCVTSI2SDrm:
case X86::Int_VCVTSI2SDrr:
+ case X86::Int_VCVTSI2SDrm:
case X86::VCVTSI2SD64rr:
+ case X86::VCVTSI2SD64rm:
case X86::Int_VCVTSI2SD64rr:
+ case X86::Int_VCVTSI2SD64rm:
case X86::VCVTSD2SSrr:
+ case X86::VCVTSD2SSrm:
case X86::Int_VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrm:
case X86::VCVTSS2SDrr:
+ case X86::VCVTSS2SDrm:
case X86::Int_VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrm:
case X86::VRCPSSr:
+ case X86::VRCPSSm:
+ case X86::VRCPSSm_Int:
case X86::VROUNDSDr:
+ case X86::VROUNDSDm:
case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSm:
case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
+ case X86::VRSQRTSSm:
+ case X86::VRSQRTSSm_Int:
case X86::VSQRTSSr:
-
- // AVX-512
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ // AVX-512
case X86::VCVTSD2SSZrr:
+ case X86::VCVTSD2SSZrm:
case X86::VCVTSS2SDZrr:
+ case X86::VCVTSS2SDZrm:
return true;
}
@@ -4350,8 +4666,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(
- Alignment, MF.getTarget().getFrameLowering()->getStackAlignment());
+ Alignment = std::min(Alignment, MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4374,7 +4692,27 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ Size, Alignment, /*AllowCommute=*/true);
+}
+
+static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineFunction &MF) {
+ unsigned Opc = LoadMI.getOpcode();
+ unsigned RegSize =
+ MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+    // These instructions only load 32 bits; we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return true;
+
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+    // These instructions only load 64 bits; we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return true;
+
+ return false;
}
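
Restating the rule isPartialRegisterLoad enforces, as a standalone sketch (the enum and helpers are invented): MOVSS/VMOVSS defines 4 bytes and MOVSD/VMOVSD defines 8, so folding the load into a consumer is only safe when the consumer's register class is no wider than the load.

    enum class LoadKind { MOVSS, MOVSD, Other };

    static unsigned loadWidthInBytes(LoadKind K) {
      switch (K) {
      case LoadKind::MOVSS: return 4;
      case LoadKind::MOVSD: return 8;
      default:              return 0;   // full-width: always foldable
      }
    }

    static bool isPartialLoad(LoadKind K, unsigned RegBytes) {
      unsigned W = loadWidthInBytes(K);
      return W != 0 && RegBytes > W;    // upper bytes would be left undefined
    }
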
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -4384,8 +4722,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
- if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ if (isPartialRegisterLoad(*LoadMI, MF))
+ return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ }
// Check switch flag
if (NoFusing) return nullptr;
@@ -4496,19 +4837,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
default: {
- if ((LoadMI->getOpcode() == X86::MOVSSrm ||
- LoadMI->getOpcode() == X86::VMOVSSrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 4)
- // These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return nullptr;
- if ((LoadMI->getOpcode() == X86::MOVSDrm ||
- LoadMI->getOpcode() == X86::VMOVSDrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 8)
- // These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
+ if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
@@ -4517,7 +4846,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ /*Size=*/0, Alignment, /*AllowCommute=*/true);
}
@@ -4997,26 +5327,26 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
switch(Second->getOpcode()) {
default:
return false;
- case X86::JE_4:
- case X86::JNE_4:
- case X86::JL_4:
- case X86::JLE_4:
- case X86::JG_4:
- case X86::JGE_4:
+ case X86::JE_1:
+ case X86::JNE_1:
+ case X86::JL_1:
+ case X86::JLE_1:
+ case X86::JG_1:
+ case X86::JGE_1:
FuseKind = FuseInc;
break;
- case X86::JB_4:
- case X86::JBE_4:
- case X86::JA_4:
- case X86::JAE_4:
+ case X86::JB_1:
+ case X86::JBE_1:
+ case X86::JA_1:
+ case X86::JAE_1:
FuseKind = FuseCmp;
break;
- case X86::JS_4:
- case X86::JNS_4:
- case X86::JP_4:
- case X86::JNP_4:
- case X86::JO_4:
- case X86::JNO_4:
+ case X86::JS_1:
+ case X86::JNS_1:
+ case X86::JP_1:
+ case X86::JNP_1:
+ case X86::JO_1:
+ case X86::JNO_1:
FuseKind = FuseTest;
break;
}
@@ -5129,14 +5459,10 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
return FuseKind == FuseCmp || FuseKind == FuseInc;
case X86::INC16r:
case X86::INC32r:
- case X86::INC64_16r:
- case X86::INC64_32r:
case X86::INC64r:
case X86::INC8r:
case X86::DEC16r:
case X86::DEC32r:
- case X86::DEC64_16r:
- case X86::DEC64_32r:
case X86::DEC64r:
case X86::DEC8r:
return FuseKind == FuseInc;
@@ -5299,16 +5625,32 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getUnconditionalBranch(
MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
- Branch.setOpcode(X86::JMP_4);
+ Branch.setOpcode(X86::JMP_1);
Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getTrap(MCInst &MI) const {
MI.setOpcode(X86::TRAP);
}
+// See getTrap and getUnconditionalBranch for conditions on the value returned
+// by this function.
+unsigned X86InstrInfo::getJumpInstrTableEntryBound() const {
+  // 5 bytes suffice: a jump to Symbol@PLT encodes as 1 byte (E9) plus 4
+  // bytes for the symbol offset, and TRAP is ud2, which is two bytes (0F 0B).
+ return 5;
+}
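
The bound is plain arithmetic, restated here as a compile-time sketch: the rel32 jump encodes as one opcode byte (E9) plus a four-byte displacement, ud2 is two bytes, and the entry bound is the larger of the two.

    #include <algorithm>

    constexpr unsigned JmpRel32Bytes = 1 /*E9*/ + 4 /*rel32*/;
    constexpr unsigned TrapBytes     = 2;                      // ud2 = 0F 0B
    static_assert(std::max(JmpRel32Bytes, TrapBytes) == 5,
                  "jump instruction table entries fit in 5 bytes");
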
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
@@ -5351,10 +5693,10 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
- case X86::VSQRTPDZrm:
- case X86::VSQRTPDZrr:
- case X86::VSQRTPSZrm:
- case X86::VSQRTPSZrr:
+ case X86::VSQRTPDZm:
+ case X86::VSQRTPDZr:
+ case X86::VSQRTPSZm:
+ case X86::VSQRTPSZr:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
case X86::VSQRTSDZr:
@@ -5426,7 +5768,7 @@ namespace {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
unsigned PC;
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
@@ -5524,7 +5866,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
@@ -5545,7 +5887,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c177e3a5c7c7..5662e86932c2 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86INSTRUCTIONINFO_H
-#define X86INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRINFO_H
+#define LLVM_LIB_TARGET_X86_X86INSTRINFO_H
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86RegisterInfo.h"
@@ -152,6 +152,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
RegOp2MemOpTableType RegOp2MemOpTable1;
RegOp2MemOpTableType RegOp2MemOpTable2;
RegOp2MemOpTableType RegOp2MemOpTable3;
+ RegOp2MemOpTableType RegOp2MemOpTable4;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
@@ -404,7 +405,8 @@ public:
MachineInstr* MI,
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Alignment) const;
+ unsigned Size, unsigned Alignment,
+ bool AllowCommute) const;
void
getUnconditionalBranch(MCInst &Branch,
@@ -412,6 +414,8 @@ public:
void getTrap(MCInst &MI) const override;
+ unsigned getJumpInstrTableEntryBound() const override;
+
bool isHighLatencyDef(int opc) const override;
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0f872a676c25..53715dc6dd39 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -32,7 +32,8 @@ def SDTX86Cmov : SDTypeProfile<1, 4,
// Unary and binary operator instructions that set EFLAGS as a side-effect.
def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
- [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+ [SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
@@ -188,11 +189,15 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+def X86RecoverFrameAlloc : SDNode<"ISD::FRAME_ALLOC_RECOVER",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisInt<1>]>>;
+
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -261,121 +266,75 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
}
-def X86Mem8AsmOperand : AsmOperandClass {
- let Name = "Mem8"; let RenderMethod = "addMemOperands";
-}
-def X86Mem16AsmOperand : AsmOperandClass {
- let Name = "Mem16"; let RenderMethod = "addMemOperands";
-}
-def X86Mem32AsmOperand : AsmOperandClass {
- let Name = "Mem32"; let RenderMethod = "addMemOperands";
-}
-def X86Mem64AsmOperand : AsmOperandClass {
- let Name = "Mem64"; let RenderMethod = "addMemOperands";
-}
-def X86Mem80AsmOperand : AsmOperandClass {
- let Name = "Mem80"; let RenderMethod = "addMemOperands";
-}
-def X86Mem128AsmOperand : AsmOperandClass {
- let Name = "Mem128"; let RenderMethod = "addMemOperands";
-}
-def X86Mem256AsmOperand : AsmOperandClass {
- let Name = "Mem256"; let RenderMethod = "addMemOperands";
-}
-def X86Mem512AsmOperand : AsmOperandClass {
- let Name = "Mem512"; let RenderMethod = "addMemOperands";
-}
-
-// Gather mem operands
-def X86MemVX32Operand : AsmOperandClass {
- let Name = "MemVX32"; let RenderMethod = "addMemOperands";
-}
-def X86MemVY32Operand : AsmOperandClass {
- let Name = "MemVY32"; let RenderMethod = "addMemOperands";
-}
-def X86MemVZ32Operand : AsmOperandClass {
- let Name = "MemVZ32"; let RenderMethod = "addMemOperands";
-}
-def X86MemVX64Operand : AsmOperandClass {
- let Name = "MemVX64"; let RenderMethod = "addMemOperands";
-}
-def X86MemVY64Operand : AsmOperandClass {
- let Name = "MemVY64"; let RenderMethod = "addMemOperands";
-}
-def X86MemVZ64Operand : AsmOperandClass {
- let Name = "MemVZ64"; let RenderMethod = "addMemOperands";
+let RenderMethod = "addMemOperands" in {
+ def X86Mem8AsmOperand : AsmOperandClass { let Name = "Mem8"; }
+ def X86Mem16AsmOperand : AsmOperandClass { let Name = "Mem16"; }
+ def X86Mem32AsmOperand : AsmOperandClass { let Name = "Mem32"; }
+ def X86Mem64AsmOperand : AsmOperandClass { let Name = "Mem64"; }
+ def X86Mem80AsmOperand : AsmOperandClass { let Name = "Mem80"; }
+ def X86Mem128AsmOperand : AsmOperandClass { let Name = "Mem128"; }
+ def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; }
+ def X86Mem512AsmOperand : AsmOperandClass { let Name = "Mem512"; }
+ // Gather mem operands
+ def X86MemVX32Operand : AsmOperandClass { let Name = "MemVX32"; }
+ def X86MemVY32Operand : AsmOperandClass { let Name = "MemVY32"; }
+ def X86MemVZ32Operand : AsmOperandClass { let Name = "MemVZ32"; }
+ def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
+ def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
+ def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
}
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
let SuperClasses = [X86MemAsmOperand];
}
-class X86MemOperand<string printMethod> : Operand<iPTR> {
+
+class X86MemOperand<string printMethod,
+ AsmOperandClass parserMatchClass = X86MemAsmOperand> : Operand<iPTR> {
let PrintMethod = printMethod;
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = parserMatchClass;
+ let OperandType = "OPERAND_MEMORY";
}
-let OperandType = "OPERAND_MEMORY" in {
+// Gather mem operands
+class X86VMemOperand<RegisterClass RC, string printMethod,
+ AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, i8imm);
+}
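
The defaulted parserMatchClass parameter introduced above is the TableGen analogue of a defaulted constructor argument; defs override it only when they need a more specific match class. A rough C++ analogy, with all types invented for the sketch:

    struct AsmOperandClassInfo { const char *Name; };
    inline constexpr AsmOperandClassInfo X86MemAsmOperandInfo{"Mem"};

    struct MemOperandInfo {
      const char *PrintMethod;
      AsmOperandClassInfo ParserMatchClass;
      constexpr MemOperandInfo(const char *Print,
                               AsmOperandClassInfo Parser = X86MemAsmOperandInfo)
          : PrintMethod(Print), ParserMatchClass(Parser) {}
    };

    // Analogue of: def i8mem : X86MemOperand<"printi8mem", X86Mem8AsmOperand>;
    inline constexpr MemOperandInfo I8Mem{"printi8mem", {"Mem8"}};
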
+
+def anymem : X86MemOperand<"printanymem">;
+
def opaque32mem : X86MemOperand<"printopaquemem">;
def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem"> {
- let ParserMatchClass = X86Mem8AsmOperand; }
-def i16mem : X86MemOperand<"printi16mem"> {
- let ParserMatchClass = X86Mem16AsmOperand; }
-def i32mem : X86MemOperand<"printi32mem"> {
- let ParserMatchClass = X86Mem32AsmOperand; }
-def i64mem : X86MemOperand<"printi64mem"> {
- let ParserMatchClass = X86Mem64AsmOperand; }
-def i128mem : X86MemOperand<"printi128mem"> {
- let ParserMatchClass = X86Mem128AsmOperand; }
-def i256mem : X86MemOperand<"printi256mem"> {
- let ParserMatchClass = X86Mem256AsmOperand; }
-def i512mem : X86MemOperand<"printi512mem"> {
- let ParserMatchClass = X86Mem512AsmOperand; }
-def f32mem : X86MemOperand<"printf32mem"> {
- let ParserMatchClass = X86Mem32AsmOperand; }
-def f64mem : X86MemOperand<"printf64mem"> {
- let ParserMatchClass = X86Mem64AsmOperand; }
-def f80mem : X86MemOperand<"printf80mem"> {
- let ParserMatchClass = X86Mem80AsmOperand; }
-def f128mem : X86MemOperand<"printf128mem"> {
- let ParserMatchClass = X86Mem128AsmOperand; }
-def f256mem : X86MemOperand<"printf256mem">{
- let ParserMatchClass = X86Mem256AsmOperand; }
-def f512mem : X86MemOperand<"printf512mem">{
- let ParserMatchClass = X86Mem512AsmOperand; }
-def v512mem : Operand<iPTR> {
- let PrintMethod = "printf512mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, VR512, i32imm, i8imm);
- let ParserMatchClass = X86Mem512AsmOperand; }
+def i8mem : X86MemOperand<"printi8mem", X86Mem8AsmOperand>;
+def i16mem : X86MemOperand<"printi16mem", X86Mem16AsmOperand>;
+def i32mem : X86MemOperand<"printi32mem", X86Mem32AsmOperand>;
+def i64mem : X86MemOperand<"printi64mem", X86Mem64AsmOperand>;
+def i128mem : X86MemOperand<"printi128mem", X86Mem128AsmOperand>;
+def i256mem : X86MemOperand<"printi256mem", X86Mem256AsmOperand>;
+def i512mem : X86MemOperand<"printi512mem", X86Mem512AsmOperand>;
+def f32mem : X86MemOperand<"printf32mem", X86Mem32AsmOperand>;
+def f64mem : X86MemOperand<"printf64mem", X86Mem64AsmOperand>;
+def f80mem : X86MemOperand<"printf80mem", X86Mem80AsmOperand>;
+def f128mem : X86MemOperand<"printf128mem", X86Mem128AsmOperand>;
+def f256mem : X86MemOperand<"printf256mem", X86Mem256AsmOperand>;
+def f512mem : X86MemOperand<"printf512mem", X86Mem512AsmOperand>;
+
+def v512mem : X86VMemOperand<VR512, "printf512mem", X86Mem512AsmOperand>;
// Gather mem operands
-def vx32mem : X86MemOperand<"printi32mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
- let ParserMatchClass = X86MemVX32Operand; }
-def vy32mem : X86MemOperand<"printi32mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
- let ParserMatchClass = X86MemVY32Operand; }
-def vx64mem : X86MemOperand<"printi64mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
- let ParserMatchClass = X86MemVX64Operand; }
-def vy64mem : X86MemOperand<"printi64mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
- let ParserMatchClass = X86MemVY64Operand; }
-def vy64xmem : X86MemOperand<"printi64mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR256X, i32imm, i8imm);
- let ParserMatchClass = X86MemVY64Operand; }
-def vz32mem : X86MemOperand<"printi32mem">{
- let MIOperandInfo = (ops ptr_rc, i16imm, VR512, i32imm, i8imm);
- let ParserMatchClass = X86MemVZ32Operand; }
-def vz64mem : X86MemOperand<"printi64mem">{
- let MIOperandInfo = (ops ptr_rc, i8imm, VR512, i32imm, i8imm);
- let ParserMatchClass = X86MemVZ64Operand; }
-}
+def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>;
+def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>;
+def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>;
+def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>;
+def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;
+def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
+def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
@@ -424,134 +383,184 @@ def brtarget8 : Operand<OtherVT>;
}
-def X86SrcIdx8Operand : AsmOperandClass {
- let Name = "SrcIdx8";
- let RenderMethod = "addSrcIdxOperands";
- let SuperClasses = [X86Mem8AsmOperand];
-}
-def X86SrcIdx16Operand : AsmOperandClass {
- let Name = "SrcIdx16";
- let RenderMethod = "addSrcIdxOperands";
- let SuperClasses = [X86Mem16AsmOperand];
-}
-def X86SrcIdx32Operand : AsmOperandClass {
- let Name = "SrcIdx32";
- let RenderMethod = "addSrcIdxOperands";
- let SuperClasses = [X86Mem32AsmOperand];
-}
-def X86SrcIdx64Operand : AsmOperandClass {
- let Name = "SrcIdx64";
- let RenderMethod = "addSrcIdxOperands";
- let SuperClasses = [X86Mem64AsmOperand];
-}
-def X86DstIdx8Operand : AsmOperandClass {
- let Name = "DstIdx8";
- let RenderMethod = "addDstIdxOperands";
- let SuperClasses = [X86Mem8AsmOperand];
-}
-def X86DstIdx16Operand : AsmOperandClass {
- let Name = "DstIdx16";
- let RenderMethod = "addDstIdxOperands";
- let SuperClasses = [X86Mem16AsmOperand];
-}
-def X86DstIdx32Operand : AsmOperandClass {
- let Name = "DstIdx32";
- let RenderMethod = "addDstIdxOperands";
- let SuperClasses = [X86Mem32AsmOperand];
-}
-def X86DstIdx64Operand : AsmOperandClass {
- let Name = "DstIdx64";
- let RenderMethod = "addDstIdxOperands";
- let SuperClasses = [X86Mem64AsmOperand];
-}
-def X86MemOffs8AsmOperand : AsmOperandClass {
- let Name = "MemOffs8";
- let RenderMethod = "addMemOffsOperands";
- let SuperClasses = [X86Mem8AsmOperand];
-}
-def X86MemOffs16AsmOperand : AsmOperandClass {
- let Name = "MemOffs16";
- let RenderMethod = "addMemOffsOperands";
- let SuperClasses = [X86Mem16AsmOperand];
-}
-def X86MemOffs32AsmOperand : AsmOperandClass {
- let Name = "MemOffs32";
- let RenderMethod = "addMemOffsOperands";
- let SuperClasses = [X86Mem32AsmOperand];
-}
-def X86MemOffs64AsmOperand : AsmOperandClass {
- let Name = "MemOffs64";
- let RenderMethod = "addMemOffsOperands";
- let SuperClasses = [X86Mem64AsmOperand];
-}
-let OperandType = "OPERAND_MEMORY" in {
-def srcidx8 : Operand<iPTR> {
- let ParserMatchClass = X86SrcIdx8Operand;
- let MIOperandInfo = (ops ptr_rc, i8imm);
- let PrintMethod = "printSrcIdx8"; }
-def srcidx16 : Operand<iPTR> {
- let ParserMatchClass = X86SrcIdx16Operand;
- let MIOperandInfo = (ops ptr_rc, i8imm);
- let PrintMethod = "printSrcIdx16"; }
-def srcidx32 : Operand<iPTR> {
- let ParserMatchClass = X86SrcIdx32Operand;
- let MIOperandInfo = (ops ptr_rc, i8imm);
- let PrintMethod = "printSrcIdx32"; }
-def srcidx64 : Operand<iPTR> {
- let ParserMatchClass = X86SrcIdx64Operand;
+// Special parser match classes that use the current parsing mode to
+// disambiguate branch targets.
+def X86AbsMem16AsmOperand : AsmOperandClass {
+ let Name = "AbsMem16";
+ let RenderMethod = "addAbsMemOperands";
+ let SuperClasses = [X86AbsMemAsmOperand];
+}
+
+def X86AbsMem32AsmOperand : AsmOperandClass {
+ let Name = "AbsMem32";
+ let RenderMethod = "addAbsMemOperands";
+ let SuperClasses = [X86AbsMemAsmOperand];
+}
+
+// Branch targets have OtherVT type and print as pc-relative values.
+let OperandType = "OPERAND_PCREL",
+ PrintMethod = "printPCRelImm" in {
+let ParserMatchClass = X86AbsMem16AsmOperand in
+ def brtarget16 : Operand<OtherVT>;
+let ParserMatchClass = X86AbsMem32AsmOperand in
+ def brtarget32 : Operand<OtherVT>;
+}
+
+let RenderMethod = "addSrcIdxOperands" in {
+ def X86SrcIdx8Operand : AsmOperandClass {
+ let Name = "SrcIdx8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86SrcIdx16Operand : AsmOperandClass {
+ let Name = "SrcIdx16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86SrcIdx32Operand : AsmOperandClass {
+ let Name = "SrcIdx32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86SrcIdx64Operand : AsmOperandClass {
+ let Name = "SrcIdx64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addSrcIdxOperands"
+
+let RenderMethod = "addDstIdxOperands" in {
+ def X86DstIdx8Operand : AsmOperandClass {
+ let Name = "DstIdx8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86DstIdx16Operand : AsmOperandClass {
+ let Name = "DstIdx16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86DstIdx32Operand : AsmOperandClass {
+ let Name = "DstIdx32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86DstIdx64Operand : AsmOperandClass {
+ let Name = "DstIdx64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addDstIdxOperands"
+
+let RenderMethod = "addMemOffsOperands" in {
+ def X86MemOffs16_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs16_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs16_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs32_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs32_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs32_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs32_64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+ def X86MemOffs64_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs64_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs64_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs64_64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addMemOffsOperands"
+
+class X86SrcIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
let MIOperandInfo = (ops ptr_rc, i8imm);
- let PrintMethod = "printSrcIdx64"; }
-def dstidx8 : Operand<iPTR> {
- let ParserMatchClass = X86DstIdx8Operand;
- let MIOperandInfo = (ops ptr_rc);
- let PrintMethod = "printDstIdx8"; }
-def dstidx16 : Operand<iPTR> {
- let ParserMatchClass = X86DstIdx16Operand;
- let MIOperandInfo = (ops ptr_rc);
- let PrintMethod = "printDstIdx16"; }
-def dstidx32 : Operand<iPTR> {
- let ParserMatchClass = X86DstIdx32Operand;
- let MIOperandInfo = (ops ptr_rc);
- let PrintMethod = "printDstIdx32"; }
-def dstidx64 : Operand<iPTR> {
- let ParserMatchClass = X86DstIdx64Operand;
+}
+
+class X86DstIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
let MIOperandInfo = (ops ptr_rc);
- let PrintMethod = "printDstIdx64"; }
-def offset8 : Operand<iPTR> {
- let ParserMatchClass = X86MemOffs8AsmOperand;
- let MIOperandInfo = (ops i64imm, i8imm);
- let PrintMethod = "printMemOffs8"; }
-def offset16 : Operand<iPTR> {
- let ParserMatchClass = X86MemOffs16AsmOperand;
- let MIOperandInfo = (ops i64imm, i8imm);
- let PrintMethod = "printMemOffs16"; }
-def offset32 : Operand<iPTR> {
- let ParserMatchClass = X86MemOffs32AsmOperand;
- let MIOperandInfo = (ops i64imm, i8imm);
- let PrintMethod = "printMemOffs32"; }
-def offset64 : Operand<iPTR> {
- let ParserMatchClass = X86MemOffs64AsmOperand;
- let MIOperandInfo = (ops i64imm, i8imm);
- let PrintMethod = "printMemOffs64"; }
}
+def srcidx8 : X86SrcIdxOperand<"printSrcIdx8", X86SrcIdx8Operand>;
+def srcidx16 : X86SrcIdxOperand<"printSrcIdx16", X86SrcIdx16Operand>;
+def srcidx32 : X86SrcIdxOperand<"printSrcIdx32", X86SrcIdx32Operand>;
+def srcidx64 : X86SrcIdxOperand<"printSrcIdx64", X86SrcIdx64Operand>;
+def dstidx8 : X86DstIdxOperand<"printDstIdx8", X86DstIdx8Operand>;
+def dstidx16 : X86DstIdxOperand<"printDstIdx16", X86DstIdx16Operand>;
+def dstidx32 : X86DstIdxOperand<"printDstIdx32", X86DstIdx32Operand>;
+def dstidx64 : X86DstIdxOperand<"printDstIdx64", X86DstIdx64Operand>;
+
+class X86MemOffsOperand<Operand immOperand, string printMethod,
+ AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
+ let MIOperandInfo = (ops immOperand, i8imm);
+}
+
+def offset16_8 : X86MemOffsOperand<i16imm, "printMemOffs8",
+ X86MemOffs16_8AsmOperand>;
+def offset16_16 : X86MemOffsOperand<i16imm, "printMemOffs16",
+ X86MemOffs16_16AsmOperand>;
+def offset16_32 : X86MemOffsOperand<i16imm, "printMemOffs32",
+ X86MemOffs16_32AsmOperand>;
+def offset32_8 : X86MemOffsOperand<i32imm, "printMemOffs8",
+ X86MemOffs32_8AsmOperand>;
+def offset32_16 : X86MemOffsOperand<i32imm, "printMemOffs16",
+ X86MemOffs32_16AsmOperand>;
+def offset32_32 : X86MemOffsOperand<i32imm, "printMemOffs32",
+ X86MemOffs32_32AsmOperand>;
+def offset32_64 : X86MemOffsOperand<i32imm, "printMemOffs64",
+ X86MemOffs32_64AsmOperand>;
+def offset64_8 : X86MemOffsOperand<i64imm, "printMemOffs8",
+ X86MemOffs64_8AsmOperand>;
+def offset64_16 : X86MemOffsOperand<i64imm, "printMemOffs16",
+ X86MemOffs64_16AsmOperand>;
+def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32",
+ X86MemOffs64_32AsmOperand>;
+def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
+ X86MemOffs64_64AsmOperand>;
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
let OperandType = "OPERAND_IMMEDIATE";
}
+def i8immZExt3 : ImmLeaf<i8, [{
+ return Imm >= 0 && Imm < 8;
+}]>;
+
def AVXCC : Operand<i8> {
let PrintMethod = "printAVXCC";
let OperandType = "OPERAND_IMMEDIATE";
}
-class ImmSExtAsmOperandClass : AsmOperandClass {
- let SuperClasses = [ImmAsmOperand];
- let RenderMethod = "addImmOperands";
-}
+def i8immZExt5 : ImmLeaf<i8, [{
+ return Imm >= 0 && Imm < 32;
+}]>;
+// AVX-512 uses a 32-bit immediate in its intrinsics.
+def i32immZExt5 : ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]>;
-class ImmZExtAsmOperandClass : AsmOperandClass {
+class ImmSExtAsmOperandClass : AsmOperandClass {
let SuperClasses = [ImmAsmOperand];
let RenderMethod = "addImmOperands";
}
@@ -568,6 +577,7 @@ def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -595,12 +605,6 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
-// [0, 0x000000FF]
-def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
- let Name = "ImmZExtu32u8";
-}
-
-
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
@@ -620,11 +624,6 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
-// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
-def u32u8imm : Operand<i32> {
- let ParserMatchClass = ImmZExtu32u8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
@@ -647,14 +646,14 @@ def i64i8imm : Operand<i64> {
}
def lea64_32mem : Operand<i32> {
- let PrintMethod = "printi32mem";
+ let PrintMethod = "printanymem";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
// Memory operands that use 64-bit pointers in both ILP32 and LP64.
def lea64mem : Operand<i64> {
- let PrintMethod = "printi64mem";
+ let PrintMethod = "printanymem";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
@@ -708,6 +707,7 @@ def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def NoSSE41 : Predicate<"!Subtarget->hasSSE41()">;
def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
@@ -724,9 +724,11 @@ def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasERI : Predicate<"Subtarget->hasERI()">;
def HasDQI : Predicate<"Subtarget->hasDQI()">;
+def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
+def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -748,8 +750,10 @@ def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
+def HasSGX : Predicate<"Subtarget->hasSGX()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasSMAP : Predicate<"Subtarget->hasSMAP()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
@@ -758,6 +762,8 @@ def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
+def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
def In16BitMode : Predicate<"Subtarget->is16Bit()">,
AssemblerPredicate<"Mode16Bit", "16-bit mode">;
def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
@@ -781,6 +787,7 @@ def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
+def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -811,6 +818,11 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
+// Predicate used when pattern matching LZCNT/TZCNT.
+def X86_COND_E_OR_NE : ImmLeaf<i8, [{
+ return (Imm == X86::COND_E) || (Imm == X86::COND_NE);
+}]>;
+
let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
@@ -913,7 +925,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
//
// Nop
-let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
+let hasSideEffects = 0, SchedRW = [WriteZero] in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero),
"nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16;
@@ -927,12 +939,12 @@ def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
"enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
let SchedRW = [WriteALU] in {
-let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, hasSideEffects=0 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
Requires<[Not64BitMode]>;
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, hasSideEffects = 0 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
Requires<[In64BitMode]>;
@@ -942,7 +954,7 @@ def LEAVE64 : I<0xC9, RawFrm,
// Miscellaneous Instructions.
//
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let Defs = [ESP], Uses = [ESP], hasSideEffects=0 in {
let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize16;
@@ -956,11 +968,6 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [],
IIC_POP_MEM>, OpSize32, Requires<[Not64BitMode]>;
-
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>,
- OpSize16;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
- OpSize32, Requires<[Not64BitMode]>;
} // mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
@@ -989,16 +996,26 @@ def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
+} // mayStore, SchedRW
+}
+let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
+ SchedRW = [WriteLoad] in {
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>,
+ OpSize16;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
+ OpSize32, Requires<[Not64BitMode]>;
+}
+
+let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0,
+ SchedRW = [WriteStore] in {
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
OpSize16;
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
OpSize32, Requires<[Not64BitMode]>;
-
-} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let Defs = [RSP], Uses = [RSP], hasSideEffects=0 in {
let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>;
@@ -1017,7 +1034,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>;
@@ -1029,22 +1046,22 @@ def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
Requires<[In64BitMode]>;
}
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+ mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteLoad] in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popal", [], IIC_POP_A>,
OpSize32, Requires<[Not64BitMode]>;
def POPA16 : I<0x61, RawFrm, (outs), (ins), "popaw", [], IIC_POP_A>,
OpSize16, Requires<[Not64BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+ mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pushal", [], IIC_PUSH_A>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", [], IIC_PUSH_A>,
@@ -1174,7 +1191,7 @@ def CMPSQ : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
// Move Instructions.
//
let SchedRW = [WriteMove] in {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1233,62 +1250,67 @@ def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
let hasSideEffects = 0 in {
-/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
-/// 32-bit offset from the segment base. These are only valid in x86-32 mode.
+/// Memory offset versions of moves. The immediate is an address-mode-sized
+/// offset from the segment base.
let SchedRW = [WriteALU] in {
let mayLoad = 1 in {
let Defs = [AL] in
-def MOV8o8a : Ii32 <0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
- Requires<[In32BitMode]>;
+def MOV8ao32 : Ii32<0xA0, RawFrmMemOffs, (outs), (ins offset32_8:$src),
+ "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
+ AdSize32;
let Defs = [AX] in
-def MOV16o16a : Ii32 <0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>,
- OpSize16, Requires<[In32BitMode]>;
+def MOV16ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_16:$src),
+ "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>,
+ OpSize16, AdSize32;
let Defs = [EAX] in
-def MOV32o32a : Ii32 <0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
- OpSize32, Requires<[In32BitMode]>;
+def MOV32ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_32:$src),
+ "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
+ OpSize32, AdSize32;
+let Defs = [RAX] in
+def MOV64ao32 : RIi32<0xA1, RawFrmMemOffs, (outs), (ins offset32_64:$src),
+ "mov{q}\t{$src, %rax|rax, $src}", [], IIC_MOV_MEM>,
+ AdSize32;
let Defs = [AL] in
-def MOV8o8a_16 : Ii16 <0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
- AdSize, Requires<[In16BitMode]>;
+def MOV8ao16 : Ii16<0xA0, RawFrmMemOffs, (outs), (ins offset16_8:$src),
+ "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, AdSize16;
let Defs = [AX] in
-def MOV16o16a_16 : Ii16 <0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>,
- OpSize16, AdSize, Requires<[In16BitMode]>;
+def MOV16ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_16:$src),
+ "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>,
+ OpSize16, AdSize16;
let Defs = [EAX] in
-def MOV32o32a_16 : Ii16 <0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
- AdSize, OpSize32, Requires<[In16BitMode]>;
+def MOV32ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_32:$src),
+ "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
+ AdSize16, OpSize32;
}
let mayStore = 1 in {
let Uses = [AL] in
-def MOV8ao8 : Ii32 <0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
- Requires<[In32BitMode]>;
+def MOV8o32a : Ii32<0xA2, RawFrmMemOffs, (outs offset32_8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, AdSize32;
let Uses = [AX] in
-def MOV16ao16 : Ii32 <0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>,
- OpSize16, Requires<[In32BitMode]>;
+def MOV16o32a : Ii32<0xA3, RawFrmMemOffs, (outs offset32_16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>,
+ OpSize16, AdSize32;
let Uses = [EAX] in
-def MOV32ao32 : Ii32 <0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
- OpSize32, Requires<[In32BitMode]>;
+def MOV32o32a : Ii32<0xA3, RawFrmMemOffs, (outs offset32_32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
+ OpSize32, AdSize32;
+let Uses = [RAX] in
+def MOV64o32a : RIi32<0xA3, RawFrmMemOffs, (outs offset32_64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, rax}", [], IIC_MOV_MEM>,
+ AdSize32;
let Uses = [AL] in
-def MOV8ao8_16 : Ii16 <0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
- AdSize, Requires<[In16BitMode]>;
+def MOV8o16a : Ii16<0xA2, RawFrmMemOffs, (outs offset16_8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, AdSize16;
let Uses = [AX] in
-def MOV16ao16_16 : Ii16 <0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>,
- OpSize16, AdSize, Requires<[In16BitMode]>;
+def MOV16o16a : Ii16<0xA3, RawFrmMemOffs, (outs offset16_16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>,
+ OpSize16, AdSize16;
let Uses = [EAX] in
-def MOV32ao32_16 : Ii16 <0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
- OpSize32, AdSize, Requires<[In16BitMode]>;
+def MOV32o16a : Ii16<0xA3, RawFrmMemOffs, (outs offset16_32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
+ OpSize32, AdSize16;
}
}
@@ -1296,40 +1318,34 @@ def MOV32ao32_16 : Ii16 <0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
// and use the movabs mnemonic to indicate this specific form.
let mayLoad = 1 in {
let Defs = [AL] in
-def MOV64o8a : RIi64_NOREX<0xA0, RawFrmMemOffs, (outs), (ins offset8:$src),
- "movabs{b}\t{$src, %al|al, $src}", []>,
- Requires<[In64BitMode]>;
+def MOV8ao64 : RIi64_NOREX<0xA0, RawFrmMemOffs, (outs), (ins offset64_8:$src),
+ "movabs{b}\t{$src, %al|al, $src}", []>, AdSize64;
let Defs = [AX] in
-def MOV64o16a : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset16:$src),
- "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize16,
- Requires<[In64BitMode]>;
+def MOV16ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_16:$src),
+ "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize16, AdSize64;
let Defs = [EAX] in
-def MOV64o32a : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset32:$src),
+def MOV32ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_32:$src),
"movabs{l}\t{$src, %eax|eax, $src}", []>, OpSize32,
- Requires<[In64BitMode]>;
+ AdSize64;
let Defs = [RAX] in
-def MOV64o64a : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64:$src),
- "movabs{q}\t{$src, %rax|rax, $src}", []>,
- Requires<[In64BitMode]>;
+def MOV64ao64 : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64_64:$src),
+ "movabs{q}\t{$src, %rax|rax, $src}", []>, AdSize64;
}
let mayStore = 1 in {
let Uses = [AL] in
-def MOV64ao8 : RIi64_NOREX<0xA2, RawFrmMemOffs, (outs offset8:$dst), (ins),
- "movabs{b}\t{%al, $dst|$dst, al}", []>,
- Requires<[In64BitMode]>;
+def MOV8o64a : RIi64_NOREX<0xA2, RawFrmMemOffs, (outs offset64_8:$dst), (ins),
+ "movabs{b}\t{%al, $dst|$dst, al}", []>, AdSize64;
let Uses = [AX] in
-def MOV64ao16 : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset16:$dst), (ins),
- "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize16,
- Requires<[In64BitMode]>;
+def MOV16o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset64_16:$dst), (ins),
+ "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize16, AdSize64;
let Uses = [EAX] in
-def MOV64ao32 : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset32:$dst), (ins),
+def MOV32o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs offset64_32:$dst), (ins),
"movabs{l}\t{%eax, $dst|$dst, eax}", []>, OpSize32,
- Requires<[In64BitMode]>;
+ AdSize64;
let Uses = [RAX] in
-def MOV64ao64 : RIi64<0xA3, RawFrmMemOffs, (outs offset64:$dst), (ins),
- "movabs{q}\t{%rax, $dst|$dst, rax}", []>,
- Requires<[In64BitMode]>;
+def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs offset64_64:$dst), (ins),
+ "movabs{q}\t{%rax, $dst|$dst, rax}", []>, AdSize64;
}
} // hasSideEffects = 0
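
The renamed moffs instructions appear to follow one convention: MOV<data>ao<addr> for loads into the accumulator and MOV<data>o<addr>a for stores, with the data size taken from the mnemonic suffix and the address size from the AdSize16/32/64 attribute. A toy name builder illustrating the pattern (a sketch, not part of the patch):

    #include <string>

    static std::string moffsName(unsigned DataBits, unsigned AddrBits,
                                 bool LoadIntoAcc) {
      // moffsName(8, 32, true) == "MOV8ao32"; moffsName(8, 32, false) == "MOV8o32a"
      return "MOV" + std::to_string(DataBits) +
             (LoadIntoAcc ? "ao" : "o") + std::to_string(AddrBits) +
             (LoadIntoAcc ? "" : "a");
    }
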
@@ -1379,17 +1395,17 @@ def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
Sched<[WriteMove]>;
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, hasSideEffects = 0 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
IIC_MOV_MEM>, Sched<[WriteStore]>;
-let mayLoad = 1, neverHasSideEffects = 1,
+let mayLoad = 1, hasSideEffects = 0,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
@@ -1403,7 +1419,7 @@ let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
-let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
} // SchedRW
@@ -1989,42 +2005,42 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
}
let Predicates = [HasLZCNT] in {
- def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
- (X86cmp GR16:$src, (i16 0))),
+ def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E_OR_NE),
+ (X86cmp GR16:$src, (i16 0))),
(LZCNT16rr GR16:$src)>;
- def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
+ def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E_OR_NE),
(X86cmp GR32:$src, (i32 0))),
(LZCNT32rr GR32:$src)>;
- def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E),
+ def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E_OR_NE),
(X86cmp GR64:$src, (i64 0))),
(LZCNT64rr GR64:$src)>;
- def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E_OR_NE),
(X86cmp GR16:$src, (i16 0))),
(LZCNT16rr GR16:$src)>;
- def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E_OR_NE),
(X86cmp GR32:$src, (i32 0))),
(LZCNT32rr GR32:$src)>;
- def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E_OR_NE),
(X86cmp GR64:$src, (i64 0))),
(LZCNT64rr GR64:$src)>;
- def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E_OR_NE),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
- def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E_OR_NE),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
- def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E_OR_NE),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
- def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
- def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
- def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
}
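
The rewritten patterns rely on lzcnt defining the zero-input case (lzcnt(0) equals the bit width), so the compare-and-select around ctlz folds to the bare instruction regardless of whether canonicalization produced COND_E or COND_NE. A small C++ model of the equivalence (uses the GCC/Clang builtin; names invented):

    #include <cassert>
    #include <cstdint>

    // The select-around-ctlz idiom the Pat<> rules above match.
    static unsigned selectCtlz32(uint32_t X) {
      return X == 0 ? 32u : static_cast<unsigned>(__builtin_clz(X));
    }

    int main() {
      assert(selectCtlz32(0) == 32);            // lzcnt's defined zero case
      assert(selectCtlz32(1) == 31);
      assert(selectCtlz32(0x80000000u) == 0);
      return 0;
    }
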
@@ -2105,42 +2121,42 @@ let Predicates = [HasBMI] in {
}
let Predicates = [HasBMI] in {
- def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E),
+ def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E_OR_NE),
(X86cmp GR16:$src, (i16 0))),
(TZCNT16rr GR16:$src)>;
- def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E),
+ def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E_OR_NE),
(X86cmp GR32:$src, (i32 0))),
(TZCNT32rr GR32:$src)>;
- def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E),
+ def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E_OR_NE),
(X86cmp GR64:$src, (i64 0))),
(TZCNT64rr GR64:$src)>;
- def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E_OR_NE),
(X86cmp GR16:$src, (i16 0))),
(TZCNT16rr GR16:$src)>;
- def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E_OR_NE),
(X86cmp GR32:$src, (i32 0))),
(TZCNT32rr GR32:$src)>;
- def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E),
+ def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E_OR_NE),
(X86cmp GR64:$src, (i64 0))),
(TZCNT64rr GR64:$src)>;
- def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E_OR_NE),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
- def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E_OR_NE),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
- def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E_OR_NE),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
- def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
- def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
- def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E_OR_NE),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
}
@@ -2400,6 +2416,7 @@ include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrTSX.td"
+include "X86InstrSGX.td"
// System instructions.
include "X86InstrSystem.td"
@@ -2518,7 +2535,7 @@ def : MnemonicAlias<"fldcww", "fldcw", "att">;
def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
def : MnemonicAlias<"fucomip", "fucompi", "att">;
-def : MnemonicAlias<"fwait", "wait", "att">;
+def : MnemonicAlias<"fwait", "wait">;
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
@@ -2707,28 +2724,28 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// this is compatible with what GAS does.
def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall {*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp {*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"lcall *$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp *$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall {*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp {*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"call *$dst", (CALL64m i16mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP64m i16mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"call *$dst", (CALL32m i16mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP32m i16mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"call *$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"call {*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"jmp {*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"call {*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"jmp {*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"call {*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
-def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
-def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
-def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
-def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
-def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
@@ -2752,34 +2769,34 @@ def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
// Force mov without a suffix with a segment and mem to prefer the 'l' form of
// the move. All segment/mem forms are equivalent; this one has the shortest
// encoding.
-def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
-def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
+def : InstAlias<"mov {$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
+def : InstAlias<"mov {$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+def : InstAlias<"movq {$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq $src, $dst",
+def : InstAlias<"movq {$src, $dst|$dst, $src}",
(MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq $src, $dst",
+def : InstAlias<"movq {$src, $dst|$dst, $src}",
(MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
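// Illustrative: with no size suffix the register operand sizes disambiguate,
// so "movzx %al, %esi" selects MOVZX32rr8, i.e. "movzbl %al, %esi".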
// outb %dx -> outb %al, %dx
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index ecf80a1ac9bf..5b36427acb1b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -38,12 +38,17 @@ def MMX_PHADDSUBD : OpndItins<
>;
}
+let Sched = WriteVecLogic in
+def MMX_INTALU_ITINS_VECLOGICSCHED : OpndItins<
+ IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
+>;
+
let Sched = WriteVecIMul in
def MMX_PMUL_ITINS : OpndItins<
IIC_MMX_PMUL, IIC_MMX_PMUL
>;
-let Sched = WriteVecALU in {
+let Sched = WriteVecIMul in {
def MMX_PSADBW_ITINS : OpndItins<
IIC_MMX_PSADBW, IIC_MMX_PSADBW
>;
@@ -166,13 +171,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
+ Sched<[WriteShuffle]>;
def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -192,11 +199,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- NoItinerary, d>;
+ NoItinerary, d>, Sched<[WriteCvtI2F]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- NoItinerary, d>;
+ NoItinerary, d>, Sched<[WriteCvtI2FLd]>;
}
//===----------------------------------------------------------------------===//
@@ -213,7 +220,7 @@ def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
+ [(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let canFoldAsLoad = 1 in
@@ -247,10 +254,10 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
let SchedRW = [WriteMove] in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
+ "movd\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
@@ -427,13 +434,13 @@ let Constraints = "$src1 = $dst" in
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
- MMX_INTALU_ITINS>;
+ MMX_INTALU_ITINS_VECLOGICSCHED>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -479,19 +486,19 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
MMX_INTALU_ITINS>;
// -- Unpack Instructions
-defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
MMX_UNPCK_L_ITINS>;
-defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
MMX_UNPCK_L_ITINS>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
@@ -559,7 +566,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
- (outs VR64:$dst),
+ (outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
diff --git a/lib/Target/X86/X86InstrSGX.td b/lib/Target/X86/X86InstrSGX.td
new file mode 100644
index 000000000000..47c5dc57fcc9
--- /dev/null
+++ b/lib/Target/X86/X86InstrSGX.td
@@ -0,0 +1,24 @@
+//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel SGX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SGX instructions
+
+// ENCLS - Execute an Enclave System Function of Specified Leaf Number
+def ENCLS : I<0x01, MRM_CF, (outs), (ins),
+ "encls", []>, TB, Requires<[HasSGX]>;
+
+// ENCLU - Execute an Enclave User Function of Specified Leaf Number
+def ENCLU : I<0x01, MRM_D7, (outs), (ins),
+ "enclu", []>, TB, Requires<[HasSGX]>;
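+
+// Both instructions take their leaf number implicitly in EAX (per the SDM,
+// e.g. ENCLS leaf 0 is ECREATE and ENCLU leaf 2 is EENTER), with further
+// leaf-specific parameters in RBX/RCX/RDX; no explicit operands are modeled.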
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2bb898e7465b..fca0c443bf48 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -181,6 +181,7 @@ def SSE_MPSADBW_ITINS : OpndItins<
IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULLD_ITINS : OpndItins<
IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;
@@ -218,11 +219,21 @@ def DEFAULT_ITINS_BLENDSCHED : OpndItins<
IIC_ALU_NONMEM, IIC_ALU_MEM
>;
+let Sched = WriteVarBlend in
+def DEFAULT_ITINS_VARBLENDSCHED : OpndItins<
+ IIC_ALU_NONMEM, IIC_ALU_MEM
+>;
+
let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
+let Sched = WriteBlend in
+def SSE_INTALU_ITINS_BLEND_P : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -601,29 +612,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Patterns
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in {
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVS{S,D} to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
-
- // Move low f32 and clear high bits.
- def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4f32 (V_SET0)),
- (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
- }
-
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
@@ -659,31 +647,10 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0),
- (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
- sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0),
- (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
- sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
- // Move low f64 and clear high bits.
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2f64 (V_SET0)),
- (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
-
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2i64 (V_SET0)),
- (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
-
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
@@ -734,7 +701,6 @@ let Predicates = [UseAVX] in {
(EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
sub_xmm)>;
-
// FIXME: Instead of an X86Movlps there should be an X86Movsd here; the problem
// is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -750,7 +716,7 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseSSE1] in {
- let AddedComplexity = 15 in {
+ let Predicates = [NoSSE41], AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
@@ -784,7 +750,7 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in {
+ let Predicates = [NoSSE41], AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSD to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
@@ -843,7 +809,7 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
string asm, Domain d,
OpndItins itins,
bit IsReMaterializable = 1> {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
Sched<[WriteFShuffle]>;
@@ -854,6 +820,7 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
Sched<[WriteLoad]>;
}
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
PS, VEX;
@@ -879,20 +846,26 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
PD, VEX, VEX_L;
+}
+
+let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
PS;
-defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
PS;
+}
+let Predicates = [UseSSE2] in {
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
PD;
+}
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -1006,7 +979,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteMove] in {
+ SchedRW = [WriteFShuffle] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1036,7 +1009,7 @@ let Predicates = [UseSSE2] in
(MOVUPDmr addr:$dst, VR128:$src)>;
// Use vmovaps/vmovups for AVX integer load/store.
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
// 128-bit load/store
def : Pat<(alignedloadv2i64 addr:$src),
(VMOVAPSrm addr:$src)>;
@@ -1251,6 +1224,9 @@ let Predicates = [HasAVX] in {
(VMOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
@@ -1298,6 +1274,9 @@ let Predicates = [UseSSE2] in {
(MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
@@ -1353,6 +1332,8 @@ let Predicates = [HasAVX] in {
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
+ // VMOVHPD patterns
+
// FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the problem
// is during lowering, where it's not possible to recognize the load fold
// because it has two uses through a bitcast. One use disappears at isel time
@@ -1360,6 +1341,16 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
+ // Also handle an i64 load because that may get selected as a faster way to
+ // load the data.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
+ (iPTR 0))), addr:$dst),
+ (VMOVHPDmr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE1] in {
@@ -1373,6 +1364,8 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
+ // MOVHPD patterns
+
// FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the problem
// is during lowering, where it's not possible to recognize the load fold
// because it has two uses through a bitcast. One use disappears at isel time
@@ -1380,6 +1373,16 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+ // Also handle an i64 load because that may get selected as a faster way to
+ // load the data.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
+ (iPTR 0))), addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -1488,7 +1491,7 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm, Domain d,
OpndItins itins> {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
@@ -1499,7 +1502,7 @@ let neverHasSideEffects = 1 in {
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
-let neverHasSideEffects = 1, Predicates = [UseAVX] in {
+let hasSideEffects = 0, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[WriteCvtI2F]>;
@@ -1508,7 +1511,7 @@ let neverHasSideEffects = 1, Predicates = [UseAVX] in {
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[WriteCvtI2FLd, ReadAfterLd]>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
}
let Predicates = [UseAVX] in {
@@ -1815,7 +1818,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let neverHasSideEffects = 1, Predicates = [UseAVX] in {
+let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1880,7 +1883,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let neverHasSideEffects = 1, Predicates = [UseAVX] in {
+let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2202,7 +2205,7 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
-let neverHasSideEffects = 1, mayLoad = 1 in
+let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[]>, VEX, Sched<[WriteCvtI2FLd]>;
@@ -2224,7 +2227,7 @@ def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
Sched<[WriteCvtI2F]>;
}
-let neverHasSideEffects = 1, mayLoad = 1 in
+let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
@@ -2330,15 +2333,15 @@ let Predicates = [UseSSE2] in {
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
Operand CC, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm, string asm_alt,
- OpndItins itins> {
+ OpndItins itins, ImmLeaf immLeaf> {
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))],
itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))],
+ (ld_frag addr:$src2), immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -2358,38 +2361,37 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S>,
- XS, VEX_4V, VEX_LIG;
+ SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S>, // same latency as 32 bit compare
+ SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
- XS;
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S,
+ i8immZExt3>, XS;
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSE_ALU_F64S>,
- XD;
+ SSE_ALU_F64S, i8immZExt3>, XD;
}
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
- Intrinsic Int, string asm, OpndItins itins> {
+ Intrinsic Int, string asm, OpndItins itins,
+ ImmLeaf immLeaf> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, imm:$cc))],
+ VR128:$src, immLeaf:$cc))],
itins.rr>,
Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), imm:$cc))],
+ (load addr:$src), immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -2398,19 +2400,19 @@ let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S>,
+ SSE_ALU_F32S, i8immZExt5>,
XS, VEX_4V;
defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S>, // same latency as f32
+ SSE_ALU_F32S, i8immZExt5>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S>, XS;
+ SSE_ALU_F32S, i8immZExt3>, XS;
defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F64S>,
+ SSE_ALU_F64S, i8immZExt3>,
XD;
}
}
@@ -2484,16 +2486,16 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
- string asm_alt, Domain d,
+ string asm_alt, Domain d, ImmLeaf immLeaf,
OpndItins itins = SSE_ALU_F32P> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), immLeaf:$cc))],
itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
@@ -2512,28 +2514,28 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle, i8immZExt5>, PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble, i8immZExt5>, PD, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle, i8immZExt5>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble, i8immZExt5>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle, SSE_ALU_F32P>, PS;
+ SSEPackedSingle, i8immZExt5, SSE_ALU_F32P>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble, SSE_ALU_F64P>, PD;
+ SSEPackedDouble, i8immZExt5, SSE_ALU_F64P>, PD;
}
let Predicates = [HasAVX] in {
@@ -2577,18 +2579,17 @@ def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
- Domain d, bit IsConvertibleToThreeAddress = 0> {
+ Domain d> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
- let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
- [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
- Sched<[WriteFShuffle]>;
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteFShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2607,10 +2608,10 @@ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, PS;
+ memopv4f32, SSEPackedSingle>, PS;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, PD;
+ memopv2f64, SSEPackedDouble>, PD;
}
let Predicates = [HasAVX] in {
@@ -2850,7 +2851,7 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoVLX] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
@@ -2858,7 +2859,7 @@ let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, itins, IsCommutable, 1>;
-let Predicates = [HasAVX2] in
+let Predicates = [HasAVX2, NoVLX] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
@@ -2920,6 +2921,7 @@ let isCodeGenOnly = 1 in {
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
+ let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
@@ -2950,6 +2952,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>,
PD, VEX_4V;
+ }
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
@@ -3005,6 +3008,7 @@ let Predicates = [HasAVX1Only] in {
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
+ let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
@@ -3018,6 +3022,7 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
+ }
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
@@ -3111,7 +3116,7 @@ let isCodeGenOnly = 1 in {
// previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
-//
+//
// we now generate:
// addss %xmm1, %xmm0
@@ -3136,7 +3141,6 @@ let Predicates = [UseSSE1] in {
let Predicates = [UseSSE2] in {
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
-
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
(f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
@@ -3156,10 +3160,10 @@ let Predicates = [UseSSE2] in {
}
let Predicates = [UseSSE41] in {
- // If the subtarget has SSE4.1 but not AVX, the vector insert
- // instruction is lowered into a X86insertps rather than a X86Movss.
- // When selecting SSE scalar single-precision fp arithmetic instructions,
- // make sure that we correctly match the X86insertps.
+  // If the subtarget has SSE4.1 but not AVX, the vector insert instruction is
+  // lowered into an X86insertps or an X86Blendi rather than an X86Movss. When
+  // selecting SSE scalar single-precision fp arithmetic instructions, make
+  // sure that we correctly match them.
def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
@@ -3177,6 +3181,57 @@ let Predicates = [UseSSE41] in {
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
let Predicates = [HasAVX] in {
@@ -3215,6 +3270,57 @@ let Predicates = [HasAVX] in {
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from
@@ -3232,7 +3338,7 @@ let Predicates = [HasAVX] in {
// previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
-//
+//
// we now generate:
// addss %xmm1, %xmm0
@@ -3240,13 +3346,13 @@ let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(MULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
}
@@ -3269,6 +3375,49 @@ let Predicates = [UseSSE2] in {
(DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
+let Predicates = [UseSSE41] in {
+ // With SSE4.1 we may see these operations using X86Blendi rather than
+ // X86Movs{s,d}.
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+}
+
let Predicates = [HasAVX] in {
// The following patterns select AVX Scalar single/double precision fp
// arithmetic instructions from a packed single precision fp instruction
@@ -3298,6 +3447,46 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
(fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
(VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ // Also handle X86Blendi-based patterns.
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
/// Unop Arithmetic
@@ -3326,6 +3515,16 @@ def SSE_SQRTSD : OpndItins<
>;
}
+let Sched = WriteFRsqrt in {
+def SSE_RSQRTPS : OpndItins<
+ IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM
+>;
+
+def SSE_RSQRTSS : OpndItins<
+ IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM
+>;
+}
+
let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
@@ -3336,57 +3535,10 @@ def SSE_RCPS : OpndItins<
>;
}
-/// sse1_fp_unop_s - SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
-let Predicates = [HasAVX], hasSideEffects = 0 in {
- def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
- let mayLoad = 1 in {
- def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1,f32mem:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
- let isCodeGenOnly = 1 in
- def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
- }
-}
-
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
-let isCodeGenOnly = 1 in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
- Sched<[itins.Sched]>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
- Sched<[itins.Sched.Folded]>;
-}
-}
-
-/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
-multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+/// For the non-AVX defs, we need $src1 to be tied to $dst because
+/// the HW instructions are two-operand (destructive).
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
let Predicates = [HasAVX], hasSideEffects = 0 in {
def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
@@ -3595,20 +3747,19 @@ let Predicates = [HasAVX] in {
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
- SSE_SQRTSS>,
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>,
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
-defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
int_x86_avx_rcp_ps_256, SSE_RCPP>;
@@ -3669,13 +3820,15 @@ let Predicates = [HasAVX] in {
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
+// These are unary operations, but they are modeled as having two source
+// operands because the high elements of the destination are unchanged in SSE.
let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
(RCPSSr_Int VR128:$src, VR128:$src)>;
+ def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
+ (SQRTSSr_Int VR128:$src, VR128:$src)>;
}
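// Illustrative: "rsqrtss %xmm1, %xmm0" writes only xmm0[31:0] and leaves
// xmm0[127:32] untouched, which is why the intrinsic patterns above pass
// $src as both source operands.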
// There is no f64 version of the reciprocal approximation instructions.
@@ -3686,6 +3839,7 @@ let Predicates = [UseSSE1] in {
let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {
+let Predicates = [HasAVX, NoVLX] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3726,6 +3880,7 @@ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
IIC_SSE_MOVNT>, VEX, VEX_L;
+}
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3755,6 +3910,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVNTPSmr addr:$dst, VR128:$src)>;
+}
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (MOVNTPSmr addr:$dst, VR128:$src)>;
+
} // AddedComplexity
//===----------------------------------------------------------------------===//
@@ -3788,8 +3951,8 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins),
- "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
+def PAUSE : I<0x90, RawFrm, (outs), (ins),
+ "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
OBXS, Requires<[HasSSE2]>;
}
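// Illustrative: the XS (F3) prefix on opcode 0x90 yields the byte sequence
// "f3 90"; pre-SSE2 processors execute that as "rep; nop", i.e. a plain nop.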
@@ -3821,12 +3984,14 @@ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
+let Predicates = [UseSSE1] in {
+def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
+def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
+}
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3834,7 +3999,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
+let hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
@@ -3869,7 +4034,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+ hasSideEffects = 0, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3886,7 +4051,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3906,7 +4071,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
let SchedRW = [WriteMove] in {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3927,7 +4092,7 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
} // SchedRW
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+ hasSideEffects = 0, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3939,7 +4104,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -5222,7 +5387,7 @@ let Predicates = [UseSSE3] in {
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_dfp<string OpcodeStr> {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
@@ -5277,6 +5442,13 @@ let Predicates = [HasAVX] in {
(VMOVDDUPYrr VR256:$src)>;
}
+let Predicates = [UseAVX, OptForSize] in {
+ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
+}
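+// Illustrative: under OptForSize a v2f64/v2i64 splat of a 64-bit load then
+// becomes a single "vmovddup (%rdi), %xmm0" rather than a separate load
+// plus shuffle.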
+
let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
@@ -5357,56 +5529,34 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
// Patterns used to select 'addsub' instructions.
let Predicates = [HasAVX] in {
- // Constant 170 corresponds to the binary mask '10101010'.
- // When used as a blend mask, it allows selecting eight elements from two
- // input vectors as follow:
- // - Even-numbered values in the destination are copied from
- // the corresponding elements in the first input vector;
- // - Odd-numbered values in the destination are copied from
- // the corresponding elements in the second input vector.
-
- def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
- (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
- (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
-
- // Constant 10 corresponds to the binary mask '1010'.
- // In the two pattens below, constant 10 is used as a blend mask to select
- // - the 1st and 3rd element from the first input vector (the 'fsub' node);
- // - the 2nd and 4th element from the second input vector (the 'fadd' node).
-
- def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
- (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
- (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
- (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
- (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
+
+ def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
+ (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))),
+ (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
+ def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
+ (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
}
let Predicates = [UseSSE3] in {
- // Constant 10 corresponds to the binary mask '1010'.
- // In the pattern below, it is used as a blend mask to select:
- // - the 1st and 3rd element from the first input vector (the fsub node);
- // - the 2nd and 4th element from the second input vector (the fadd node).
-
- def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
- (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
- (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
}
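+
+// An IR sketch of what these patterns now match (assuming the DAG combiner
+// folds a blend of fadd/fsub into a single X86Addsub node):
+//   %add = fadd <4 x float> %a, %b
+//   %sub = fsub <4 x float> %a, %b
+//   %r   = shufflevector <4 x float> %sub, <4 x float> %add,
+//                        <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+// ...which selects a single 'addsubps %xmm1, %xmm0'.
+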
//===---------------------------------------------------------------------===//
@@ -5810,7 +5960,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
//===---------------------------------------------------------------------===//
multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -5830,7 +5980,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
}
multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
@@ -5917,552 +6067,240 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- OpndItins itins = DEFAULT_ITINS> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
- Sched<[itins.Sched]>;
-
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
- itins.rm>, Sched<[itins.Sched.Folded]>;
-}
-
-multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, X86FoldableSchedWrite Sched> {
- def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
-
- def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId (load addr:$src)))]>,
- Sched<[Sched.Folded]>;
-}
-
-let Predicates = [HasAVX] in {
-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
- int_x86_sse41_pmovsxbw,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
- int_x86_sse41_pmovsxwd,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
- int_x86_sse41_pmovsxdq,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
- int_x86_sse41_pmovzxbw,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
- int_x86_sse41_pmovzxwd,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
- int_x86_sse41_pmovzxdq,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
- int_x86_avx2_pmovsxbw,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
- int_x86_avx2_pmovsxwd,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
- int_x86_avx2_pmovsxdq,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
- int_x86_avx2_pmovzxbw,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
- int_x86_avx2_pmovzxwd,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
- int_x86_avx2_pmovzxdq,
- WriteShuffle>, VEX, VEX_L;
-}
-
-defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq,
- SSE_INTALU_ITINS_SHUFF_P>;
-
-let Predicates = [HasAVX] in {
- // Common patterns involving scalar load.
- def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
- (VPMOVSXBWrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
- (VPMOVSXWDrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
- (VPMOVSXDQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
- (VPMOVZXBWrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
- (VPMOVZXWDrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
- (VPMOVZXDQrm addr:$src)>;
-}
-
-let Predicates = [UseSSE41] in {
- // Common patterns involving scalar load.
- def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
- (PMOVSXBWrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
- (PMOVSXWDrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
- (PMOVSXDQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
- (PMOVZXBWrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
- (PMOVZXWDrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
- (PMOVZXDQrm addr:$src)>;
-}
-
-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- OpndItins itins = DEFAULT_ITINS> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
+ RegisterClass OutRC, RegisterClass InRC,
+ OpndItins itins> {
+ def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
+ [], itins.rr>,
Sched<[itins.Sched]>;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))],
- itins.rm>, Sched<[itins.Sched.Folded]>;
-}
-
-multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, X86FoldableSchedWrite Sched> {
- def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
-
- def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
- Sched<[Sched.Folded]>;
-}
-
-let Predicates = [HasAVX] in {
-defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq,
- DEFAULT_ITINS_SHUFFLESCHED>, VEX;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
- int_x86_avx2_pmovsxbd, WriteShuffle>,
- VEX, VEX_L;
-defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
- int_x86_avx2_pmovsxwq, WriteShuffle>,
- VEX, VEX_L;
-defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
- int_x86_avx2_pmovzxbd, WriteShuffle>,
- VEX, VEX_L;
-defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
- int_x86_avx2_pmovzxwq, WriteShuffle>,
- VEX, VEX_L;
-}
-
-defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd,
- SSE_INTALU_ITINS_SHUFF_P>;
-defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq,
- SSE_INTALU_ITINS_SHUFF_P>;
-
-let Predicates = [HasAVX] in {
- // Common patterns involving scalar load
- def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVSXBDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVSXWQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVZXBDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVZXWQrm addr:$src)>;
-}
-
-let Predicates = [UseSSE41] in {
- // Common patterns involving scalar load
- def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>;
- def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>;
-}
-
-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- X86FoldableSchedWrite Sched> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
-
- // Expecting a i16 load any extended to i32 value.
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId (bitconvert
- (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
- Sched<[Sched.Folded]>;
-}
-
-multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, X86FoldableSchedWrite Sched> {
- def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
-
- // Expecting a i16 load any extended to i32 value.
- def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId (bitconvert
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
- Sched<[Sched.Folded]>;
+ [],
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
-let Predicates = [HasAVX] in {
-defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq,
- WriteShuffle>, VEX;
-defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq,
- WriteShuffle>, VEX;
-}
-let Predicates = [HasAVX2] in {
-defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", int_x86_avx2_pmovsxbq,
- WriteShuffle>, VEX, VEX_L;
-defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", int_x86_avx2_pmovzxbq,
- WriteShuffle>, VEX, VEX_L;
-}
-defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,
- WriteShuffle>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,
- WriteShuffle>;
-
-let Predicates = [HasAVX2] in {
- def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
- def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
- def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
-
- def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
- def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
-
- def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
-
- def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
- (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
- (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
- (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
-
- def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
- (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
- (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
-
- def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
- (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
-
- def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
- (VPMOVSXWDYrm addr:$src)>;
- def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
- (VPMOVSXDQYrm addr:$src)>;
-
- def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVSXBDYrm addr:$src)>;
- def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVSXBDYrm addr:$src)>;
-
- def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVSXWQYrm addr:$src)>;
- def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVSXWQYrm addr:$src)>;
-
- def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
- (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXBQYrm addr:$src)>;
-}
-
-let Predicates = [HasAVX] in {
- // Common patterns involving scalar load
- def : Pat<(int_x86_sse41_pmovsxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXBQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVZXBQrm addr:$src)>;
-}
-
-let Predicates = [UseSSE41] in {
- def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
- def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
-
- def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
-
- def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
-
- // Common patterns involving scalar load
- def : Pat<(int_x86_sse41_pmovsxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>;
-
- def : Pat<(int_x86_sse41_pmovzxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>;
-
- def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVSXWDrm addr:$src)>;
- def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVSXWDrm addr:$src)>;
- def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
- (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBDrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
- (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXWQrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
- (scalar_to_vector (extloadi32i16 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVSXDQrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVSXDQrm addr:$src)>;
- def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVSXBWrm addr:$src)>;
- def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVSXBWrm addr:$src)>;
+multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
+ X86MemOperand MemOp, X86MemOperand MemYOp,
+ OpndItins SSEItins, OpndItins AVXItins,
+ OpndItins AVX2Itins> {
+ defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
+ let Predicates = [HasAVX] in
+ defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
+ VR128, VR128, AVXItins>, VEX;
+ let Predicates = [HasAVX2] in
+ defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
+ VR256, VR128, AVX2Itins>, VEX, VEX_L;
+}
+
+multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand MemOp, X86MemOperand MemYOp> {
+ defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
+ MemOp, MemYOp,
+ SSE_INTALU_ITINS_SHUFF_P,
+ DEFAULT_ITINS_SHUFFLESCHED,
+ DEFAULT_ITINS_SHUFFLESCHED>;
+ defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
+ !strconcat("pmovzx", OpcodeStr),
+ MemOp, MemYOp,
+ SSE_INTALU_ITINS_SHUFF_P,
+ DEFAULT_ITINS_SHUFFLESCHED,
+ DEFAULT_ITINS_SHUFFLESCHED>;
+}
+
+defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;
+defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;
+defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;
+
+defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;
+defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;
+
+defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
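+
+// Expansion sketch: each 'defm' above instantiates both extension kinds, so
+// 'defm BW' yields PMOVSXBW/PMOVZXBW (SSE4.1), VPMOVSXBW/VPMOVZXBW (AVX) and
+// VPMOVSXBWY/VPMOVZXBWY (AVX2), each with rr and rm variants, e.g.:
+//   pmovsxbw  %xmm1, %xmm0      ; PMOVSXBWrr
+//   vpmovzxbw (%rax), %ymm0     ; VPMOVZXBWYrm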
+
+// AVX2 Patterns
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
+ // Register-Register patterns
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
+
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
+
+  // On AVX2, we also support 256-bit inputs.
+ // FIXME: remove these patterns when the old shuffle lowering goes away.
+ def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
+ (!cast<I>(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))),
+ (!cast<I>(OpcPrefix#BQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (ExtOp (v16i16 VR256:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (ExtOp (v16i16 VR256:$src))),
+ (!cast<I>(OpcPrefix#WQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ // AVX2 Register-Memory patterns
+ def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+
+ def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
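+// Note that !cast<I>(OpcPrefix#BWYrr) resolves instruction names by string
+// concatenation: OpcPrefix="VPMOVSX" binds the patterns to VPMOVSXBWYrr and
+// OpcPrefix="VPMOVZX" to VPMOVZXBWYrr, so one multiclass covers both opcodes.
+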
let Predicates = [HasAVX2] in {
- def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
- def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
- def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
-
- def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
- def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
-
- def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
-
- def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
- (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
- (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
- (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
-
- def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
- (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
- def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
- (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
-
- def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
- (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
+}
+
+// SSE4.1/AVX patterns.
+multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
+ PatFrag ExtLoad16> {
+ def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
+
+ def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
+
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
let Predicates = [HasAVX] in {
- def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
- def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
-
- def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
-
- def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
-
- def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVZXBWrm addr:$src)>;
- def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVZXBWrm addr:$src)>;
- def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVZXBDrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
- (VPMOVZXBQrm addr:$src)>;
-
- def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVZXWDrm addr:$src)>;
- def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVZXWDrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVZXWQrm addr:$src)>;
-
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVZXDQrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVZXDQrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
- (VPMOVZXDQrm addr:$src)>;
-
- def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
- def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
-
- def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
-
- def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
-
- def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVSXWDrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVSXDQrm addr:$src)>;
- def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVSXWDrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVSXDQrm addr:$src)>;
- def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
- (scalar_to_vector (loadi64 addr:$src))))))),
- (VPMOVSXBWrm addr:$src)>;
- def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))),
- (VPMOVSXBWrm addr:$src)>;
-
- def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
- (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXBDrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
- (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXWQrm addr:$src)>;
- def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
- (scalar_to_vector (extloadi32i16 addr:$src))))))),
- (VPMOVSXBQrm addr:$src)>;
+ defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
}
let Predicates = [UseSSE41] in {
- def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
- def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
-
- def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
- def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
-
- def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
-
- def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVZXBWrm addr:$src)>;
- def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVZXBWrm addr:$src)>;
- def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBDrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
- (PMOVZXBQrm addr:$src)>;
-
- def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVZXWDrm addr:$src)>;
- def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVZXWDrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXWQrm addr:$src)>;
-
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
- (PMOVZXDQrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
- (PMOVZXDQrm addr:$src)>;
- def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
- (PMOVZXDQrm addr:$src)>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
}
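+
+// A hedged IR example (assuming legalization funnels partial-vector sign
+// extensions into X86vsext): the following selects PMOVSXWDrm through the
+// shared patterns above:
+//   %w = load <4 x i16>* %p
+//   %d = sext <4 x i16> %w to <4 x i32>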
//===----------------------------------------------------------------------===//
@@ -6478,7 +6316,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
imm:$src2))]>,
Sched<[WriteShuffle]>;
- let neverHasSideEffects = 1, mayStore = 1,
+ let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
@@ -6503,7 +6341,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[WriteShuffle]>;
- let neverHasSideEffects = 1, mayStore = 1,
+ let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
@@ -6692,7 +6530,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6701,7 +6539,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -7221,7 +7059,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
let isCommutable = 0 in
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
@@ -7252,7 +7090,7 @@ let Predicates = [HasAVX] in {
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
let isCommutable = 0 in
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
@@ -7306,9 +7144,9 @@ let Constraints = "$src1 = $dst" in {
SSE_INTMUL_ITINS_P, 1>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
+ memopv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
@@ -7316,7 +7154,7 @@ let Predicates = [HasAVX] in {
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
+ memopv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
VEX_4V, VEX_L;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
@@ -7337,7 +7175,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7346,7 +7184,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
Sched<[itins.Sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7360,31 +7198,33 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
- }
- let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
- }
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
+ }
+
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, loadv8f32,
+ f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
+ VEX_4V, VEX_L;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256,VR256, loadv4f64,
+ f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
+ VEX_4V, VEX_L;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, loadv2i64, i128mem, 0,
DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
- defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
- }
+
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
@@ -7401,17 +7241,21 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
VR256, loadv4i64, i256mem, 0,
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
}
+ defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
+ VR256, loadv4i64, i256mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_MPSADBW_ITINS>;
+ }
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
VR128, memopv4f32, f128mem,
@@ -7422,11 +7266,7 @@ let Constraints = "$src1 = $dst" in {
1, SSE_INTALU_ITINS_FBLEND_P>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem,
- 1, SSE_MPSADBW_ITINS>;
- }
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
@@ -7545,6 +7385,57 @@ let Predicates = [HasAVX2] in {
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
+// Zero-extending scalar-move (X86vzmovl) patterns.
+let Predicates = [UseAVX] in {
+ let AddedComplexity = 15 in {
+  // Move a scalar to XMM zero-extended: zero a VR128, then do a
+  // MOVS{S,D} into the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
+ }
+
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
+}
+
+let Predicates = [UseSSE41] in {
+  // With SSE4.1 we can use blends for these patterns.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (BLENDPDrri (v2f64 (V_SET0)), VR128:$src, (i8 1))>;
+}
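+
+// E.g. the v4f32 case emits 'blendps $1, %xmm1, %xmm0' against a zeroed
+// %xmm0: immediate bit 0 takes element 0 from $src and the remaining
+// elements come from the zero vector, matching X86vzmovl semantics.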
+
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
@@ -7555,7 +7446,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
@@ -7564,18 +7455,21 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (mem_frag addr:$src2)), XMM0))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
- int_x86_sse41_blendvpd>;
+ int_x86_sse41_blendvpd,
+ DEFAULT_ITINS_FBLENDSCHED>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
- int_x86_sse41_blendvps>;
+ int_x86_sse41_blendvps,
+ DEFAULT_ITINS_FBLENDSCHED>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
- int_x86_sse41_pblendvb>;
+ int_x86_sse41_pblendvb,
+ DEFAULT_ITINS_VARBLENDSCHED>;
// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
@@ -7704,7 +7598,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
[]>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>;
}
-let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
+let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
@@ -7739,7 +7633,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
[]>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
@@ -7774,7 +7668,7 @@ multiclass SS42AI_pcmpistri<string asm> {
[]>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}
-let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
@@ -7810,7 +7704,7 @@ multiclass SS42AI_pcmpestri<string asm> {
[]>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}
-let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
@@ -8189,7 +8083,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
-let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
+let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -8221,6 +8115,49 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(INSERT_get_vinsert128_imm VR256:$ins))>;
}
+// Combine two consecutive 16-byte loads with a common destination register into
+// one 32-byte load to that register.
+let Predicates = [HasAVX, HasFastMem32] in {
+ def : Pat<(insert_subvector
+ (v8f32 (insert_subvector undef, (loadv4f32 addr:$src), (iPTR 0))),
+ (loadv4f32 (add addr:$src, (iPTR 16))),
+ (iPTR 4)),
+ (VMOVUPSYrm addr:$src)>;
+
+ def : Pat<(insert_subvector
+ (v4f64 (insert_subvector undef, (loadv2f64 addr:$src), (iPTR 0))),
+ (loadv2f64 (add addr:$src, (iPTR 16))),
+ (iPTR 2)),
+ (VMOVUPDYrm addr:$src)>;
+
+ def : Pat<(insert_subvector
+ (v32i8 (insert_subvector
+ undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
+ (bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
+ (iPTR 16)),
+ (VMOVDQUYrm addr:$src)>;
+
+ def : Pat<(insert_subvector
+ (v16i16 (insert_subvector
+ undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
+ (bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
+ (iPTR 8)),
+ (VMOVDQUYrm addr:$src)>;
+
+ def : Pat<(insert_subvector
+ (v8i32 (insert_subvector
+ undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),
+ (bc_v4i32 (loadv2i64 (add addr:$src, (iPTR 16)))),
+ (iPTR 4)),
+ (VMOVDQUYrm addr:$src)>;
+
+ def : Pat<(insert_subvector
+ (v4i64 (insert_subvector undef, (loadv2i64 addr:$src), (iPTR 0))),
+ (loadv2i64 (add addr:$src, (iPTR 16))),
+ (iPTR 2)),
+ (VMOVDQUYrm addr:$src)>;
+}
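+// A sketch of the shape this folds (hypothetical IR; assuming the subtarget
+// reports fast unaligned 32-byte memory access):
+//   %a = load <4 x float>* %p, align 1
+//   %q = getelementptr <4 x float>* %p, i64 1
+//   %b = load <4 x float>* %q, align 1
+//   %v = shufflevector <4 x float> %a, <4 x float> %b,
+//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// ...now selects one 'vmovups (%rdi), %ymm0' instead of two 16-byte loads.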
+
let Predicates = [HasAVX1Only] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(iPTR imm)),
@@ -8263,7 +8200,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
-let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
+let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -8393,13 +8330,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX,
+ [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
+ (vt (X86VPermilpi (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
}
@@ -8417,19 +8354,37 @@ let ExeDomain = SSEPackedDouble in {
}
let Predicates = [HasAVX] in {
-def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
+ (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
+def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (VPERMILPSYrm VR256:$src1, addr:$src2)>;
+def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
+ (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
+def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
+ (VPERMILPDYrm VR256:$src1, addr:$src2)>;
+
+def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 addr:$src1)),
+def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
+ (VPERMILPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPERMILPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
+ (VPERMILPDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
+ (VPERMILPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
(VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDmi addr:$src1, imm:$imm)>;
}
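+
+// The node split mirrors the two hardware forms: X86VPermilpv takes a
+// per-element variable mask in a register ('vpermilps %xmm2, %xmm1, %xmm0'),
+// while X86VPermilpi takes an 8-bit immediate ('vpermilps $0x1b, %xmm1,
+// %xmm0'); both forms stay selectable through the patterns above.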
@@ -8505,7 +8460,7 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (Int VR128:$src))]>,
T8PD, VEX, Sched<[WriteCvtF2F]>;
- let neverHasSideEffects = 1, mayLoad = 1 in
+ let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
Sched<[WriteCvtF2FLd]>;
@@ -8517,7 +8472,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
TAPD, VEX, Sched<[WriteCvtF2F]>;
- let neverHasSideEffects = 1, mayStore = 1,
+ let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteCvtF2FLd, WriteRMW] in
def mr : Ii8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
@@ -8563,13 +8518,13 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
@@ -8578,12 +8533,10 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, loadv4i64, i256mem>, VEX_L;
-}
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
imm:$mask)),
@@ -8675,6 +8628,27 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
(VBROADCASTSDYrr VR128:$src)>;
+  // Provide aliases for broadcast from the same register class that
+  // automatically do the extract.
+ def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
+ (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
+ (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
+ (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
+ (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
+ (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
+ (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
+ sub_xmm)))>;
+
// Provide a fallback in case the load node used in the patterns above has
// additional users, which prevents those patterns from being selected.
let AddedComplexity = 20 in {
@@ -8756,6 +8730,9 @@ let Predicates = [HasAVX] in {
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
}
+
+ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
//===----------------------------------------------------------------------===//
@@ -8763,14 +8740,14 @@ let Predicates = [HasAVX] in {
//
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT> {
+ ValueType OpVT, X86FoldableSchedWrite Sched> {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
- Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
+ Sched<[Sched]>, VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
@@ -8778,22 +8755,22 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
(bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT> {
+ ValueType OpVT, X86FoldableSchedWrite Sched> {
def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
- Sched<[WriteShuffle256]>, VEX, VEX_L;
+ Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
@@ -8801,12 +8778,14 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
}
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
+ WriteShuffle256>, VEX_W;
let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
+ WriteFShuffle256>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -8847,7 +8826,7 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -8907,7 +8886,7 @@ def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
[(set VR128:$dst,
(int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
-let neverHasSideEffects = 1, mayStore = 1 in
+let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -8985,6 +8964,115 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
+def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
+ (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
+ (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
+ (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
+ (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
+ (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
+ (bc_v8f32 (v8i32 immAllZerosV)))),
+ (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
+ (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
+ (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
+ (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
+ (bc_v4f32 (v4i32 immAllZerosV)))),
+ (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
+ (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
+ (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
+ (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
+ (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
+ (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
+ (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
+ (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
+ (v4f64 immAllZerosV))),
+ (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
+ (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
+ (bc_v4i64 (v8i32 immAllZerosV)))),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
+ (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
+ (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
+ (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
+ (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
+ (v2f64 immAllZerosV))),
+ (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
+ (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
+ (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
+ (bc_v2i64 (v4i32 immAllZerosV)))),
+ (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
+ (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
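These patterns map the target-independent masked_load/masked_store nodes onto the AVX/AVX2 maskmov instructions; when a masked load carries a non-trivial passthru operand ($src0), the maskmov load is followed by a variable blend that merges the passthru value into the inactive lanes. A rough C++ sketch of the same semantics using the corresponding compiler intrinsics (assuming AVX2 and <immintrin.h>; the helper name is illustrative, not part of this patch):

#include <immintrin.h>

// Masked load of 8 floats with an explicit passthru, mirroring the
// (masked_load addr, mask, src0) -> VMASKMOVPSYrm + VBLENDVPSYrr lowering
// above: lanes whose mask sign bit is clear receive the passthru value.
static __m256 masked_load_with_passthru(const float *ptr, __m256i mask,
                                        __m256 passthru) {
  __m256 loaded = _mm256_maskload_ps(ptr, mask);   // inactive lanes -> 0
  __m256 sel = _mm256_castsi256_ps(mask);          // blendv keys on sign bits
  return _mm256_blendv_ps(passthru, loaded, sel);  // merge the two sources
}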
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index d0bb5231a2d0..c706d43c9f5c 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -49,6 +49,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
+} // isConvertibleToThreeAddress = 1
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
@@ -62,7 +63,6 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", [], IIC_SR>;
} // hasSideEffects = 0
-} // isConvertibleToThreeAddress = 1
} // Constraints = "$src = $dst", SchedRW
@@ -289,11 +289,11 @@ def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
"sar{w}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize16;
-def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize32;
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
@@ -347,7 +347,7 @@ def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
+
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
@@ -381,7 +381,7 @@ def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
+
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
@@ -397,7 +397,7 @@ def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
+
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
@@ -493,7 +493,7 @@ def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
@@ -600,7 +600,7 @@ def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
@@ -635,11 +635,11 @@ def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize16;
-def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize32;
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
@@ -688,19 +688,19 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
IIC_SHD16_REG_CL>,
TB, OpSize16;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
IIC_SHD16_REG_CL>,
TB, OpSize16;
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
@@ -710,58 +710,58 @@ def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
IIC_SHD32_REG_CL>, TB, OpSize32;
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
- IIC_SHD64_REG_CL>,
+ IIC_SHD64_REG_CL>,
TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
- IIC_SHD64_REG_CL>,
+ IIC_SHD64_REG_CL>,
TB;
}
let isCommutable = 1 in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR16:$dst),
+ (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR16:$dst),
+ (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR32:$dst),
+ (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR32:$dst),
+ (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
- (outs GR64:$dst),
+ (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
- (outs GR64:$dst),
+ (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
@@ -789,7 +789,7 @@ def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)], IIC_SHD32_MEM_CL>, TB, OpSize32;
-
+
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
@@ -807,7 +807,7 @@ def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD16_MEM_IM>,
TB, OpSize16;
-def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
@@ -822,7 +822,7 @@ def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD32_MEM_IM>,
TB, OpSize32;
-def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
@@ -837,7 +837,7 @@ def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
@@ -859,7 +859,7 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
}]>;
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShift]>;
@@ -872,7 +872,7 @@ let neverHasSideEffects = 1 in {
}
multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
VEX_4VOp3, Sched<[WriteShift]>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 540278021f02..848eeeb07c82 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -207,7 +207,7 @@ def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize16;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -215,14 +215,14 @@ def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
OpSize16;
// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize32;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB,
OpSize32;
// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
@@ -240,7 +240,7 @@ def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize32;
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
@@ -260,7 +260,7 @@ def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
-
+
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
"push{w}\t{%cs|cs}", [], IIC_PUSH_SR>,
OpSize16, Requires<[Not64BitMode]>;
@@ -347,31 +347,31 @@ def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
-
+
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
-
+
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
-
+
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
-
+
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
-
+
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
@@ -408,7 +408,7 @@ def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
"sldt{w}\t$dst", [], IIC_SLDT>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", [], IIC_SLDT>, OpSize32, TB;
-
+
// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
@@ -444,12 +444,12 @@ let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
TB;
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", [], IIC_SMSW>, OpSize32, TB;
// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", [], IIC_SMSW>, TB;
// For memory operands, there is only a 16-bit form
@@ -462,11 +462,7 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
- def CPUID32 : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB,
- Requires<[Not64BitMode]>;
-let Defs = [RAX, RBX, RCX, RDX], Uses = [RAX, RCX] in
- def CPUID64 : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB,
- Requires<[In64BitMode]>;
+ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
} // SchedRW
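Merging CPUID32/CPUID64 into a single CPUID reflects that the instruction reads and writes the 32-bit registers EAX/EBX/ECX/EDX in both 32- and 64-bit mode, so no mode-specific definition is needed. For illustration, querying a CPUID leaf from C++ with the GCC/Clang helper header (an assumption about the toolchain, not part of this patch):

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  // Leaf 1 returns the basic feature flags in ECX/EDX.
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    std::printf("ECX feature bits: %#x\n", ecx);
  return 0;
}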
//===----------------------------------------------------------------------===//
@@ -479,10 +475,10 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
-let Defs = [RDX, RAX], Uses = [RCX] in
+let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
-let Uses = [RDX, RAX, RCX] in
+let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
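The corrected Defs/Uses lists match the architectural behavior: xgetbv returns the extended control register selected by ECX in EDX:EAX, and xsetbv writes it from the same registers, so only the 32-bit registers are involved. A minimal C++ sketch of reading XCR0 through the intrinsic (assuming XSAVE support and <immintrin.h>):

#include <immintrin.h>

// Read XCR0 (ECX = 0): reports which register states the OS has enabled
// for XSAVE; EDX:EAX are composed into the 64-bit return value.
static unsigned long long read_xcr0() {
  return _xgetbv(0);
}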
let Uses = [RDX, RAX] in {
@@ -563,7 +559,7 @@ def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
//===----------------------------------------------------------------------===//
// SMAP Instruction
-let Defs = [EFLAGS], Uses = [EFLAGS] in {
+let Predicates = [HasSMAP], Defs = [EFLAGS] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 4940efc4443d..7267d752653e 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -23,9 +23,12 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
"# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
Requires<[HasRTM]>;
-let isBranch = 1, isTerminator = 1, Defs = [EAX] in
-def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
- "xbegin\t$dst", []>, Requires<[HasRTM]>;
+let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
+def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
+ "xbegin\t$dst", []>, OpSize16, Requires<[HasRTM]>;
+def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
+ "xbegin\t$dst", []>, OpSize32, Requires<[HasRTM]>;
+}
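The new 16-bit form pairs with the existing 32-bit one so an operand-size-appropriate encoding can be chosen; EAX is an implicit def because, on a transactional abort, the abort status is delivered in EAX at the fallback address. A hedged C++ sketch of typical RTM usage through the compiler intrinsics (assuming RTM hardware and <immintrin.h>):

#include <immintrin.h>

// _xbegin() returns _XBEGIN_STARTED when the transaction starts; on abort,
// control returns here again with the abort status as the return value.
static bool try_transactional_increment(int *counter) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    ++*counter;  // transactional region
    _xend();     // commit
    return true;
  }
  return false;  // aborted; the caller should fall back to a lock
}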
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
new file mode 100644
index 000000000000..7130ae2c3097
--- /dev/null
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -0,0 +1,547 @@
+//===-- X86IntrinsicsInfo.h - X86 Intrinsics ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the details for lowering X86 intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86INTRINSICSINFO_H
+#define LLVM_LIB_TARGET_X86_X86INTRINSICSINFO_H
+
+// Standard-library headers used by the table lookup and verification
+// helpers below (std::lower_bound, std::begin/std::end, assert).
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+namespace llvm {
+
+enum IntrinsicType {
+ INTR_NO_TYPE,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
+ CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+ INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
+ COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND
+};
+
+struct IntrinsicData {
+
+ unsigned Id;
+ IntrinsicType Type;
+ unsigned Opc0;
+ unsigned Opc1;
+
+ bool operator<(const IntrinsicData &RHS) const {
+ return Id < RHS.Id;
+ }
+ bool operator==(const IntrinsicData &RHS) const {
+ return RHS.Id == Id;
+ }
+};
+
+#define X86_INTRINSIC_DATA(id, type, op0, op1) \
+ { Intrinsic::x86_##id, type, op0, op1 }
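Each table entry below is produced by this macro; for example (expansion shown for illustration only):

// X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0) expands to
//   { Intrinsic::x86_rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0 }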
+
+/*
+ * IntrinsicsWithChain - the table must be kept sorted by Intrinsic ID (which
+ * matches the alphabetical order of the intrinsic names) so that it can be
+ * binary-searched with std::lower_bound.
+ */
+static const IntrinsicData IntrinsicsWithChain[] = {
+ X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+
+ X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+
+ X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
+ X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
+ X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH,
+ X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
+ X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_d_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_q_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_q_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_d_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_q_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_q_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+
+ X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
+ X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
+ X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
+ X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
+ X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
+
+ X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
+ X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdseed_16, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdseed_32, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
+ X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
+
+ X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
+};
+
+/*
+ * Find intrinsic data by intrinsic ID; returns nullptr if the intrinsic is
+ * not defined in the table.
+ */
+static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
+
+ IntrinsicData IntrinsicToFind = { IntNo, INTR_NO_TYPE, 0, 0 };
+ const IntrinsicData *Data = std::lower_bound(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain),
+ IntrinsicToFind);
+ if (Data != std::end(IntrinsicsWithChain) && *Data == IntrinsicToFind)
+ return Data;
+ return nullptr;
+}
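Because the table is sorted by ID, the lookup is a binary search: std::lower_bound finds the first entry whose ID is not less than the probe, and the trailing equality check rejects IDs that are absent from the table. A small hypothetical helper built on top of it (illustrative, not part of this patch):

// Returns true iff IntNo is one of the chained intrinsics handled via the
// table (gathers, scatters, prefetches, rdrand/rdseed, and so on).
static bool isChainedX86Intrinsic(unsigned IntNo) {
  return getIntrinsicWithChain(IntNo) != nullptr;
}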
+
+/*
+ * IntrinsicsWithoutChain - the table must be kept sorted by Intrinsic ID
+ * (which matches the alphabetical order of the intrinsic names) so that it
+ * can be binary-searched with std::lower_bound.
+ */
+static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_d_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_d_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_d_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_pd_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_pd_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_pd_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_ps_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_ps_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_ps_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_q_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_q_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_q_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(avx_hsub_ps_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(avx_max_pd_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx_max_ps_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
+ X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(sse2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(sse2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(sse2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(sse2_sqrt_pd, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse2_ucomile_sd, COMI, X86ISD::UCOMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse2_ucomilt_sd, COMI, X86ISD::UCOMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse2_ucomineq_sd, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse3_hadd_pd, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(sse3_hadd_ps, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(sse3_hsub_pd, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
+ X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse_ucomigt_ss, COMI, X86ISD::UCOMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(ssse3_psign_b_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(ssse3_psign_d_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(ssse3_psign_w_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0)
+};
+
+/*
+ * Retrieve data for an intrinsic without a chain.
+ * Returns nullptr if the intrinsic is not defined in the table.
+ */
+static const IntrinsicData* getIntrinsicWithoutChain(unsigned IntNo) {
+ IntrinsicData IntrinsicToFind = { IntNo, INTR_NO_TYPE, 0, 0 };
+ const IntrinsicData *Data = std::lower_bound(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain),
+ IntrinsicToFind);
+ if (Data != std::end(IntrinsicsWithoutChain) && *Data == IntrinsicToFind)
+ return Data;
+ return nullptr;
+}
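+
+/*
+ * A minimal usage sketch (hypothetical caller; the real lowering lives in
+ * X86ISelLowering.cpp). Because both tables are sorted by intrinsic ID,
+ * lookup is a single binary search:
+ *
+ *   if (const IntrinsicData *D = getIntrinsicWithoutChain(IntNo))
+ *     // dispatch on D->Type (INTR_TYPE_1OP, COMI, VSHIFT, ...) and lower
+ *     // the call to the node kind in D->Opc0, e.g. X86ISD::UCOMI.
+ */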
+
+static void verifyIntrinsicTables() {
+ assert(std::is_sorted(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain)) &&
+ std::is_sorted(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain)) &&
+ "Intrinsic data tables should be sorted by Intrinsic ID");
+}
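+
+/*
+ * Sketch of intended use (hypothetical call site): invoke once, e.g. from
+ * the X86 lowering constructor. The body is a single assert, so release
+ * (NDEBUG) builds compile it away entirely.
+ */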
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
deleted file mode 100644
index a082c4f8b0ef..000000000000
--- a/lib/Target/X86/X86JITInfo.cpp
+++ /dev/null
@@ -1,588 +0,0 @@
-//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the X86 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86JITInfo.h"
-#include "X86Relocations.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Valgrind.h"
-#include <cstdlib>
-#include <cstring>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-// Determine the platform we're running on
-#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
-# define X86_64_JIT
-#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
-# define X86_32_JIT
-#endif
-
-void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- unsigned char *OldByte = (unsigned char *)Old;
- *OldByte++ = 0xE9; // Emit JMP opcode.
- unsigned *OldWord = (unsigned *)OldByte;
- unsigned NewAddr = (intptr_t)New;
- unsigned OldAddr = (intptr_t)OldWord;
- *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
-
- // X86 doesn't need to invalidate the processor cache, so just invalidate
- // Valgrind's cache directly.
- sys::ValgrindDiscardTranslations(Old, 5);
-}
-
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// For ELF targets, use a .size and .type directive, to let tools
-// know the extent of functions defined in assembler.
-#if defined(__ELF__)
-# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
-# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
-#else
-# define SIZE(sym)
-# define TYPE_FUNCTION(sym)
-#endif
-
-// Provide a convenient way for disabling usage of CFI directives.
-// This is needed for old/broken assemblers (for example, gas on
-// Darwin is pretty old and doesn't support these directives)
-#if defined(__APPLE__)
-# define CFI(x)
-#else
-// FIXME: Disable this until we really want to use it. Also, we will
-// need to add some workarounds for compilers, which support
-// only subset of these directives.
-# define CFI(x)
-#endif
-
-// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
-// callee saved registers, for the fastcc calling convention.
-extern "C" {
-#if defined(X86_64_JIT)
-# ifndef _MSC_VER
- // No need to save EAX/EDX for X86-64.
- void X86CompilationCallback(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback\n"
- TYPE_FUNCTION(X86CompilationCallback)
- ASMPREFIX "X86CompilationCallback:\n"
- CFI(".cfi_startproc\n")
- // Save RBP
- "pushq %rbp\n"
- CFI(".cfi_def_cfa_offset 16\n")
- CFI(".cfi_offset %rbp, -16\n")
- // Save RSP
- "movq %rsp, %rbp\n"
- CFI(".cfi_def_cfa_register %rbp\n")
- // Save all int arg registers
- "pushq %rdi\n"
- CFI(".cfi_rel_offset %rdi, 0\n")
- "pushq %rsi\n"
- CFI(".cfi_rel_offset %rsi, 8\n")
- "pushq %rdx\n"
- CFI(".cfi_rel_offset %rdx, 16\n")
- "pushq %rcx\n"
- CFI(".cfi_rel_offset %rcx, 24\n")
- "pushq %r8\n"
- CFI(".cfi_rel_offset %r8, 32\n")
- "pushq %r9\n"
- CFI(".cfi_rel_offset %r9, 40\n")
- // Align stack on 16-byte boundary. ESP might not be properly aligned
- // (8 byte) if this is called from an indirect stub.
- "andq $-16, %rsp\n"
- // Save all XMM arg registers
- "subq $128, %rsp\n"
- "movaps %xmm0, (%rsp)\n"
- "movaps %xmm1, 16(%rsp)\n"
- "movaps %xmm2, 32(%rsp)\n"
- "movaps %xmm3, 48(%rsp)\n"
- "movaps %xmm4, 64(%rsp)\n"
- "movaps %xmm5, 80(%rsp)\n"
- "movaps %xmm6, 96(%rsp)\n"
- "movaps %xmm7, 112(%rsp)\n"
- // JIT callee
-#if defined(_WIN64) || defined(__CYGWIN__)
- "subq $32, %rsp\n"
- "movq %rbp, %rcx\n" // Pass prev frame and return address
- "movq 8(%rbp), %rdx\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "addq $32, %rsp\n"
-#else
- "movq %rbp, %rdi\n" // Pass prev frame and return address
- "movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
-#endif
- // Restore all XMM arg registers
- "movaps 112(%rsp), %xmm7\n"
- "movaps 96(%rsp), %xmm6\n"
- "movaps 80(%rsp), %xmm5\n"
- "movaps 64(%rsp), %xmm4\n"
- "movaps 48(%rsp), %xmm3\n"
- "movaps 32(%rsp), %xmm2\n"
- "movaps 16(%rsp), %xmm1\n"
- "movaps (%rsp), %xmm0\n"
- // Restore RSP
- "movq %rbp, %rsp\n"
- CFI(".cfi_def_cfa_register %rsp\n")
- // Restore all int arg registers
- "subq $48, %rsp\n"
- CFI(".cfi_adjust_cfa_offset 48\n")
- "popq %r9\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %r9\n")
- "popq %r8\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %r8\n")
- "popq %rcx\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rcx\n")
- "popq %rdx\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rdx\n")
- "popq %rsi\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rsi\n")
- "popq %rdi\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rdi\n")
- // Restore RBP
- "popq %rbp\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rbp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback)
- );
-# else
- // No inline assembler support on this platform. The routine is in external
- // file.
- void X86CompilationCallback();
-
-# endif
-#elif defined (X86_32_JIT)
-# ifndef _MSC_VER
- void X86CompilationCallback(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback\n"
- TYPE_FUNCTION(X86CompilationCallback)
- ASMPREFIX "X86CompilationCallback:\n"
- CFI(".cfi_startproc\n")
- "pushl %ebp\n"
- CFI(".cfi_def_cfa_offset 8\n")
- CFI(".cfi_offset %ebp, -8\n")
- "movl %esp, %ebp\n" // Standard prologue
- CFI(".cfi_def_cfa_register %ebp\n")
- "pushl %eax\n"
- CFI(".cfi_rel_offset %eax, 0\n")
- "pushl %edx\n" // Save EAX/EDX/ECX
- CFI(".cfi_rel_offset %edx, 4\n")
- "pushl %ecx\n"
- CFI(".cfi_rel_offset %ecx, 8\n")
-# if defined(__APPLE__)
- "andl $-16, %esp\n" // Align ESP on 16-byte boundary
-# endif
- "subl $16, %esp\n"
- "movl 4(%ebp), %eax\n" // Pass prev frame and return address
- "movl %eax, 4(%esp)\n"
- "movl %ebp, (%esp)\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "movl %ebp, %esp\n" // Restore ESP
- CFI(".cfi_def_cfa_register %esp\n")
- "subl $12, %esp\n"
- CFI(".cfi_adjust_cfa_offset 12\n")
- "popl %ecx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ecx\n")
- "popl %edx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %edx\n")
- "popl %eax\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %eax\n")
- "popl %ebp\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ebp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback)
- );
-
- // Same as X86CompilationCallback but also saves XMM argument registers.
- void X86CompilationCallback_SSE(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
- TYPE_FUNCTION(X86CompilationCallback_SSE)
- ASMPREFIX "X86CompilationCallback_SSE:\n"
- CFI(".cfi_startproc\n")
- "pushl %ebp\n"
- CFI(".cfi_def_cfa_offset 8\n")
- CFI(".cfi_offset %ebp, -8\n")
- "movl %esp, %ebp\n" // Standard prologue
- CFI(".cfi_def_cfa_register %ebp\n")
- "pushl %eax\n"
- CFI(".cfi_rel_offset %eax, 0\n")
- "pushl %edx\n" // Save EAX/EDX/ECX
- CFI(".cfi_rel_offset %edx, 4\n")
- "pushl %ecx\n"
- CFI(".cfi_rel_offset %ecx, 8\n")
- "andl $-16, %esp\n" // Align ESP on 16-byte boundary
- // Save all XMM arg registers
- "subl $64, %esp\n"
- // FIXME: provide frame move information for xmm registers.
- // This can be tricky, because CFA register is ebp (unaligned)
- // and we need to produce offsets relative to it.
- "movaps %xmm0, (%esp)\n"
- "movaps %xmm1, 16(%esp)\n"
- "movaps %xmm2, 32(%esp)\n"
- "movaps %xmm3, 48(%esp)\n"
- "subl $16, %esp\n"
- "movl 4(%ebp), %eax\n" // Pass prev frame and return address
- "movl %eax, 4(%esp)\n"
- "movl %ebp, (%esp)\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "addl $16, %esp\n"
- "movaps 48(%esp), %xmm3\n"
- CFI(".cfi_restore %xmm3\n")
- "movaps 32(%esp), %xmm2\n"
- CFI(".cfi_restore %xmm2\n")
- "movaps 16(%esp), %xmm1\n"
- CFI(".cfi_restore %xmm1\n")
- "movaps (%esp), %xmm0\n"
- CFI(".cfi_restore %xmm0\n")
- "movl %ebp, %esp\n" // Restore ESP
- CFI(".cfi_def_cfa_register esp\n")
- "subl $12, %esp\n"
- CFI(".cfi_adjust_cfa_offset 12\n")
- "popl %ecx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ecx\n")
- "popl %edx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %edx\n")
- "popl %eax\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %eax\n")
- "popl %ebp\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ebp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback_SSE)
- );
-# else
- void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
-
- _declspec(naked) void X86CompilationCallback(void) {
- __asm {
- push ebp
- mov ebp, esp
- push eax
- push edx
- push ecx
- and esp, -16
- sub esp, 16
- mov eax, dword ptr [ebp+4]
- mov dword ptr [esp+4], eax
- mov dword ptr [esp], ebp
- call LLVMX86CompilationCallback2
- mov esp, ebp
- sub esp, 12
- pop ecx
- pop edx
- pop eax
- pop ebp
- ret
- }
- }
-
-# endif // _MSC_VER
-
-#else // Not an i386 host
- void X86CompilationCallback() {
- llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
- }
-#endif
-}
-
-/// This is the target-specific function invoked by the
-/// function stub when we did not know the real target of a call. This function
-/// must locate the start of the stub or call site and pass it into the JIT
-/// compiler function.
-extern "C" {
-LLVM_ATTRIBUTE_USED // Referenced from inline asm.
-LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
- intptr_t RetAddr) {
- intptr_t *RetAddrLoc = &StackPtr[1];
- // We are reading raw stack data here. Tell MemorySanitizer that it is
- // sufficiently initialized.
- __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
- assert(*RetAddrLoc == RetAddr &&
- "Could not find return address on the stack!");
-
- // It's a stub if there is an interrupt marker after the call.
- bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
-
- // The call instruction should have pushed the return value onto the stack...
-#if defined (X86_64_JIT)
- RetAddr--; // Backtrack to the reference itself...
-#else
- RetAddr -= 4; // Backtrack to the reference itself...
-#endif
-
-#if 0
- DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
- << " ESP=" << (void*)StackPtr
- << ": Resolving call to function: "
- << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
-#endif
-
- // Sanity check to make sure this really is a call instruction.
-#if defined (X86_64_JIT)
- assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
- assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
-#else
- assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
-#endif
-
- intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
-
- // Rewrite the call target... so that we don't end up here every time we
- // execute the call.
-#if defined (X86_64_JIT)
- assert(isStub &&
- "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
- " the call instruction varies too much.");
-#else
- *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
-#endif
-
- if (isStub) {
- // If this is a stub, rewrite the call into an unconditional branch
- // instruction so that two return addresses are not pushed onto the stack
- // when the requested function finally gets called. This also makes the
- // 0xCE byte (interrupt) dead, so the marker doesn't effect anything.
-#if defined (X86_64_JIT)
- // If the target address is within 32-bit range of the stub, use a
- // PC-relative branch instead of loading the actual address. (This is
- // considerably shorter than the 64-bit immediate load already there.)
- // We assume here intptr_t is 64 bits.
- intptr_t diff = NewVal-RetAddr+7;
- if (diff >= -2147483648LL && diff <= 2147483647LL) {
- *(unsigned char*)(RetAddr-0xc) = 0xE9;
- *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
- } else {
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
- ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
- }
- sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
-#else
- ((unsigned char*)RetAddr)[-1] = 0xE9;
- sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
-#endif
- }
-
- // Change the return address to reexecute the call instruction...
-#if defined (X86_64_JIT)
- *RetAddrLoc -= 0xd;
-#else
- *RetAddrLoc -= 5;
-#endif
-}
-}
-
-TargetJITInfo::LazyResolverFn
-X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
- TsanIgnoreWritesBegin();
- JITCompilerFunction = F;
- TsanIgnoreWritesEnd();
-
-#if defined (X86_32_JIT) && !defined (_MSC_VER)
-#if defined(__SSE__)
- // SSE Callback should be called for SSE-enabled LLVM.
- return X86CompilationCallback_SSE;
-#else
- if (useSSE)
- return X86CompilationCallback_SSE;
-#endif
-#endif
-
- return X86CompilationCallback;
-}
-
-X86JITInfo::X86JITInfo(bool UseSSE) {
- useSSE = UseSSE;
- useGOT = 0;
- TLSOffset = nullptr;
-}
-
-void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) {
-#if defined (X86_64_JIT)
- const unsigned Alignment = 8;
- uint8_t Buffer[8];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
- MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
-#else
- const unsigned Alignment = 4;
- uint8_t Buffer[4];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
-#endif
- return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
-}
-
-TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
- // The 64-bit stub contains:
- // movabs r10 <- 8-byte-target-address # 10 bytes
- // call|jmp *r10 # 3 bytes
- // The 32-bit stub contains a 5-byte call|jmp.
- // If the stub is a call to the compilation callback, an extra byte is added
- // to mark it as a stub.
- StubLayout Result = {14, 4};
- return Result;
-}
-
-void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
- JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
- // complains about casting a function pointer to a normal pointer.
-#if defined (X86_32_JIT) && !defined (_MSC_VER)
- bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
- Target != (void*)(intptr_t)X86CompilationCallback_SSE);
-#else
- bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
-#endif
- JCE.emitAlignment(4);
- void *Result = (void*)JCE.getCurrentPCValue();
- if (NotCC) {
-#if defined (X86_64_JIT)
- JCE.emitByte(0x49); // REX prefix
- JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Target);
- JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
- JCE.emitByte(0x41); // REX prefix
- JCE.emitByte(0xFF); // jmpq *r10
- JCE.emitByte(2 | (4 << 3) | (3 << 6));
-#else
- JCE.emitByte(0xE9);
- JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
-#endif
- return Result;
- }
-
-#if defined (X86_64_JIT)
- JCE.emitByte(0x49); // REX prefix
- JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Target);
- JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
- JCE.emitByte(0x41); // REX prefix
- JCE.emitByte(0xFF); // callq *r10
- JCE.emitByte(2 | (2 << 3) | (3 << 6));
-#else
- JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
-
- JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
-#endif
-
- // This used to use 0xCD, but that value is used by JITMemoryManager to
- // initialize the buffer with garbage, which means it may follow a
- // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
- JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
- return Result;
-}
-
-/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
-/// specific basic block.
-uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
-#if defined(X86_64_JIT)
- return BB - Entry;
-#else
- return BB - PICBase;
-#endif
-}
-
-template<typename T> static void addUnaligned(void *Pos, T Delta) {
- T Value;
- std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
- sizeof(T));
- Value += Delta;
- std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
- sizeof(T));
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
- switch ((X86::RelocationType)MR->getRelocationType()) {
- case X86::reloc_pcrel_word: {
- // PC relative relocation, add the relocated value to the value already in
- // memory, after we adjust it for where the PC is.
- ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- }
- case X86::reloc_picrel_word: {
- // PIC base relative relocation, add the relocated value to the value
- // already in memory, after we adjust it for where the PIC base is.
- ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- }
- case X86::reloc_absolute_word:
- case X86::reloc_absolute_word_sext:
- // Absolute relocation, just add the relocated value to the value already
- // in memory.
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- case X86::reloc_absolute_dword:
- addUnaligned<intptr_t>(RelocPos, ResultPtr);
- break;
- }
- }
-}
-
-char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
-#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
- TLSOffset -= size;
- return TLSOffset;
-#else
- llvm_unreachable("Cannot allocate thread local storage on this arch!");
-#endif
-}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
deleted file mode 100644
index 564343ffa3fa..000000000000
--- a/lib/Target/X86/X86JITInfo.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the X86 implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86JITINFO_H
-#define X86JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
- class X86Subtarget;
-
- class X86JITInfo : public TargetJITInfo {
- uintptr_t PICBase;
- char *TLSOffset;
- bool useSSE;
- public:
- explicit X86JITInfo(bool UseSSE);
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
- /// to emit an indirect symbol which contains the address of the specified
- /// ptr.
- void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on X86.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function* F, void *Target,
- JITCodeEmitter &JCE) override;
-
- /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
- /// specific basic block.
- uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
-
- /// allocateThreadLocalMemory - Each target has its own way of
- /// handling thread local variables. This method returns a value only
- /// meaningful to the target.
- char* allocateThreadLocalMemory(size_t size) override;
-
- /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
- /// PIC jumptable entry.
- void setPICBase(uintptr_t Base) { PICBase = Base; }
- uintptr_t getPICBase() const { return PICBase; }
- };
-}
-
-#endif
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2bd70a96c432..63ccded5ac9f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,20 +16,25 @@
#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -58,6 +63,53 @@ private:
} // end anonymous namespace
+// Emit a minimal sequence of nops spanning NumBytes bytes.
+static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+ const MCSubtargetInfo &STI);
+
+namespace llvm {
+ X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM)
+ : TM(TM), InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {}
+
+ X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {}
+
+ void
+ X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &MF) {
+ CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
+ *TM.getSubtargetImpl()->getInstrInfo(),
+ *TM.getSubtargetImpl()->getRegisterInfo(), *TM.getSubtargetImpl(),
+ MF.getContext()));
+ }
+
+ void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ if (InShadow) {
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ CodeEmitter->EncodeInstruction(Inst, VecOS, Fixups, STI);
+ VecOS.flush();
+ CurrentShadowSize += Code.size();
+ if (CurrentShadowSize >= RequiredShadowSize)
+ InShadow = false; // The shadow is big enough. Stop counting.
+ }
+ }
+
+ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
+ MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
+ if (InShadow && CurrentShadowSize < RequiredShadowSize) {
+ InShadow = false;
+ EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+ TM.getSubtarget<X86Subtarget>().is64Bit(), STI);
+ }
+ }
+
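+ // Intended lifecycle, as a sketch (names from this file; sizes are
+ // illustrative):
+ //   SMShadowTracker.reset(NumShadowBytes);      // LowerSTACKMAP opens a shadow
+ //   SMShadowTracker.count(Inst, STI);           // emitted bytes count toward it
+ //   SMShadowTracker.emitShadowPadding(OS, STI); // nop-pad if it would end short
+ // EmitAndCountInstruction below performs emit-then-count in one step.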
+ void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
+ OutStreamer.EmitInstruction(Inst, getSubtargetInfo());
+ SMShadowTracker.count(Inst, getSubtargetInfo());
+ }
+} // end llvm namespace
+
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
: Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
@@ -72,7 +124,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
SmallString<128> Name;
@@ -212,7 +264,8 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
- if (MO.isJTI() && MAI.hasSetDirective()) {
+ if (MO.isJTI()) {
+ assert(MAI.doesSetDirectiveSuppressesReloc());
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -337,9 +390,8 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
Inst.addOperand(Seg);
}
-static unsigned getRetOpcode(const X86Subtarget &Subtarget)
-{
- return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
+static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
+ return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
@@ -493,6 +545,24 @@ ReSimplify:
break;
}
+ case X86::DEC16r:
+ case X86::DEC32r:
+ case X86::INC16r:
+ case X86::INC32r:
+ // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
+ if (!AsmPrinter.getSubtarget().is64Bit()) {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
+ case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
+ case X86::INC16r: Opcode = X86::INC16r_alt; break;
+ case X86::INC32r: Opcode = X86::INC32r_alt; break;
+ }
+ OutMI.setOpcode(Opcode);
+ }
+ break;
+
// These are pseudo-ops for OR to help with the OR->ADD transformation. We do
// this with an ugly goto in case the resultant OR uses EAX and needs the
// short form.
@@ -506,39 +576,41 @@ ReSimplify:
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
- // The assembler backend wants to see branches in their small form and relax
- // them to their large form. The JIT can only handle the large form because
- // it does not do relaxation. For now, translate the large form to the
- // small one here.
- case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
- case X86::JO_4: OutMI.setOpcode(X86::JO_1); break;
- case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
- case X86::JB_4: OutMI.setOpcode(X86::JB_1); break;
- case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
- case X86::JE_4: OutMI.setOpcode(X86::JE_1); break;
- case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
- case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
- case X86::JA_4: OutMI.setOpcode(X86::JA_1); break;
- case X86::JS_4: OutMI.setOpcode(X86::JS_1); break;
- case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
- case X86::JP_4: OutMI.setOpcode(X86::JP_1); break;
- case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
- case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
- case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
- case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
- case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
-
// Atomic load and store require a separate pseudo-inst because Acquire
// implies mayStore and Release implies mayLoad; fix these to regular MOV
// instructions here
- case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
- case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
- case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
- case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
- case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
- case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
- case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
- case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+ case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
+ case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
+ case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
+ case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
+ case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
+ case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
+ case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
+ case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
+ case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
+ case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
+ case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
+ case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
+ case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
+ case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
+ case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
+ case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
+ case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
+ case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
+ case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
+ case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
+ case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
+ case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
+ case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
+ case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
@@ -548,13 +620,13 @@ ReSimplify:
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o32a); break;
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
- case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
- case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
- case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
- case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao32); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o32a); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao32); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
@@ -602,10 +674,8 @@ ReSimplify:
}
}
-static void LowerTlsAddr(MCStreamer &OutStreamer,
- X86MCInstLower &MCInstLowering,
- const MachineInstr &MI,
- const MCSubtargetInfo& STI) {
+void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
@@ -615,7 +685,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCContext &context = OutStreamer.getContext();
if (needsPadding)
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -662,12 +732,12 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
LEA.addOperand(MCOperand::CreateReg(0)); // seg
}
- OutStreamer.EmitInstruction(LEA, STI);
+ EmitAndCountInstruction(LEA);
if (needsPadding) {
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
- OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
}
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
@@ -677,9 +747,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCSymbolRefExpr::VK_PLT,
context);
- OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
- : X86::CALLpcrel32)
- .addExpr(tlsRef), STI);
+ EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
}
/// \brief Emit the optimal amount of multi-byte nops on X86.
@@ -725,33 +795,82 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu
break;
case X86::NOOPL:
case X86::NOOPW:
- OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg).addImm(ScaleVal)
- .addReg(IndexReg)
- .addImm(Displacement)
- .addReg(SegmentReg), STI);
+ OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg)
+ .addImm(ScaleVal).addReg(IndexReg)
+ .addImm(Displacement).addReg(SegmentReg), STI);
break;
}
} // while (NumBytes)
}
+static void LowerSTATEPOINT(MCStreamer &OS, StackMaps &SM,
+ const MachineInstr &MI, bool Is64Bit,
+ const TargetMachine& TM,
+ const MCSubtargetInfo& STI,
+ X86MCInstLower &MCInstLowering) {
+ assert(Is64Bit && "Statepoint currently only supports X86-64");
+
+ // Lower call target and choose correct opcode
+ const MachineOperand &call_target = StatepointOpers(&MI).getCallTarget();
+ MCOperand call_target_mcop;
+ unsigned call_opcode;
+ switch (call_target.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ call_target_mcop = MCInstLowering.LowerSymbolOperand(
+ call_target,
+ MCInstLowering.GetSymbolFromOperand(call_target));
+ call_opcode = X86::CALL64pcrel32;
+ // Currently, we only support relative addressing with statepoints.
+ // Otherwise, we'll need a scratch register to hold the target
+ // address. You'll fail asserts during load & relocation if this
+ // symbol is too far away. (TODO: support non-relative addressing)
+ break;
+ case MachineOperand::MO_Immediate:
+ call_target_mcop = MCOperand::CreateImm(call_target.getImm());
+ call_opcode = X86::CALL64pcrel32;
+ // Currently, we only support relative addressing with statepoints.
+ // Otherwise, we'll need a scratch register to hold the target
+ // immediate. You'll fail asserts during load & relocation if this
+ // address is too far away. (TODO: support non-relative addressing)
+ break;
+ case MachineOperand::MO_Register:
+ call_target_mcop = MCOperand::CreateReg(call_target.getReg());
+ call_opcode = X86::CALL64r;
+ break;
+ default:
+ llvm_unreachable("Unsupported operand type in statepoint call target");
+ break;
+ }
+
+ // Emit call
+ MCInst call_inst;
+ call_inst.setOpcode(call_opcode);
+ call_inst.addOperand(call_target_mcop);
+ OS.EmitInstruction(call_inst, STI);
+
+ // Record our statepoint node in the same section used by STACKMAP
+ // and PATCHPOINT
+ SM.recordStatepoint(MI);
+}
+
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
-static void LowerSTACKMAP(MCStreamer &OS, StackMaps &SM,
- const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) {
- unsigned NumBytes = MI.getOperand(1).getImm();
+void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
SM.recordStackMap(MI);
- // Emit padding.
- // FIXME: These nops ensure that the stackmap's shadow is covered by
- // instructions from the same basic block, but the nops should not be
- // necessary if instructions from the same block follow the stackmap.
- EmitNops(OS, NumBytes, Is64Bit, STI);
+ unsigned NumShadowBytes = MI.getOperand(1).getImm();
+ SMShadowTracker.reset(NumShadowBytes);
}
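+// E.g. (illustrative operands): "STACKMAP 2, 8" records ID 2 and opens an
+// 8-byte shadow that subsequent instructions (or trailing nops) must cover.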
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
-static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
- const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) {
- assert(Is64Bit && "Patchpoint currently only supports X86-64");
+void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI) {
+ assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
+
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+
SM.recordPatchPoint(MI);
PatchPointOpers opers(&MI);
@@ -766,22 +885,111 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
EncodedBytes = 13;
else
EncodedBytes = 12;
- OS.EmitInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg)
- .addImm(CallTarget), STI);
- OS.EmitInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg)
+ .addImm(CallTarget));
+ EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
// Emit padding.
unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
- EmitNops(OS, NumBytes - EncodedBytes, Is64Bit, STI);
+ EmitNops(OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
+ getSubtargetInfo());
+}
+
+// Returns the instruction preceding MBBI in its MachineFunction.
+// If MBBI is the first instruction of the first basic block, returns null.
+static MachineBasicBlock::const_iterator
+PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
+ const MachineBasicBlock *MBB = MBBI->getParent();
+ while (MBBI == MBB->begin()) {
+ if (MBB == MBB->getParent()->begin())
+ return nullptr;
+ MBB = MBB->getPrevNode();
+ MBBI = MBB->end();
+ }
+ return --MBBI;
+}
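+
+// Usage sketch: scanning backwards from an SEH_Epilogue marker (see below)
+// for the nearest non-pseudo instruction, possibly in a preceding block:
+//
+//   for (auto I = PrevCrossBBInst(MBBI); I; I = PrevCrossBBInst(I))
+//     if (!I->isPseudo())
+//       break; // found the last real instruction before MI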
+
+static const Constant *getConstantFromPool(const MachineInstr &MI,
+ const MachineOperand &Op) {
+ if (!Op.isCPI())
+ return nullptr;
+
+ ArrayRef<MachineConstantPoolEntry> Constants =
+ MI.getParent()->getParent()->getConstantPool()->getConstants();
+ const MachineConstantPoolEntry &ConstantEntry =
+ Constants[Op.getIndex()];
+
+ // Bail if this is a machine constant pool entry; we won't be able to dig out
+ // anything useful.
+ if (ConstantEntry.isMachineConstantPoolEntry())
+ return nullptr;
+
+ auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+ assert((!C || ConstantEntry.getType() == C->getType()) &&
+ "Expected a constant of the same type!");
+ return C;
+}
+
+static std::string getShuffleComment(const MachineOperand &DstOp,
+ const MachineOperand &SrcOp,
+ ArrayRef<int> Mask) {
+ std::string Comment;
+
+ // Compute the name for a register. This is really goofy because we have
+ // multiple instruction printers that could (in theory) use different
+ // names. Fortunately most people use the ATT style (outside of Windows)
+ // and they actually agree on register naming here. Ultimately, this is
+ // a comment, and so it's OK if it isn't perfect.
+ auto GetRegisterName = [](unsigned RegNum) -> StringRef {
+ return X86ATTInstPrinter::getRegisterName(RegNum);
+ };
+
+ StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
+ StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+
+ raw_string_ostream CS(Comment);
+ CS << DstName << " = ";
+ bool NeedComma = false;
+ bool InSrc = false;
+ for (int M : Mask) {
+ // Wrap up any prior entry...
+ if (M == SM_SentinelZero && InSrc) {
+ InSrc = false;
+ CS << "]";
+ }
+ if (NeedComma)
+ CS << ",";
+ else
+ NeedComma = true;
+
+ // Print this shuffle...
+ if (M == SM_SentinelZero) {
+ CS << "zero";
+ } else {
+ if (!InSrc) {
+ InSrc = true;
+ CS << SrcName << "[";
+ }
+ if (M == SM_SentinelUndef)
+ CS << "u";
+ else
+ CS << M;
+ }
+ }
+ if (InSrc)
+ CS << "]";
+ CS.flush();
+
+ return Comment;
}
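+
+// Example of the produced comment (illustrative registers and mask): for a
+// PSHUFB from xmm1 into xmm0 with mask {0, 2, SM_SentinelZero,
+// SM_SentinelUndef}, the emitted asm comment reads:
+//   xmm0 = xmm1[0,2],zero,xmm1[u]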
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
- const X86RegisterInfo *RI =
- static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+ const X86RegisterInfo *RI = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -812,7 +1020,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
- return LowerTlsAddr(OutStreamer, MCInstLowering, *MI, getSubtargetInfo());
+ return LowerTlsAddr(MCInstLowering, *MI);
case X86::MOVPC32r: {
// This is a pseudo op for a two instruction sequence with a label, which
@@ -825,15 +1033,15 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::CALLpcrel32)
+ EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
// popl $reg
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::POP32r)
- .addReg(MI->getOperand(0).getReg()));
+ EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
@@ -863,29 +1071,32 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::ADD32ri)
+ EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(DotExpr));
return;
}
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), TM,
+ getSubtargetInfo(), MCInstLowering);
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo());
+ return LowerSTACKMAP(*MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo());
+ return LowerPATCHPOINT(*MI);
case X86::MORESTACK_RET:
- EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget)));
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
case X86::MORESTACK_RET_RESTORE_R10:
// Return, then restore R10.
- EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget)));
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::MOV64rr)
- .addReg(X86::R10)
- .addReg(X86::RAX));
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::R10)
+ .addReg(X86::RAX));
return;
case X86::SEH_PushReg:
@@ -918,9 +1129,151 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::SEH_EndPrologue:
OutStreamer.EmitWinCFIEndProlog();
return;
+
+ case X86::SEH_Epilogue: {
+ MachineBasicBlock::const_iterator MBBI(MI);
+ // Check if preceded by a call and emit nop if so.
+ for (MBBI = PrevCrossBBInst(MBBI); MBBI; MBBI = PrevCrossBBInst(MBBI)) {
+ // Conservatively assume that pseudo instructions don't emit code and keep
+ // looking for a call. We may emit an unnecessary nop in some cases.
+ if (!MBBI->isPseudo()) {
+ if (MBBI->isCall())
+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
+ break;
+ }
+ }
+ return;
+ }
+
+ // Lower PSHUFB and VPERMILP normally but add a comment if we can find
+ // a constant shuffle mask. We won't be able to do this at the MC layer
+ // because the mask isn't an immediate.
+ case X86::PSHUFBrm:
+ case X86::VPSHUFBrm:
+ case X86::VPSHUFBYrm: {
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 5 &&
+ "We should always have at least 6 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const MachineOperand &MaskOp = MI->getOperand(5);
+
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodePSHUFBMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ }
+ break;
+ }
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPDrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPDYrm: {
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 5 &&
+ "We should always have at least 6 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const MachineOperand &MaskOp = MI->getOperand(5);
+
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodeVPERMILPMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ }
+ break;
+ }
+
+ // For loads from a constant pool to a vector register, print the constant
+ // loaded.
+ case X86::MOVAPDrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVAPDYrm:
+ case X86::MOVUPDrm:
+ case X86::VMOVUPDrm:
+ case X86::VMOVUPDYrm:
+ case X86::MOVAPSrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPSYrm:
+ case X86::MOVUPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVUPSYrm:
+ case X86::MOVDQArm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQAYrm:
+ case X86::MOVDQUrm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVDQUYrm:
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ if (MI->getNumOperands() > 4)
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ CS << "[";
+ for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+ if (i != 0)
+ CS << ",";
+ if (CDS->getElementType()->isIntegerTy())
+ CS << CDS->getElementAsInteger(i);
+ else if (CDS->getElementType()->isFloatTy())
+ CS << CDS->getElementAsFloat(i);
+ else if (CDS->getElementType()->isDoubleTy())
+ CS << CDS->getElementAsDouble(i);
+ else
+ CS << "?";
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ CS << "<";
+ for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+ if (i != 0)
+ CS << ",";
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ CS << "u";
+ } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
+ CS << CI->getZExtValue();
+ } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
+ SmallString<32> Str;
+ CF->getValueAPF().toString(Str);
+ CS << Str;
+ } else {
+ CS << "?";
+ }
+ }
+ CS << ">";
+ OutStreamer.AddComment(CS.str());
+ }
+ }
+ break;
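+ // Example of the resulting annotations (illustrative values): a <4 x i32>
+ // constant pool load of <1, 2, 3, 4> prints the comment
+ //   xmm0 = [1,2,3,4]
+ // while a ConstantVector with an undef lane prints
+ //   xmm0 = <1,u,3,4>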
}
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+
+ // Stackmap shadows cannot include branch targets, so we can count the bytes
+ // in a call towards the shadow, but must ensure that no thread returns
+ // into the stackmap shadow. The only way to achieve this is if the call
+ // is at the end of the shadow.
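+ // Worked example (illustrative sizes): with an open 8-byte shadow and a
+ // 5-byte call, count() records the call's 5 bytes, emitShadowPadding() then
+ // emits 3 bytes of nops *before* the call, and the call's last byte closes
+ // the shadow.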
+ if (MI->isCall()) {
+ // Count the size of the call towards the shadow.
+ SMShadowTracker.count(TmpInst, getSubtargetInfo());
+ // Then flush the shadow so that we fill with nops before the call, not
+ // after it.
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ // Then emit the call
+ OutStreamer.EmitInstruction(TmpInst, getSubtargetInfo());
+ return;
+ }
+
+ EmitAndCountInstruction(TmpInst);
}
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
index 568dc222d9d1..ac2cdc8c6567 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -8,7 +8,26 @@
//===----------------------------------------------------------------------===//
#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
void X86MachineFunctionInfo::anchor() { }
+
+void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) {
+ if (!RestoreBasePointerOffset) {
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+ unsigned SlotSize = RegInfo->getSlotSize();
+ for (const MCPhysReg *CSR =
+ RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
+ unsigned Reg = *CSR;
+ ++CSR)
+ {
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ RestoreBasePointerOffset -= SlotSize;
+ }
+ }
+}
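+
+// Worked example (illustrative, assuming the SysV x86-64 CSR set): with
+// SlotSize == 8 and RBX, R12, R13, R14, R15, RBP as the GPR callee-saves,
+// the loop runs six times and leaves RestoreBasePointerOffset == -48, i.e.
+// the base pointer is stashed 48 bytes below the frame pointer.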
+
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 78d20ce870f1..b23a744da686 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -11,10 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86MACHINEFUNCTIONINFO_H
-#define X86MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include <vector>
namespace llvm {
@@ -29,6 +32,12 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// contains stack pointer re-alignment code which requires FP.
bool ForceFramePointer;
+ /// RestoreBasePointerOffset - Non-zero if the function has a base pointer
+ /// and makes a call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
+ /// displacement from the frame pointer to a slot where the base pointer
+ /// is stashed.
+ signed char RestoreBasePointerOffset;
+
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
@@ -69,8 +78,14 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
unsigned NumLocalDynamics;
+private:
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+ /// that must be forwarded to every musttail call.
+ SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
+ RestoreBasePointerOffset(0),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
ReturnAddrIndex(0),
@@ -86,6 +101,7 @@ public:
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
+ RestoreBasePointerOffset(0),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
ReturnAddrIndex(0),
@@ -102,6 +118,10 @@ public:
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+ bool getRestoreBasePointer() const { return RestoreBasePointerOffset != 0; }
+ void setRestoreBasePointer(const MachineFunction *MF);
+ int getRestoreBasePointerOffset() const { return RestoreBasePointerOffset; }
+
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
@@ -138,6 +158,9 @@ public:
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+ SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
+ return ForwardedMustTailRegParms;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 6639875d07e3..adc05b2f6ea4 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -105,7 +105,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
if (!TM->getSubtarget<X86Subtarget>().padShortFunctions())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
@@ -195,7 +195,8 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
return true;
}
- CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ CyclesToEnd += TII->getInstrLatency(
+ TM->getSubtargetImpl()->getInstrItineraryData(), MI);
}
VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index e8a7e84bb7e3..09e651cebfb9 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -68,8 +68,10 @@ X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
if (Is64Bit) {
SlotSize = 8;
- StackPtr = X86::RSP;
- FramePtr = X86::RBP;
+ StackPtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
+ X86::RSP : X86::ESP;
+ FramePtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
+ X86::RBP : X86::EBP;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
@@ -120,7 +122,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass*
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
// Don't allow super-classes of GR8_NOREX. This class is only used after
- // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
// to the full GR8 register class in 64-bit mode, so we cannot allow the
// reigster class inflation.
//
@@ -196,7 +198,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
@@ -324,7 +326,7 @@ X86RegisterInfo::getNoPreservedMask() const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// Set the stack-pointer register and its aliases as reserved.
for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
@@ -441,7 +443,8 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -456,13 +459,9 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (Reg == FramePtr && TFI->hasFP(MF)) {
- FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
- return true;
- }
- return false;
+  // Since X86 defines assignCalleeSavedSpillSlots, which always returns
+  // true, this function is neither used nor tested.
+ llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}
void
@@ -473,7 +472,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
@@ -488,6 +487,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+  // For LEA64_32r, when BasePtr is 32 bits (X32), we can use the full-size
+  // 64-bit register as the source operand; the semantics are the same and
+  // the destination is 32 bits. This saves one byte per LEA, since the 0x67
+  // address-size prefix is avoided.
+ if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
+ BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false);
+
// This must be part of a four-operand memory reference. Replace the
// FrameIndex with the base register. Add an offset to the offset.
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
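A hedged illustration of the one-byte saving described in the comment above (Intel syntax; encodings per the x86-64 manuals, shown for an 8-bit displacement):

// 32-bit base register: the 0x67 address-size override prefix is required.
//   67 8d 45 08    lea eax, [ebp + 8]
// 64-bit super-register: same 32-bit result on x32, no prefix needed.
//      8d 45 08    lea eax, [rbp + 8]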
@@ -526,10 +531,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
+unsigned X86RegisterInfo::getPtrSizedFrameRegister(
+ const MachineFunction &MF) const {
+ unsigned FrameReg = getFrameRegister(MF);
+ if (Subtarget.isTarget64BitILP32())
+ FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
+ return FrameReg;
+}
+
namespace llvm {
unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
bool High) {
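getPtrSizedFrameRegister makes the pointer width explicit under the x32 ABI. A hedged usage sketch; getX86RegisterInfo is a hypothetical accessor standing in for however a caller reaches the register info:

// On LP64 both calls return the same register; under ILP32 (x32) the
// second narrows RBP/RSP to EBP/ESP via getX86SubSuperRegister.
const X86RegisterInfo &TRI = getX86RegisterInfo(MF); // hypothetical helper
unsigned FullReg = TRI.getFrameRegister(MF);         // e.g. X86::RBP
unsigned PtrReg  = TRI.getPtrSizedFrameRegister(MF); // e.g. X86::EBP on x32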
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 74efd1fe32d7..406b1fcd8d0f 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86REGISTERINFO_H
-#define X86REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
+#define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
@@ -122,6 +122,7 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
+ unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfo
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 0da98637496e..30cd09a5a2f1 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -166,6 +166,7 @@ def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
+def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
@@ -234,22 +235,18 @@ let SubRegIndices = [sub_ymm] in {
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
-class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
- let Aliases = A;
-}
-
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
-def ST0 : STRegister<"st(0)", 0, []>, DwarfRegNum<[33, 12, 11]>;
-def ST1 : STRegister<"st(1)", 1, [FP6]>, DwarfRegNum<[34, 13, 12]>;
-def ST2 : STRegister<"st(2)", 2, [FP5]>, DwarfRegNum<[35, 14, 13]>;
-def ST3 : STRegister<"st(3)", 3, [FP4]>, DwarfRegNum<[36, 15, 14]>;
-def ST4 : STRegister<"st(4)", 4, [FP3]>, DwarfRegNum<[37, 16, 15]>;
-def ST5 : STRegister<"st(5)", 5, [FP2]>, DwarfRegNum<[38, 17, 16]>;
-def ST6 : STRegister<"st(6)", 6, [FP1]>, DwarfRegNum<[39, 18, 17]>;
-def ST7 : STRegister<"st(7)", 7, [FP0]>, DwarfRegNum<[40, 19, 18]>;
+def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
+def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
+def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
+def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
+def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
+def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
+def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
+def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
@@ -266,14 +263,22 @@ def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;
// Debug registers
-def DR0 : X86Reg<"dr0", 0>;
-def DR1 : X86Reg<"dr1", 1>;
-def DR2 : X86Reg<"dr2", 2>;
-def DR3 : X86Reg<"dr3", 3>;
-def DR4 : X86Reg<"dr4", 4>;
-def DR5 : X86Reg<"dr5", 5>;
-def DR6 : X86Reg<"dr6", 6>;
-def DR7 : X86Reg<"dr7", 7>;
+def DR0 : X86Reg<"dr0", 0>;
+def DR1 : X86Reg<"dr1", 1>;
+def DR2 : X86Reg<"dr2", 2>;
+def DR3 : X86Reg<"dr3", 3>;
+def DR4 : X86Reg<"dr4", 4>;
+def DR5 : X86Reg<"dr5", 5>;
+def DR6 : X86Reg<"dr6", 6>;
+def DR7 : X86Reg<"dr7", 7>;
+def DR8 : X86Reg<"dr8", 8>;
+def DR9 : X86Reg<"dr9", 9>;
+def DR10 : X86Reg<"dr10", 10>;
+def DR11 : X86Reg<"dr11", 11>;
+def DR12 : X86Reg<"dr12", 12>;
+def DR13 : X86Reg<"dr13", 13>;
+def DR14 : X86Reg<"dr14", 14>;
+def DR15 : X86Reg<"dr15", 15>;
// Control registers
def CR0 : X86Reg<"cr0", 0>;
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
deleted file mode 100644
index 03330566161d..000000000000
--- a/lib/Target/X86/X86Relocations.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the X86 target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86RELOCATIONS_H
-#define X86RELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace X86 {
- /// RelocationType - An enum for the x86 relocation codes. Note that
- /// the terminology here doesn't follow x86 convention - word means
- /// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code
- /// emitter but JIT and ObjectCode will treat them differently
- enum RelocationType {
- /// reloc_pcrel_word - PC relative relocation, add the relocated value to
- /// the value already in memory, after we adjust it for where the PC is.
- reloc_pcrel_word = 0,
-
- /// reloc_picrel_word - PIC base relative relocation, add the relocated
- /// value to the value already in memory, after we adjust it for where the
- /// PIC base is.
- reloc_picrel_word = 1,
-
- /// reloc_absolute_word - absolute relocation, just add the relocated
- /// value to the value already in memory.
- reloc_absolute_word = 2,
-
- /// reloc_absolute_word_sext - absolute relocation, just add the relocated
- /// value to the value already in memory. In object files, it represents a
- /// value which must be sign-extended when resolving the relocation.
- reloc_absolute_word_sext = 3,
-
- /// reloc_absolute_dword - absolute relocation, just add the relocated
- /// value to the value already in memory.
- reloc_absolute_dword = 4
- };
- }
-}
-
-#endif
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 6966d616f8e3..73a32304302a 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -48,13 +48,17 @@ def HWPort6 : ProcResource<1>;
def HWPort7 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
+def HWPort01 : ProcResGroup<[HWPort0, HWPort1]>;
def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort04 : ProcResGroup<[HWPort0, HWPort4]>;
def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
-def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
+def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
+def HWPort56 : ProcResGroup<[HWPort5, HWPort6]>;
def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
// 60 Entry Unified Scheduler
@@ -125,6 +129,7 @@ defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFRsqrt, HWPort0, 5>;
defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
@@ -261,4 +266,1882 @@ def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
def : WriteRes<WriteNop, []>;
+
+//================ Exceptions ================//
+
+//-- Specific Scheduling Models --//
+
+// Starting with P0.
+def WriteP0 : SchedWriteRes<[HWPort0]>;
+
+def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def WriteP01 : SchedWriteRes<[HWPort01]>;
+
+def Write2P01 : SchedWriteRes<[HWPort01]> {
+ let NumMicroOps = 2;
+}
+def Write3P01 : SchedWriteRes<[HWPort01]> {
+ let NumMicroOps = 3;
+}
+
+def WriteP015 : SchedWriteRes<[HWPort015]>;
+
+def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> {
+ let NumMicroOps = 2;
+}
+def WriteP06 : SchedWriteRes<[HWPort06]>;
+
+def Write2P06 : SchedWriteRes<[HWPort06]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+
+def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+
+def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let NumMicroOps = 2;
+}
+
+def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+
+def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let Latency = 6;
+ let ResourceCycles = [2, 1];
+}
+
+def Write5P0156 : SchedWriteRes<[HWPort0156]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [5];
+}
+
+def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 2, 1];
+}
+
+def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 2, 1];
+}
+
+def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [3, 2, 1];
+}
+
+// Starting with P1.
+def WriteP1 : SchedWriteRes<[HWPort1]>;
+
+def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
+ let NumMicroOps = 2;
+}
+def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> {
+ let Latency = 3;
+}
+def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> {
+ let Latency = 7;
+}
+
+def Write2P1 : SchedWriteRes<[HWPort1]> {
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def WriteP15 : SchedWriteRes<[HWPort15]>;
+def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> {
+ let Latency = 4;
+}
+
+def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+// Starting with P2.
+def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+}
+
+// Starting with P5.
+def WriteP5 : SchedWriteRes<[HWPort5]>;
+def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+// Notation:
+// - r: register.
+// - mm: 64-bit MMX register.
+// - x: 128-bit XMM register.
+// - (x)mm: MMX or XMM register.
+// - y: 256-bit YMM register.
+// - v: any vector register.
+// - m: memory.
+
+//=== Integer Instructions ===//
+//-- Move instructions --//
+
+// MOV.
+// r16,m.
+def : InstRW<[WriteALULd], (instregex "MOV16rm")>;
+
+// MOVSX, MOVZX.
+// r,m.
+def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
+
+// CMOVcc.
+// r,r.
+def : InstRW<[Write2P0156_Lat2],
+ (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
+// r,m.
+def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
+ (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
+
+// XCHG.
+// r,r.
+def WriteXCHG : SchedWriteRes<[HWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [3];
+}
+
+def : InstRW<[WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
+
+// r,m.
+def WriteXCHGrm : SchedWriteRes<[]> {
+ let Latency = 21;
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;
+
+// XLAT.
+def WriteXLAT : SchedWriteRes<[]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteXLAT], (instregex "XLAT")>;
+
+// PUSH.
+// m.
+def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;
+
+// PUSHF.
+def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;
+
+// PUSHA.
+def WritePushA : SchedWriteRes<[]> {
+ let NumMicroOps = 19;
+}
+def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;
+
+// POP.
+// m.
+def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;
+
+// POPF.
+def WritePopF : SchedWriteRes<[]> {
+ let NumMicroOps = 9;
+}
+def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;
+
+// POPA.
+def WritePopA : SchedWriteRes<[]> {
+ let NumMicroOps = 18;
+}
+def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;
+
+// LAHF SAHF.
+def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;
+
+// BSWAP.
+// r32.
+def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
+def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;
+
+// r64.
+def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;
+
+// MOVBE.
+// r16,m16 / r64,m64.
+def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;
+
+// r32, m32.
+def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;
+
+// m16,r16.
+def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;
+
+// m32,r32.
+def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;
+
+// m64,r64.
+def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;
+
+//-- Arithmetic instructions --//
+
+// ADD SUB.
+// m,r/i.
+def : InstRW<[Write2P0156_2P237_P4],
+ (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
+ "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>;
+
+// ADC SBB.
+// r,r/i.
+def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
+ "(ADC|SBB)(16|32|64)ri8",
+ "(ADC|SBB)64ri32",
+ "(ADC|SBB)(8|16|32|64)rr_REV")>;
+
+// r,m.
+def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>;
+
+// m,r/i.
+def : InstRW<[Write3P0156_2P237_P4],
+ (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
+ "(ADC|SBB)(16|32|64)mi8",
+ "(ADC|SBB)64mi32")>;
+
+// INC DEC NOT NEG.
+// m.
+def : InstRW<[WriteP0156_2P237_P4],
+ (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
+ "(INC|DEC)64(16|32)m")>;
+
+// MUL IMUL.
+// r16.
+def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;
+
+// m16.
+def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;
+
+// r32.
+def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;
+
+// m32.
+def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;
+
+// r64.
+def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;
+
+// m64.
+def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;
+
+// r16,r16.
+def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;
+
+// r16,m16.
+def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;
+
+// MULX.
+// r32,r32,r32.
+def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;
+
+// r32,r32,m32.
+def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;
+
+// r64,r64,r64.
+def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;
+
+// r64,r64,m64.
+def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;
+
+// DIV.
+// r8.
+def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteDiv8], (instregex "DIV8r")>;
+
+// r16.
+def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteDiv16], (instregex "DIV16r")>;
+
+// r32.
+def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteDiv32], (instregex "DIV32r")>;
+
+// r64.
+def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 32;
+ let NumMicroOps = 36;
+}
+def : InstRW<[WriteDiv64], (instregex "DIV64r")>;
+
+// IDIV.
+// r8.
+def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;
+
+// r16.
+def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;
+
+// r32.
+def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
+
+// r64.
+def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 39;
+ let NumMicroOps = 59;
+}
+def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
+
+//-- Logic instructions --//
+
+// AND OR XOR.
+// m,r/i.
+def : InstRW<[Write2P0156_2P237_P4],
+ (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
+ "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
+
+// SHR SHL SAR.
+// m,i.
+def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
+ let NumMicroOps = 6;
+ let ResourceCycles = [3, 2, 1];
+}
+def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
+
+// ROR ROL.
+// r,1.
+def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;
+
+// m,i.
+def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 2, 1];
+}
+def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteRotateRMWCL : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;
+
+// RCR RCL.
+// r,1.
+def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;
+
+// m,1.
+def WriteRCm1 : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;
+
+// r,i.
+def WriteRCri : SchedWriteRes<[HWPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
+
+// m,i.
+def WriteRCmi : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
+
+// SHRD SHLD.
+// r,r,i.
+def WriteShDrr : SchedWriteRes<[HWPort1]> {
+ let Latency = 3;
+}
+def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;
+
+// m,r,i.
+def WriteShDmr : SchedWriteRes<[]> {
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;
+
+// r,r,cl.
+def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;
+
+// r,r,cl.
+def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;
+
+// m,r,cl.
+def WriteShDmrCL : SchedWriteRes<[]> {
+ let NumMicroOps = 7;
+}
+def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;
+
+// BT.
+// r,r/i.
+def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
+
+// m,r.
+def WriteBTmr : SchedWriteRes<[]> {
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;
+
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
+
+// BTR BTS BTC.
+// r,r,i.
+def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
+
+// m,r.
+def WriteBTRSCmr : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
+
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;
+
+// BSF BSR.
+// r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;
+
+// SETcc.
+// r.
+def : InstRW<[WriteShift],
+ (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
+// m.
+def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteSetCCm],
+ (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
+
+// CLD STD.
+def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
+
+// LZCNT TZCNT.
+// r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "(L|T)ZCNT(16|32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|T)ZCNT(16|32|64)rm")>;
+
+// ANDN.
+// r,r.
+def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;
+
+// BLSI BLSMSK BLSR.
+// r,r.
+def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
+
+// BEXTR.
+// r,r,r.
+def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
+// r,m,r.
+def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;
+
+// BZHI.
+// r,r,r.
+def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
+// r,m,r.
+def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;
+
+// PDEP PEXT.
+// r,r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
+// r,m,r.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
+
+//-- Control transfer instructions --//
+
+// J(E|R)CXZ.
+def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;
+
+// LOOP.
+def WriteLOOP : SchedWriteRes<[]> {
+ let NumMicroOps = 7;
+}
+def : InstRW<[WriteLOOP], (instregex "LOOP")>;
+
+// LOOP(N)E
+def WriteLOOPE : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;
+
+// CALL.
+// r.
+def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;
+
+// m.
+def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;
+
+// RET.
+def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;
+
+// i.
+def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
+
+// BOUND.
+// r,m.
+def WriteBOUND : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;
+
+// INTO.
+def WriteINTO : SchedWriteRes<[]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteINTO], (instregex "INTO")>;
+
+//-- String instructions --//
+
+// LODSB/W.
+def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;
+
+// LODSD/Q.
+def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;
+
+// STOS.
+def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;
+
+// MOVS.
+def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 1, 2];
+}
+def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;
+
+// SCAS.
+def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;
+
+// CMPS.
+def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 3];
+}
+def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>;
+
+//-- Synchronization instructions --//
+
+// XADD.
+def WriteXADD : SchedWriteRes<[]> {
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;
+
+// CMPXCHG.
+def WriteCMPXCHG : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
+
+// CMPXCHG8B.
+def WriteCMPXCHG8B : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;
+
+// CMPXCHG16B.
+def WriteCMPXCHG16B : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;
+
+//-- Other --//
+
+// PAUSE.
+def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[WritePAUSE], (instregex "PAUSE")>;
+
+// LEAVE.
+def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;
+
+// XGETBV.
+def WriteXGETBV : SchedWriteRes<[]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;
+
+// RDTSC.
+def WriteRDTSC : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;
+
+// RDPMC.
+def WriteRDPMC : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;
+
+// RDRAND.
+def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
+ let NumMicroOps = 17;
+ let ResourceCycles = [1, 16];
+}
+def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
+
+//=== Floating Point x87 Instructions ===//
+//-- Move instructions --//
+
+// FLD.
+// r.
+def : InstRW<[WriteP01], (instregex "LD_Frr")>;
+
+// m80.
+def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 2];
+}
+def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>;
+
+// FBLD.
+// m80.
+def WriteFBLD : SchedWriteRes<[]> {
+ let Latency = 47;
+ let NumMicroOps = 43;
+}
+def : InstRW<[WriteFBLD], (instregex "FBLDm")>;
+
+// FST(P).
+// r.
+def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;
+
+// m80.
+def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
+ let NumMicroOps = 7;
+ let ResourceCycles = [3, 2, 2];
+}
+def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;
+
+// FBSTP.
+// m80.
+def WriteFBSTP : SchedWriteRes<[]> {
+ let NumMicroOps = 226;
+}
+def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;
+
+// FXCHG.
+def : InstRW<[WriteNop], (instregex "XCH_F")>;
+
+// FILD.
+def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>;
+
+// FIST(P) FISTTP.
+def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;
+
+// FLDZ.
+def : InstRW<[WriteP01], (instregex "LD_F0")>;
+
+// FLD1.
+def : InstRW<[Write2P01], (instregex "LD_F1")>;
+
+// FLDPI FLDL2E etc.
+def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)" "FLDL(G|N)2")>;
+
+// FCMOVcc.
+def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>;
+
+// FNSTSW.
+// AX.
+def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;
+
+// m16.
+def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;
+
+// FLDCW.
+def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>;
+
+// FNSTCW.
+def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>;
+
+// FINCSTP FDECSTP.
+def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;
+
+// FFREE.
+def : InstRW<[WriteP01], (instregex "FFREE")>;
+
+// FNSAVE.
+def WriteFNSAVE : SchedWriteRes<[]> {
+ let NumMicroOps = 147;
+}
+def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;
+
+// FRSTOR.
+def WriteFRSTOR : SchedWriteRes<[]> {
+ let NumMicroOps = 90;
+}
+def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;
+
+//-- Arithmetic instructions --//
+
+// FABS.
+def : InstRW<[WriteP0], (instregex "ABS_F")>;
+
+// FCHS.
+def : InstRW<[WriteP0], (instregex "CHS_F")>;
+
+// FCOM(P) FUCOM(P).
+// r.
+def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
+ "UCOM_FPr")>;
+// m.
+def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;
+
+// FCOMPP FUCOMPP.
+// r.
+def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;
+
+// FCOMI(P) FUCOMI(P).
+// r.
+def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
+ "UCOM_FIPr")>;
+
+// FICOM(P).
+def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;
+
+// FTST.
+def : InstRW<[WriteP1], (instregex "TST_F")>;
+
+// FXAM.
+def : InstRW<[Write2P1], (instregex "FXAM")>;
+
+// FPREM.
+def WriteFPREM : SchedWriteRes<[]> {
+ let Latency = 19;
+ let NumMicroOps = 28;
+}
+def : InstRW<[WriteFPREM], (instregex "FPREM")>;
+
+// FPREM1.
+def WriteFPREM1 : SchedWriteRes<[]> {
+ let Latency = 27;
+ let NumMicroOps = 41;
+}
+def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;
+
+// FRNDINT.
+def WriteFRNDINT : SchedWriteRes<[]> {
+ let Latency = 11;
+ let NumMicroOps = 17;
+}
+def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;
+
+//-- Math instructions --//
+
+// FSCALE.
+def WriteFSCALE : SchedWriteRes<[]> {
+ let Latency = 75; // 49-125
+ let NumMicroOps = 50; // 25-75
+}
+def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;
+
+// FXTRACT.
+def WriteFXTRACT : SchedWriteRes<[]> {
+ let Latency = 15;
+ let NumMicroOps = 17;
+}
+def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;
+
+//-- Other instructions --//
+
+// FNOP.
+def : InstRW<[WriteP01], (instregex "FNOP")>;
+
+// WAIT.
+def : InstRW<[Write2P01], (instregex "WAIT")>;
+
+// FNCLEX.
+def : InstRW<[Write5P0156], (instregex "FNCLEX")>;
+
+// FNINIT.
+def WriteFNINIT : SchedWriteRes<[]> {
+ let NumMicroOps = 26;
+}
+def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;
+
+//=== Integer MMX and XMM Instructions ===//
+//-- Move instructions --//
+
+// MOVD.
+// r32/64 <- (x)mm.
+def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
+ "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
+
+// (x)mm <- r32/64.
+def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
+ "VMOVDI2PDIrr", "MOVDI2PDIrr")>;
+
+// MOVQ.
+// r64 <- (x)mm.
+def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;
+
+// (x)mm <- r64.
+def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
+
+// (x)mm <- (x)mm.
+def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;
+
+// (V)MOVDQA/U.
+// x <- x.
+def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
+ "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV",
+ "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;
+
+// MOVDQ2Q.
+def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;
+
+// MOVQ2DQ.
+def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;
+
+// PACKSSWB/DW.
+// mm <- mm.
+def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr",
+ "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
+
+// mm <- m64.
+def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm",
+ "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
+
+// VPMOVSX/ZX BW BD BQ DW DQ.
+// y <- x.
+def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|DW|DQ)Yrr")>;
+
+// PBLENDW.
+// x,x,i / v,v,v,i
+def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
+def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;
+
+// x,m,i / v,v,m,i
+def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
+ let NumMicroOps = 2;
+ let Latency = 4;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;
+
+// VPBLENDD.
+// v,v,v,i.
+def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
+def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;
+
+// v,v,m,i
+def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
+ let NumMicroOps = 2;
+ let Latency = 4;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;
+
+// MASKMOVQ.
+def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2];
+}
+def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>;
+
+// MASKMOVDQU.
+def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 10;
+ let ResourceCycles = [4, 2, 4];
+}
+def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>;
+
+// VPMASKMOV D/Q.
+// v,v,m.
+def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
+ (instregex "VPMASKMOV(D|Q)(Y?)rm")>;
+
+// m, v,v.
+def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
+
+// PMOVMSKB.
+def WritePMOVMSKB : SchedWriteRes<[HWPort0]> {
+ let Latency = 3;
+}
+def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;
+
+// PEXTR B/W/D/Q.
+// r32,x,i.
+def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;
+
+// m8,x,i.
+def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;
+
+// VPBROADCAST B/W.
+// x, m8/16.
+def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
+ (instregex "VPBROADCAST(B|W)rm")>;
+
+// y, m8/16
+def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
+ (instregex "VPBROADCAST(B|W)Yrm")>;
+
+// VPGATHERDD.
+// x.
+def WriteVPGATHERDD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;
+
+// y.
+def WriteVPGATHERDD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;
+
+// VPGATHERQD.
+// x.
+def WriteVPGATHERQD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;
+
+// y.
+def WriteVPGATHERQD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;
+
+// VPGATHERDQ.
+// x.
+def WriteVPGATHERDQ128 : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;
+
+// y.
+def WriteVPGATHERDQ256 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;
+
+// VPGATHERQQ.
+// x.
+def WriteVPGATHERQQ128 : SchedWriteRes<[]> {
+ let NumMicroOps = 14;
+}
+def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;
+
+// y.
+def WriteVPGATHERQQ256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
+
+//-- Arithmetic instructions --//
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64",
+ "MMX_PHADDSWrr64",
+ "MMX_PHSUB(W|D)rr64",
+ "MMX_PHSUBSWrr64",
+ "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
+ "(V?)PH(ADD|SUB)SWrr(256)?")>;
+
+// v <- v,m.
+def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WritePHADDSUBm, ReadAfterLd],
+ (instregex "MMX_PHADD(W?)rm64",
+ "MMX_PHADDSWrm64",
+ "MMX_PHSUB(W|D)rm64",
+ "MMX_PHSUBSWrm64",
+ "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
+ "(V?)PH(ADD|SUB)SWrm(128|256)?")>;
+
+// PCMPGTQ.
+// v <- v,v.
+def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
+
+// v <- v,m.
+def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;
+
+// PMULLD.
+// x,x / y,y,y.
+def WritePMULLDr : SchedWriteRes<[HWPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;
+
+// x,m / y,y,m.
+def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;
+
+//-- Logic instructions --//
+
+// PTEST.
+// v,v.
+def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;
+
+// v,m.
+def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rm")>;
+
+// PSLL,PSRL,PSRA W/D/Q.
+// x,x / v,v,x.
+def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;
+
+// PSLL,PSRL DQ.
+def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;
+
+//-- Other --//
+
+// EMMS.
+def WriteEMMS : SchedWriteRes<[]> {
+ let Latency = 13;
+ let NumMicroOps = 31;
+}
+def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;
+
+//=== Floating Point XMM and YMM Instructions ===//
+//-- Move instructions --//
+
+// MOVMSKP S/D.
+// r32 <- x.
+def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> {
+ let Latency = 3;
+}
+def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>;
+
+// r32 <- y.
+def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> {
+ let Latency = 2;
+}
+def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>;
+
+// VPERM2F128.
+def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
+def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>;
+
+// BLENDVP S/D.
+def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
+def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;
+
+// VBROADCASTF128.
+def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;
+
+// EXTRACTPS.
+// r32,x,i.
+def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
+
+// m32,x,i.
+def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
+
+// VEXTRACTF128.
+// x,y,i.
+def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;
+
+// m128,y,i.
+def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;
+
+// VINSERTF128.
+// y,y,x,i.
+def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;
+
+// y,y,m128,i.
+def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>;
+
+// VMASKMOVP S/D.
+// v,v,m.
+def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;
+
+// m128,x,x.
+def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;
+
+// m256,y,y.
+def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
+
+// VGATHERDPS.
+// x.
+def WriteVGATHERDPS128 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;
+
+// y.
+def WriteVGATHERDPS256 : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;
+
+// VGATHERQPS.
+// x.
+def WriteVGATHERQPS128 : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;
+
+// y.
+def WriteVGATHERQPS256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;
+
+// VGATHERDPD.
+// x.
+def WriteVGATHERDPD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;
+
+// y.
+def WriteVGATHERDPD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;
+
+// VGATHERQPD.
+// x.
+def WriteVGATHERQPD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 14;
+}
+def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;
+
+// y.
+def WriteVGATHERQPD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;
+
+//-- Conversion instructions --//
+
+// CVTPD2PS.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;
+
+// x,m128.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;
+
+// x,y.
+def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;
+
+// x,m256.
+def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;
+
+// CVTSD2SS.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;
+
+// x,m64.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;
+
+// CVTPS2PD.
+// x,x.
+def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;
+
+// x,m64.
+// y,m128.
+def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;
+
+// y,x.
+def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;
+
+// CVTSS2SD.
+// x,x.
+def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;
+
+// x,m32.
+def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;
+
+// CVTDQ2PD.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;
+
+// y,x.
+def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;
+
+// CVT(T)PD2DQ.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;
+// x,m128.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;
+// x,y.
+def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;
+// x,m256.
+def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;
+
+// CVT(T)PS2PI.
+// mm,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;
+
+// CVTPI2PD.
+// x,mm.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;
+
+// CVT(T)PD2PI.
+// mm,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;
+
+// CVTSI2SS.
+// x,r32.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;
+
+// CVT(T)SS2SI.
+// r32,x.
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
+// r32,m32.
+def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
+
+// CVTSI2SD.
+// x,r32/64.
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SS(64)?rr")>;
+
+// CVTSD2SI.
+// r32/64
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
+// r32,m32.
+def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;
+
+// VCVTPS2PH.
+// x,v,i.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
+// m,v,i.
+def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;
+
+// VCVTPH2PS.
+// v,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;
+
+//-- Arithmetic instructions --//
+
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;
+
+// x,m / v,v,m.
+def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;
+
+// MUL SS/SD PS/PD.
+// x,x / v,v,v.
+def WriteMULr : SchedWriteRes<[HWPort01]> {
+ let Latency = 5;
+}
+def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
+
+// x,m / v,v,m.
+def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
+  let Latency = 9; // 5 + 4 cycle load.
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;
+
+// VDIVPS.
+// y,y,y.
+def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 19; // 18-21 cycles.
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;
+
+// y,y,m256.
+def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 23; // 18-21 + 4 cycles.
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;
+
+// VDIVPD.
+// y,y,y.
+def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 27; // 19-35 cycles.
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;
+
+// y,y,m256.
+def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 31; // 19-35 + 4 cycles.
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;
+
+// VRCPPS.
+// y,y.
+def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;
+
+// y,m256.
+def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;
+
+// ROUND SS/SD PS/PD.
+// v,v,i.
+def WriteROUNDr : SchedWriteRes<[HWPort1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
+
+// v,m,i.
+def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
+
+// DPPS.
+// x,x,i / v,v,v,i.
+def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;
+
+// x,m,i / v,v,m,i.
+def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
+ let Latency = 18;
+ let NumMicroOps = 6;
+ let ResourceCycles = [2, 1, 1, 1, 1];
+}
+def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;
+
+// DPPD.
+// x,x,i.
+def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;
+
+// x,m,i.
+def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;
+
+// VFMADD.
+// v,v,v.
+def WriteFMADDr : SchedWriteRes<[HWPort01]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WriteFMADDr],
+    (instregex
+     // 3p forms.
+     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
+     // 3s forms.
+     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
+     // 4s/4s_int forms.
+     "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
+     // 4p forms.
+     "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
+
+// v,v,m.
+def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteFMADDm],
+    (instregex
+     // 3p forms.
+     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
+     // 3s forms.
+     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
+     // 4s/4s_int forms.
+     "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
+     // 4p forms.
+     "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;
+
+//-- Math instructions --//
+
+// VSQRTPS.
+// y,y.
+def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 19;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
+
+// y,m256.
+def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 23;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>;
+
+// VSQRTPD.
+// y,y.
+def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 28;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
+
+// y,m256.
+def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 32;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;
+
+// RSQRT SS/PS.
+// x,x.
+def WriteRSQRTr : SchedWriteRes<[HWPort0]> {
+ let Latency = 5;
+}
+def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;
+
+// x,m128.
+def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;
+
+// RSQRTPS 256.
+// y,y.
+def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;
+
+// y,m256.
+def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;
+
+//-- Logic instructions --//
+
+// AND, ANDN, OR, XOR PS/PD.
+// x,x / v,v,v.
+def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
+// x,m / v,v,m.
+def : InstRW<[WriteP5Ld, ReadAfterLd],
+ (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;
+
+//-- Other instructions --//
+
+// VZEROUPPER.
+def WriteVZEROUPPER : SchedWriteRes<[]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>;
+
+// VZEROALL.
+def WriteVZEROALL : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>;
+
+// LDMXCSR.
+def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>;
+
+// STMXCSR.
+def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 83f053425aa1..eca65c2892b7 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -117,6 +117,7 @@ defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index b76850aa1c8b..a261356afe6a 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -63,12 +63,13 @@ def WriteZero : SchedWrite;
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
+defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
@@ -314,6 +315,11 @@ def IIC_SSE_SQRTPD_RM : InstrItinClass;
def IIC_SSE_SQRTSD_RR : InstrItinClass;
def IIC_SSE_SQRTSD_RM : InstrItinClass;
+def IIC_SSE_RSQRTPS_RR : InstrItinClass;
+def IIC_SSE_RSQRTPS_RM : InstrItinClass;
+def IIC_SSE_RSQRTSS_RR : InstrItinClass;
+def IIC_SSE_RSQRTSS_RM : InstrItinClass;
+
def IIC_SSE_RCPP_RR : InstrItinClass;
def IIC_SSE_RCPP_RM : InstrItinClass;
def IIC_SSE_RCPS_RR : InstrItinClass;
@@ -640,3 +646,5 @@ include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86ScheduleSLM.td"
+include "X86ScheduleBtVer2.td"
+
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index c8820aa2d8df..4c559c9c1798 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -224,6 +224,11 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTPS_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTPS_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTSS_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_RSQRTSS_RM, [InstrStage<4, [Port0]>] >,
+
InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
new file mode 100644
index 000000000000..ce1ece34e431
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -0,0 +1,341 @@
+//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD btver2 (Jaguar) to support
+// instruction scheduling and other instruction cost heuristics. Based on the
+// AMD Software Optimization Guide for AMD Family 16h Processors and its
+// Instruction Latency appendix.
+//
+//===----------------------------------------------------------------------===//
+
+def BtVer2Model : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and btver2 can
+ // decode 2 instructions per cycle.
+ let IssueWidth = 2;
+ let MicroOpBufferSize = 64; // Retire Control Unit
+ let LoadLatency = 5; // FPU latency (worst case; cf. the 3-cycle integer load latency)
+ let HighLatency = 25;
+ let MispredictPenalty = 14; // Minimum branch misprediction penalty
+ let PostRAScheduler = 1;
+
+ // FIXME: SSE4/AVX is unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+}
+
+let SchedModel = BtVer2Model in {
+
+// Jaguar can issue up to 6 micro-ops in one cycle.
+def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handles FP->INT jam)
+def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
+def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
+def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
+def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
+def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
+
+// Any pipe - FIXME: we need this until we can properly discriminate between int/FPU loads, stores and moves
+def JAny : ProcResGroup<[JALU0, JALU1, JLAGU, JSAGU, JFPU0, JFPU1]>;
+
+// Integer Pipe Scheduler
+def JALU01 : ProcResGroup<[JALU0, JALU1]> {
+ let BufferSize=20;
+}
+
+// AGU Pipe Scheduler
+def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
+ let BufferSize=12;
+}
+
+// Fpu Pipe Scheduler
+def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
+ let BufferSize=18;
+}
+
+def JDiv : ProcResource<1>; // integer division
+def JMul : ProcResource<1>; // integer multiplication
+def JVALU0 : ProcResource<1>; // vector integer
+def JVALU1 : ProcResource<1>; // vector integer
+def JVIMUL : ProcResource<1>; // vector integer multiplication
+def JSTC : ProcResource<1>; // vector store/convert
+def JFPM : ProcResource<1>; // FP multiplication
+def JFPA : ProcResource<1>; // FP addition
+
+// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
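+
+// Illustration (assumed semantics): for a load-op pair such as ADD32rm, the
+// register operand tagged ReadAfterLd may be produced up to 3 cycles after
+// the instruction issues without stalling the dependent micro-op.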
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [JLAGU, ExePort]> {
+ let Latency = !add(Lat, 3);
+ }
+}
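+
+// For illustration, a use such as
+//   defm : JWriteResIntPair<WriteALU, JALU01, 1>;
+// expands to roughly:
+//   def : WriteRes<WriteALU, [JALU01]> { let Latency = 1; }
+//   def : WriteRes<WriteALULd, [JLAGU, JALU01]> { let Latency = 4; }
+// (assuming WriteALULd is the folded-load variant paired by X86SchedWritePair).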
+
+multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [JLAGU, ExePort]> {
+ let Latency = !add(Lat, 5);
+ }
+}
+
+// A folded store needs a cycle on the SAGU for the store data.
+def : WriteRes<WriteRMW, [JSAGU]>;
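+// e.g. (illustrative) a read-modify-write instruction such as ADD32mr pays
+// for its load via the folded-load write on JLAGU and takes one JSAGU cycle
+// here for the store-back.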
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteALU, JALU01, 1>;
+defm : JWriteResIntPair<WriteIMul, JALU1, 3>;
+
+def : WriteRes<WriteIMulH, [JALU1]> {
+ let Latency = 6;
+ let ResourceCycles = [4];
+}
+
+// FIXME 8/16 bit divisions
+def : WriteRes<WriteIDiv, [JALU1, JDiv]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 25];
+}
+def : WriteRes<WriteIDivLd, [JALU1, JLAGU, JDiv]> {
+ let Latency = 41;
+ let ResourceCycles = [1, 1, 25];
+}
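+
+// Note (assumed interpretation): the 25 ResourceCycles on JDiv keep the
+// divider busy for 25 cycles per operation, so back-to-back divisions are
+// limited to about one every 25 cycles even though other pipes remain free.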
+
+// This is for simple LEAs with one or two input operands.
+// FIXME: SAGU 3-operand LEA
+def : WriteRes<WriteLEA, [JALU01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteShift, JALU01, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+// FIXME: Split x86 and SSE load/store/moves
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteStore, [JSAGU]>;
+def : WriteRes<WriteMove, [JAny]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteJump, JALU01, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+// FIXME: should we bother splitting JFPU pipe + unit stages for fast instructions?
+// FIXME: Double precision latencies
+// FIXME: SS vs PS latencies
+// FIXME: ymm latencies
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResFpuPair<WriteFAdd, JFPU0, 3>;
+defm : JWriteResFpuPair<WriteFMul, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFRcp, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFRsqrt, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFShuffle, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteFBlend, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, JFPU01, 1>;
+
+def : WriteRes<WriteFSqrt, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 21;
+ let ResourceCycles = [1, 1, 21];
+}
+def : WriteRes<WriteFSqrtLd, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 26;
+ let ResourceCycles = [1, 1, 21];
+}
+
+def : WriteRes<WriteFDiv, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 19;
+ let ResourceCycles = [1, 1, 19];
+}
+def : WriteRes<WriteFDivLd, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 24;
+ let ResourceCycles = [1, 1, 19];
+}
+
+// FIXME: integer pipes
+defm : JWriteResFpuPair<WriteCvtF2I, JFPU1, 3>; // Float -> Integer.
+defm : JWriteResFpuPair<WriteCvtI2F, JFPU1, 3>; // Integer -> Float.
+defm : JWriteResFpuPair<WriteCvtF2F, JFPU1, 3>; // Float -> Float size conversion.
+
+def : WriteRes<WriteFVarBlend, [JFPU01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteFVarBlendLd, [JLAGU, JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2];
+}
+
+// Vector integer operations.
+defm : JWriteResFpuPair<WriteVecALU, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecShift, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecIMul, JFPU0, 2>;
+defm : JWriteResFpuPair<WriteShuffle, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteBlend, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecLogic, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteShuffle256, JFPU01, 1>;
+
+def : WriteRes<WriteVarBlend, [JFPU01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVarBlendLd, [JLAGU, JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2];
+}
+
+// FIXME: why do we need to define an AVX2 resource on a CPU that doesn't have AVX2?
+def : WriteRes<WriteVarVecShift, [JFPU01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteVarVecShiftLd, [JLAGU, JFPU01]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1];
+}
+
+def : WriteRes<WriteMPSAD, [JFPU0]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteMPSADLd, [JLAGU, JFPU0]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// String instructions.
+// Packed Compare Implicit Length Strings, Return Mask
+// FIXME: approximate latencies + pipe dependencies
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WritePCmpIStrM, [JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU01]> {
+ let Latency = 12;
+ let ResourceCycles = [1, 2];
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [JFPU01]> {
+ let Latency = 13;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU01]> {
+ let Latency = 18;
+ let ResourceCycles = [1, 5];
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [JFPU01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU01]> {
+ let Latency = 11;
+ let ResourceCycles = [1, 2];
+}
+
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [JFPU01]> {
+ let Latency = 13;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU01]> {
+ let Latency = 18;
+ let ResourceCycles = [1, 5];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteAESDecEnc, [JFPU01, JVIMUL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 1];
+}
+def : WriteRes<WriteAESDecEncLd, [JFPU01, JLAGU, JVIMUL]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def : WriteRes<WriteAESIMC, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESIMCLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+def : WriteRes<WriteAESKeyGen, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESKeyGenLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteCLMul, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteCLMulLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+// FIXME: pipe for system/microcode?
+def : WriteRes<WriteSystem, [JAny]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [JAny]> { let Latency = 100; }
+def : WriteRes<WriteFence, [JSAGU]>;
+def : WriteRes<WriteNop, []>;
+} // SchedModel
+
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 90d858788124..f95d4fa04177 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -101,6 +101,7 @@ def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]> {
// Scalar and vector floating point.
defm : SMWriteResPair<WriteFAdd, FPC_RSV1, 3>;
defm : SMWriteResPair<WriteFRcp, FPC_RSV0, 5>;
+defm : SMWriteResPair<WriteFRsqrt, FPC_RSV0, 5>;
defm : SMWriteResPair<WriteFSqrt, FPC_RSV0, 15>;
defm : SMWriteResPair<WriteCvtF2I, FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtI2F, FPC_RSV01, 4>;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index a83dd9b2eea7..821044f4bccd 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -29,6 +29,26 @@ X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
+bool X86SelectionDAGInfo::isBaseRegConflictPossible(
+ SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const {
+ // We cannot use TRI->hasBasePointer() until *after* we select all basic
+ // blocks. Legalization may introduce new stack temporaries with large
+ // alignment requirements. Fall back to generic code if there are any
+ // dynamic stack adjustments (hopefully rare) and the base pointer would
+ // conflict if we had to use it.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust())
+ return false;
+
+ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
+ unsigned BaseReg = TRI->getBaseRegister();
+ for (unsigned R : ClobberSet)
+ if (BaseReg == R)
+ return true;
+ return false;
+}
+
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
@@ -39,6 +59,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
+#ifndef NDEBUG
+ // If the base register might conflict with our physical registers, bail out.
+ unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
+ X86::ECX, X86::EAX, X86::EDI};
+ assert(!isBaseRegConflictPossible(DAG, ClobberSet));
+#endif
+
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();
@@ -201,12 +228,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // ESI might be used as a base pointer, in that case we can't simply overwrite
- // the register. Fall back to generic code.
- const X86RegisterInfo *TRI =
- static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
- if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
- TRI->getBaseRegister() == X86::ESI)
+ // If the base register might conflict with our physical registers, bail out.
+ unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
+ X86::ECX, X86::ESI, X86::EDI};
+ if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
MVT AVT;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index c12555a59617..eb7e0ed9de6c 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86SELECTIONDAGINFO_H
-#define X86SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_X86_X86SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -23,6 +23,11 @@ class X86TargetMachine;
class X86Subtarget;
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Returns true if it is possible for the base register to conflict with the
+ /// given set of clobbers for a memory intrinsic.
+ bool isBaseRegConflictPossible(SelectionDAG &DAG,
+ ArrayRef<unsigned> ClobberSet) const;
+
public:
explicit X86SelectionDAGInfo(const DataLayout &DL);
~X86SelectionDAGInfo();
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 41551a1d677f..e59395c06a5c 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -13,6 +13,7 @@
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -67,12 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
- // Determine whether this is a reference to a definition or a declaration.
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -182,23 +178,7 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
-void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
-void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
@@ -277,56 +257,64 @@ void X86Subtarget::initializeEnvironment() {
HasVLX = false;
HasADX = false;
HasSHA = false;
+ HasSGX = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasSMAP = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemFast = false;
+ IsUAMem32Slow = false;
HasVectorUAMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
- HasSlowDivide = false;
+ HasSlowDivide32 = false;
+ HasSlowDivide64 = false;
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
SlowIncDec = false;
+ UseSqrtEst = false;
+ UseReciprocalEst = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
}
-static std::string computeDataLayout(const X86Subtarget &ST) {
+static std::string computeDataLayout(const Triple &TT) {
// X86 is little endian
std::string Ret = "e";
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+ Ret += DataLayout::getManglingComponent(TT);
// X86 and x32 have 32 bit pointers.
- if (ST.isTarget64BitILP32() || !ST.is64Bit())
+ if ((TT.isArch64Bit() &&
+ (TT.getEnvironment() == Triple::GNUX32 || TT.isOSNaCl())) ||
+ !TT.isArch64Bit())
Ret += "-p:32:32";
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
- if (ST.is64Bit() || ST.isOSWindows() || ST.isTargetNaCl())
+ if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
Ret += "-i64:64";
else
Ret += "-f64:32:64";
// Some ABIs align long double to 128 bits, others to 32.
- if (ST.isTargetNaCl())
+ if (TT.isOSNaCl())
; // No f80
- else if (ST.is64Bit() || ST.isTargetDarwin())
+ else if (TT.isArch64Bit() || TT.isOSDarwin())
Ret += "-f80:128";
else
Ret += "-f80:32";
// The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
- if (ST.is64Bit())
+ if (TT.isArch64Bit())
Ret += "-n8:16:32:64";
else
Ret += "-n8:16:32";
// The stack is aligned to 32 bits on some ABIs and 128 bits on others.
- if (!ST.is64Bit() && ST.isOSWindows())
+ if (!TT.isArch64Bit() && TT.isOSWindows())
Ret += "-S32";
else
Ret += "-S128";
@@ -337,26 +325,45 @@ static std::string computeDataLayout(const X86Subtarget &ST) {
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, X86TargetMachine &TM,
+ const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TargetTriple(TT),
+ DL(computeDataLayout(TargetTriple)),
StackAlignOverride(StackAlignOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- DL(computeDataLayout(*this)), TSInfo(DL),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
- is64Bit() ? -8 : -4),
- JITInfo(hasSSE1()) {}
+ TSInfo(DL), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM), FrameLowering(TargetFrameLowering::StackGrowsDown,
+ getStackAlignment(), is64Bit() ? -8 : -4) {
+ // Determine the PICStyle based on the target selected.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ setPICStyle(PICStyles::None);
+ } else if (is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ setPICStyle(PICStyles::RIPRel);
+ } else if (isTargetCOFF()) {
+ setPICStyle(PICStyles::None);
+ } else if (isTargetDarwin()) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
+ setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (isTargetELF()) {
+ setPICStyle(PICStyles::GOT);
+ }
+}
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 5f5df5e0818c..754b5b924717 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86SUBTARGET_H
-#define X86SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
+#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
@@ -139,12 +138,18 @@ protected:
/// HasSHA - Processor has SHA instructions.
bool HasSHA;
+ /// HasSGX - Processor has SGX instructions.
+ bool HasSGX;
+
/// HasPRFCHW - Processor has PRFCHW instructions.
bool HasPRFCHW;
/// HasRDSEED - Processor has RDSEED instructions.
bool HasRDSEED;
+ /// HasSMAP - Processor has SMAP instructions.
+ bool HasSMAP;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -154,6 +159,9 @@ protected:
/// IsUAMemFast - True if unaligned memory access is fast.
bool IsUAMemFast;
+ /// True if unaligned 32-byte memory accesses are slow.
+ bool IsUAMem32Slow;
+
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
@@ -166,9 +174,13 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
- /// HasSlowDivide - True if smaller divides are significantly faster than
- /// full divides and should be used when possible.
- bool HasSlowDivide;
+ /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+ /// 32-bit divisions and should be used when possible.
+ bool HasSlowDivide32;
+
+ /// HasSlowDivide64 - True if 16-bit divisions are significantly faster than
+ /// 64-bit divisions and should be used when possible.
+ bool HasSlowDivide64;
/// PadShortFunctions - True if the short functions should be padded to prevent
/// a stall when returning too early.
@@ -187,6 +199,16 @@ protected:
/// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
+ /// Use the RSQRT* instructions to optimize square root calculations.
+ /// For this to be profitable, the cost of FSQRT and FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseSqrtEst;
+
+ /// Use the RCP* instructions to optimize FP division calculations.
+ /// For this to be profitable, the cost of FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseReciprocalEst;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -220,6 +242,9 @@ protected:
InstrItineraryData InstrItins;
private:
+ // Calculates type size & alignment
+ const DataLayout DL;
+
/// StackAlignOverride - Override the stack alignment.
unsigned StackAlignOverride;
@@ -232,30 +257,35 @@ private:
/// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
- // Calculates type size & alignment
- const DataLayout DL;
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
X86InstrInfo InstrInfo;
X86TargetLowering TLInfo;
X86FrameLowering FrameLowering;
- X86JITInfo JITInfo;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, X86TargetMachine &TM,
+ const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride);
- const X86TargetLowering *getTargetLowering() const { return &TLInfo; }
- const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const X86FrameLowering *getFrameLowering() const { return &FrameLowering; }
- const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- X86JITInfo *getJITInfo() { return &JITInfo; }
+ const X86TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const X86FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const X86RegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -270,14 +300,12 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the X86 target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
/// \brief Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -295,12 +323,13 @@ public:
/// Is this x86_64 with the ILP32 programming model (x32 ABI)?
bool isTarget64BitILP32() const {
return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 ||
- TargetTriple.getOS() == Triple::NaCl);
+ TargetTriple.isOSNaCl());
}
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
bool isTarget64BitLP64() const {
- return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 &&
+ !TargetTriple.isOSNaCl());
}
PICStyles::Style getPICStyle() const { return PICStyle; }
@@ -341,20 +370,26 @@ public:
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
+ bool hasSGX() const { return HasSGX; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
+ bool hasSMAP() const { return HasSMAP; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
- bool hasSlowDivide() const { return HasSlowDivide; }
+ bool hasSlowDivide32() const { return HasSlowDivide32; }
+ bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slowIncDec() const { return SlowIncDec; }
+ bool useSqrtEst() const { return UseSqrtEst; }
+ bool useReciprocalEst() const { return UseReciprocalEst; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
@@ -368,16 +403,13 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
- bool isTargetFreeBSD() const {
- return TargetTriple.getOS() == Triple::FreeBSD;
- }
- bool isTargetSolaris() const {
- return TargetTriple.getOS() == Triple::Solaris;
- }
+ bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
+ bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
+ bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
- bool isTargetMacho() const { return TargetTriple.isOSBinFormatMachO(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
@@ -400,6 +432,10 @@ public:
return TargetTriple.isWindowsGNUEnvironment();
}
+ bool isTargetWindowsItanium() const {
+ return TargetTriple.isWindowsItaniumEnvironment();
+ }
+
bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
@@ -466,7 +502,9 @@ public:
/// getInstrItins = Return the instruction itineraries based on the
/// subtarget selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f12140f1f161..5e6aa7d3dbf4 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -13,7 +13,9 @@
#include "X86TargetMachine.h"
#include "X86.h"
+#include "X86TargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
@@ -27,7 +29,23 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86TargetMachine> Y(TheX86_64Target);
}
-void X86TargetMachine::anchor() { }
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::x86_64)
+ return make_unique<X86_64MachoTargetObjectFile>();
+ return make_unique<TargetLoweringObjectFileMachO>();
+ }
+
+ if (TT.isOSLinux())
+ return make_unique<X86LinuxTargetObjectFile>();
+ if (TT.isOSBinFormatELF())
+ return make_unique<TargetLoweringObjectFileELF>();
+ if (TT.isKnownWindowsMSVCEnvironment())
+ return make_unique<X86WindowsTargetObjectFile>();
+ if (TT.isOSBinFormatCOFF())
+ return make_unique<TargetLoweringObjectFileCOFF>();
+ llvm_unreachable("unknown subtarget type");
+}
/// X86TargetMachine ctor - Create an X86 target.
///
@@ -36,27 +54,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
- // Determine the PICStyle based on the target selected.
- if (getRelocationModel() == Reloc::Static) {
- // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
- Subtarget.setPICStyle(PICStyles::None);
- } else if (Subtarget.is64Bit()) {
- // PIC in 64 bit mode is always rip-rel.
- Subtarget.setPICStyle(PICStyles::RIPRel);
- } else if (Subtarget.isTargetCOFF()) {
- Subtarget.setPICStyle(PICStyles::None);
- } else if (Subtarget.isTargetDarwin()) {
- if (getRelocationModel() == Reloc::PIC_)
- Subtarget.setPICStyle(PICStyles::StubPIC);
- else {
- assert(getRelocationModel() == Reloc::DynamicNoPIC);
- Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
- }
- } else if (Subtarget.isTargetELF()) {
- Subtarget.setPICStyle(PICStyles::GOT);
- }
-
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
@@ -71,6 +70,47 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
initAsmInfo();
}
+X86TargetMachine::~X86TargetMachine() {}
+
+const X86Subtarget *
+X86TargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below that resets the target options:
+ // we need to know whether the soft-float flag is set on the function before
+ // we can generate a subtarget. We also need to use it as a key for the
+ // subtarget, since it can be the only difference between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
+ Options.StackAlignmentOverride);
+ }
+ return I.get();
+}
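+
+// Illustrative consequence (not stated in the patch itself): two functions
+// with different attributes, e.g. "target-cpu"="atom" vs "target-cpu"="haswell",
+// now receive distinct cached X86Subtarget instances, keyed by CPU + FS + the
+// soft-float setting, instead of mutating one shared subtarget in place.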
+
//===----------------------------------------------------------------------===//
// Command line options for x86
//===----------------------------------------------------------------------===//
@@ -114,9 +154,8 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
- bool addPreEmitPass() override;
+ void addPostRegAlloc() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -125,7 +164,7 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
}
void X86PassConfig::addIRPasses() {
- addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
+ addPass(createAtomicExpandPass(&getX86TargetMachine()));
TargetPassConfig::addIRPasses();
}
@@ -148,39 +187,19 @@ bool X86PassConfig::addILPOpts() {
return true;
}
-bool X86PassConfig::addPreRegAlloc() {
- return false; // -print-machineinstr shouldn't print after this.
-}
-
-bool X86PassConfig::addPostRegAlloc() {
+void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
- return true; // -print-machineinstr should print after this.
}
-bool X86PassConfig::addPreEmitPass() {
- bool ShouldPrint = false;
- if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
+void X86PassConfig::addPreEmitPass() {
+ if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2())
addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
- ShouldPrint = true;
- }
- if (UseVZeroUpper) {
+ if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());
- ShouldPrint = true;
- }
if (getOptLevel() != CodeGenOpt::None) {
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
- ShouldPrint = true;
}
-
- return ShouldPrint;
-}
-
-bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- PM.add(createX86JITCodeEmitterPass(*this, JCE));
-
- return false;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 41d51570b9ab..916278cd7de3 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86TARGETMACHINE_H
-#define X86TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
+#define LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/IR/DataLayout.h"
@@ -23,46 +23,29 @@ namespace llvm {
class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
- virtual void anchor();
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
X86Subtarget Subtarget;
+ mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
+
public:
X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~X86TargetMachine() override;
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const X86InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- X86JITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
- const X86TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const X86RegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
+ const X86Subtarget *getSubtargetImpl(const Function &F) const override;
/// \brief Register X86 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 4a10b7ea6b42..6a6988a2e2f6 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
-#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_X86_X86TARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_X86_X86TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index c961e2f5b2c8..67488f7ad791 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -48,8 +48,8 @@ public:
}
X86TTI(const X86TargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeX86TTIPass(*PassRegistry::getPassRegistry());
}
@@ -82,9 +82,10 @@ public:
unsigned getNumberOfRegisters(bool Vector) const override;
unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
+ unsigned getMaxInterleaveFactor() const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override;
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -110,6 +111,8 @@ public:
Type *Ty) const override;
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
+ bool isLegalMaskedLoad(Type *DataType, int Consecutive) const override;
+ bool isLegalMaskedStore(Type *DataType, int Consecutive) const override;
/// @}
};
@@ -166,7 +169,7 @@ unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned X86TTI::getMaximumUnrollFactor() const {
+unsigned X86TTI::getMaxInterleaveFactor() const {
if (ST->isAtom())
return 1;
@@ -178,15 +181,37 @@ unsigned X86TTI::getMaximumUnrollFactor() const {
return 2;
}
-unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned X86TTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::SDIV &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ // On X86, vector signed division by a power-of-two constant is
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties may not be the same as those of the
+ // previous operation; conservatively assume OP_None.
+ unsigned Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+
+ return Cost;
+ }
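+ // Worked example (illustrative): if AShr, LShr and Add each legalize to
+ // cost 1 for the vector type at hand, the SDIV above is costed at
+ // 2*1 + 1 + 1 = 4 instead of the generic SDIV table entry.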
+
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
@@ -202,6 +227,15 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+ };
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
@@ -237,6 +271,11 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX512CostTable[Idx].Cost;
+ }
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@@ -315,7 +354,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
@@ -488,7 +527,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
};
-
+
if (ST->hasSSSE3()) {
int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
@@ -501,7 +540,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
-
+
// This is expanded into a long sequence of four extract + four insert.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
@@ -509,7 +548,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
};
- // Fall-back (SSE3 and SSE2).
+ // Fall-back (SSE3 and SSE2).
int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSEAltShuffleTbl[Idx].Cost;
@@ -541,7 +580,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
// There are faster sequences for float conversions.
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
@@ -557,6 +596,45 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVX512ConversionTbl[] = {
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
+ { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
+
+ // v16i1 -> v16i32 - load + broadcast
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
+ LTSrc.second);
+ if (Idx != -1)
+ return AVX512ConversionTbl[Idx].Cost;
+ }
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -589,6 +667,11 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
+
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
@@ -715,6 +798,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 1 },
};
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ { ISD::SETCC, MVT::v8i64, 1 },
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 },
+ { ISD::SETCC, MVT::v16f32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX512CostTbl[Idx].Cost;
+ }
+
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
@@ -836,17 +932,17 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) const {
-
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
-
+
MVT MTy = LT.second;
-
+
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
-
- // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
- // and make it as the cost.
-
+
+ // We use the Intel Architecture Code Analyzer (IACA) to measure throughput
+ // and use that measurement as the cost.
+
static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
@@ -854,7 +950,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
{ ISD::ADD, MVT::v8i16, 5 },
};
-
+
static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
@@ -873,7 +969,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
};
-
+
static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
@@ -884,14 +980,14 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i16, 4 },
{ ISD::ADD, MVT::v8i32, 5 },
};
-
+
if (IsPairwise) {
if (ST->hasAVX()) {
int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
if (Idx != -1)
return LT.first * AVX1CostTblPairWise[Idx].Cost;
}
-
+
if (ST->hasSSE42()) {
int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
if (Idx != -1)
@@ -903,7 +999,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
if (Idx != -1)
return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
}
-
+
if (ST->hasSSE42()) {
int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
if (Idx != -1)
@@ -1062,3 +1158,19 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
}
return X86TTI::getIntImmCost(Imm, Ty);
}
+
+bool X86TTI::isLegalMaskedLoad(Type *DataTy, int Consecutive) const {
+ int DataWidth = DataTy->getPrimitiveSizeInBits();
+
+  // TODO: AVX512 also allows gather/scatter, which works with strided and random accesses as well.
+ if ((DataWidth < 32) || (Consecutive == 0))
+ return false;
+ if (ST->hasAVX512() || ST->hasAVX2())
+ return true;
+ return false;
+}
+
+bool X86TTI::isLegalMaskedStore(Type *DataType, int Consecutive) const {
+ return isLegalMaskedLoad(DataType, Consecutive);
+}
+
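Taken together, the two hooks above are what a vectorizing client would consult before emitting masked memory operations on X86: the data width must be at least 32 bits, the access must be consecutive, and AVX2 or AVX-512 must be present. A hypothetical caller-side use, assuming a handle to the X86TTI object; the function name here is illustrative, not from the patch:

    // Sketch: stride-1 conditional loads of >=32-bit elements qualify.
    bool canUseMaskedLoad(const X86TTI &TTI, Type *DataTy) {
      return TTI.isLegalMaskedLoad(DataTy, /*Consecutive=*/1);
    }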
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 0bb5f990cae7..5c01f3e1ff63 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -250,20 +250,24 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || ST.hasAVX512())
return false;
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
+ bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
+
// Fast check: if the function doesn't use any ymm registers, we don't need
// to insert any VZEROUPPER instructions. This is constant-time, so it is
// cheap in the common case of no ymm use.
- bool YMMUsed = false;
- const TargetRegisterClass *RC = &X86::VR256RegClass;
- for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
- i != e; i++) {
- if (!MRI.reg_nodbg_empty(*i)) {
- YMMUsed = true;
- break;
+ bool YMMUsed = FnHasLiveInYmm;
+ if (!YMMUsed) {
+ const TargetRegisterClass *RC = &X86::VR256RegClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+ i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YMMUsed = true;
+ break;
+ }
}
}
if (!YMMUsed) {
@@ -282,7 +286,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// If any YMM regs are live in to this function, add the entry block to the
// DirtySuccessors list
- if (checkFnHasLiveInYmm(MRI))
+ if (FnHasLiveInYmm)
addDirtySuccessor(MF.front());
// Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
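The rewrite computes the live-in YMM state once, up front, and reuses it for both the cheap YMMUsed early-out and the later DirtySuccessors seeding. A sketch of the kind of scan checkFnHasLiveInYmm is assumed to perform, based on the register class already used in this function:

    // Sketch only; the real helper is checkFnHasLiveInYmm.
    static bool hasLiveInYmmSketch(const MachineRegisterInfo &MRI) {
      for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
                                                E = MRI.livein_end();
           I != E; ++I)
        if (X86::VR256RegClass.contains(I->first))  // any 256-bit YMM live-in?
          return true;
      return false;
    }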
diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt
index 028de2cb3433..cc30d0400c4b 100644
--- a/lib/Target/XCore/Disassembler/LLVMBuild.txt
+++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = XCoreDisassembler
parent = XCore
-required_libraries = MC Support XCoreInfo
+required_libraries = MCDisassembler Support XCoreInfo
add_to_library_groups = XCore
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 7fef7960a8ff..640e6b09d64f 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -19,7 +19,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,47 +35,35 @@ public:
XCoreDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {}
- /// \brief See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
-
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
}
-static bool readInstruction16(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint16_t &insn) {
- uint8_t Bytes[4];
-
+static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint16_t &Insn) {
// We want to read exactly 2 Bytes of data.
- if (region.readBytes(address, 2, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 2) {
+ Size = 0;
return false;
}
// Encoded as a little-endian 16-bit word in the stream.
- insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8);
return true;
}
-static bool readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn) {
- uint8_t Bytes[4];
-
+static bool readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return false;
}
// Encoded as a little-endian 32-bit word in the stream.
- insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
- (Bytes[3] << 24);
+ Insn =
+ (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | (Bytes[3] << 24);
return true;
}
@@ -748,16 +735,12 @@ DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-MCDisassembler::DecodeStatus
-XCoreDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
+ MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &vStream, raw_ostream &cStream) const {
uint16_t insn16;
- if (!readInstruction16(Region, Address, Size, insn16)) {
+ if (!readInstruction16(Bytes, Address, Size, insn16)) {
return Fail;
}
@@ -771,7 +754,7 @@ XCoreDisassembler::getInstruction(MCInst &instr,
uint32_t insn32;
- if (!readInstruction32(Region, Address, Size, insn32)) {
+ if (!readInstruction32(Bytes, Address, Size, insn32)) {
return Fail;
}
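With ArrayRef<uint8_t> replacing MemoryObject, the readers reduce to a bounds check plus little-endian reassembly of the word. A worked example of the 16-bit path, assuming the static helper above is in scope:

    // {0x34, 0x12} decodes to the little-endian halfword 0x1234 (sketch).
    const uint8_t Buf[] = {0x34, 0x12};
    uint64_t Size = 0;
    uint16_t Insn = 0;
    bool OK = readInstruction16(ArrayRef<uint8_t>(Buf), /*Address=*/0, Size, Insn);
    assert(OK && Insn == 0x1234 && "bytes assembled LSB-first");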
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 98e7c98653f8..78521fd1fd6c 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef XCOREINSTPRINTER_H
-#define XCOREINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 56659117afc7..f2d2b37d6f21 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -28,7 +28,6 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
ProtectedVisibilityAttr = MCSA_Invalid;
// Debug
- HasLEB128 = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index da2689a6400c..26df211eecee 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETASMINFO_H
-#define XCORETARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCASMINFO_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index d54e94fb7d97..4073549bc6e2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -99,10 +99,10 @@ class XCoreTargetAsmStreamer : public XCoreTargetStreamer {
formatted_raw_ostream &OS;
public:
XCoreTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- virtual void emitCCTopData(StringRef Name) override;
- virtual void emitCCTopFunction(StringRef Name) override;
- virtual void emitCCBottomData(StringRef Name) override;
- virtual void emitCCBottomFunction(StringRef Name) override;
+ void emitCCTopData(StringRef Name) override;
+ void emitCCTopFunction(StringRef Name) override;
+ void emitCCBottomData(StringRef Name) override;
+ void emitCCBottomFunction(StringRef Name) override;
};
XCoreTargetAsmStreamer::XCoreTargetAsmStreamer(MCStreamer &S,
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index a255adb2e0f2..0ff5961b1443 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMCTARGETDESC_H
-#define XCOREMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index d707edc8735f..140ba2a4a80e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_XCORE_H
-#define TARGET_XCORE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORE_H
+#define LLVM_LIB_TARGET_XCORE_XCORE_H
#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index e98d4f933dfb..82e4e3690b48 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -117,7 +117,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitSpecialLLVMGlobal(GV))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
OutStreamer.SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
@@ -210,7 +210,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e6947369c2d7..7c743401ae1d 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -226,7 +227,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineModuleInfo *MMI = &MF.getMMI();
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -262,7 +263,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.addLiveIn(XCore::LR);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode));
MIB.addImm(Adjusted);
- MIB->addRegisterKilled(XCore::LR, MF.getTarget().getRegisterInfo(), true);
+ MIB->addRegisterKilled(XCore::LR, MF.getSubtarget().getRegisterInfo(),
+ true);
if (emitFrameMoves) {
EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4);
unsigned DRegNum = MRI->getDwarfRegNum(XCore::LR, true);
@@ -310,11 +312,10 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
// Frame moves for callee saved.
- auto SpillLabels = XFI->getSpillLabels();
- for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
- MachineBasicBlock::iterator Pos = SpillLabels[I].first;
+ for (const auto &SpillLabel : XFI->getSpillLabels()) {
+ MachineBasicBlock::iterator Pos = SpillLabel.first;
++Pos;
- CalleeSavedInfo &CSI = SpillLabels[I].second;
+ const CalleeSavedInfo &CSI = SpillLabel.second;
int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
unsigned DRegNum = MRI->getDwarfRegNum(CSI.getReg(), true);
EmitCfiOffset(MBB, Pos, dl, TII, MMI, DRegNum, Offset);
@@ -323,7 +324,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
// The unwinder requires stack slot & CFI offsets for the exception info.
// We do not save/spill these registers.
SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering());
+ GetEHSpillList(SpillList, MFI, XFI,
+ MF.getSubtarget().getTargetLowering());
assert(SpillList.size()==2 && "Unexpected SpillList size");
EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
MRI->getDwarfRegNum(SpillList[0].Reg, true),
@@ -340,7 +342,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
DebugLoc dl = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
@@ -355,7 +357,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
// 'Restore' the exception info the unwinder has placed into the stack
// slots.
SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering());
+ GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering());
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
// Return to the landing pad.
@@ -413,7 +415,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
@@ -446,7 +448,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const{
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
bool AtStart = MI == MBB.begin();
MachineBasicBlock::iterator BeforeI = MI;
if (!AtStart)
@@ -479,7 +481,7 @@ void XCoreFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
// Turn the adjcallstackdown instruction into 'extsp <amt>' and the
// adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index e4f806a4520c..7b169c2b8cdd 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREFRAMEINFO_H
-#define XCOREFRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
+#define LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -59,4 +59,4 @@ namespace llvm {
};
}
-#endif // XCOREFRAMEINFO_H
+#endif
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 30c7b59098dd..77292c4f8f52 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -13,6 +13,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -43,7 +44,7 @@ FunctionPass *llvm::createXCoreFrameToArgsOffsetEliminationPass() {
bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackSize = MF.getFrameInfo()->getStackSize();
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI) {
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index be7ef6420193..51e4d036fe91 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -69,7 +69,7 @@ getTargetNodeName(unsigned Opcode) const
}
XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM, new XCoreTargetObjectFile()), TM(TM),
+ : TargetLowering(TM), TM(TM),
Subtarget(TM.getSubtarget<XCoreSubtarget>()) {
// Set up the register classes.
@@ -127,12 +127,14 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
// Loads
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Expand);
+ }
// Custom expand misaligned loads / stores.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
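setLoadExtAction is now keyed on both the result value type and the in-memory type, which is why the i1/i8/i16 actions have to be registered inside a loop over every integer value type. One iteration of that loop, written out for clarity (a sketch, not additional patch content):

    // "An i32-valued SEXTLOAD whose memory type is i8 should be expanded."
    setLoadExtAction(ISD::SEXTLOAD, /*ValVT=*/MVT::i32, /*MemVT=*/MVT::i8, Expand);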
@@ -426,7 +428,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(),
+ LD->getAddressSpace(),
+ LD->getAlignment()))
return SDValue();
unsigned ABIAlignment = getDataLayout()->
@@ -461,14 +465,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (LD->getAlignment() == 2) {
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
BasePtr, LD->getPointerInfo(), MVT::i16,
- LD->isVolatile(), LD->isNonTemporal(), 2);
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
HighAddr,
LD->getPointerInfo().getWithOffset(2),
MVT::i16, LD->isVolatile(),
- LD->isNonTemporal(), 2);
+ LD->isNonTemporal(), LD->isInvariant(), 2);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
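For the 2-byte-aligned case the lowering above issues two extending i16 loads and recombines them with a shift and an or. The value-level arithmetic being materialized is simply:

    // Equivalent of the Low | (High << 16) DAG built above (sketch).
    uint32_t recombineHalves(uint16_t LowHalf, uint16_t HighHalf) {
      return (uint32_t)LowHalf | ((uint32_t)HighHalf << 16);
    }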
@@ -504,7 +509,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
StoreSDNode *ST = cast<StoreSDNode>(Op);
assert(!ST->isTruncatingStore() && "Unexpected store type");
assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
- if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+ ST->getAddressSpace(),
+ ST->getAlignment())) {
return SDValue();
}
unsigned ABIAlignment = getDataLayout()->
@@ -800,7 +807,8 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
return SDValue();
MachineFunction &MF = DAG.getMachineFunction();
- const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op),
RegInfo->getFrameRegister(MF), MVT::i32);
}
@@ -846,7 +854,8 @@ LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
// Absolute SP = (FP + FrameToArgs) + Offset
- const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
SDValue Stack = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
RegInfo->getFrameRegister(MF), MVT::i32);
SDValue FrameToArgs = DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, dl,
@@ -969,7 +978,7 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->isInvariant(), N->getAlignment(),
- N->getTBAAInfo(), N->getRanges());
+ N->getAAInfo(), N->getRanges());
}
if (N->getMemoryVT() == MVT::i16) {
if (N->getAlignment() < 2)
@@ -977,13 +986,13 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->isInvariant(), N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->isInvariant(), N->getAlignment(), N->getAAInfo());
return SDValue();
}
@@ -999,7 +1008,7 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i16) {
if (N->getAlignment() < 2)
@@ -1007,13 +1016,13 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
return SDValue();
}
@@ -1118,8 +1127,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// The ABI dictates there should be one stack slot available to the callee
// on function entry (for saving lr).
@@ -1129,8 +1138,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> RVLocs;
// Analyze return values to determine the number of bytes of stack required.
- CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
@@ -1284,8 +1293,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
@@ -1443,7 +1452,7 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
return false;
if (CCInfo.getNextStackOffset() != 0 && isVarArg)
@@ -1467,8 +1476,8 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
if (!isVarArg)
@@ -1541,7 +1550,8 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
MachineBasicBlock *
XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
"Unexpected instr type to insert");
@@ -1803,7 +1813,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// Replace unaligned store of unaligned load with memmove.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (!DCI.isBeforeLegalize() ||
- allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+ ST->getAddressSpace(),
+ ST->getAlignment()) ||
ST->isVolatile() || ST->isIndexed()) {
break;
}
@@ -1912,7 +1924,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
unsigned Size = TD->getTypeAllocSize(Ty);
if (AM.BaseGV) {
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 62b89c348dc7..13154c640fd1 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREISELLOWERING_H
-#define XCOREISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREISELLOWERING_H
+#define LLVM_LIB_TARGET_XCORE_XCOREISELLOWERING_H
#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -215,4 +215,4 @@ namespace llvm {
};
}
-#endif // XCOREISELLOWERING_H
+#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 36ea9a087da5..c310aa3a179f 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -446,16 +446,19 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
dl = MI->getDebugLoc();
if (isImmMskBitp(Value)) {
int N = Log2_32(Value) + 1;
- return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N);
+ return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg)
+ .addImm(N)
+ .getInstr();
}
if (isImmU16(Value)) {
int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
- return BuildMI(MBB, MI, dl, get(Opcode), Reg).addImm(Value);
+ return BuildMI(MBB, MI, dl, get(Opcode), Reg).addImm(Value).getInstr();
}
MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
return BuildMI(MBB, MI, dl, get(XCore::LDWCP_lru6), Reg)
- .addConstantPoolIndex(Idx);
+ .addConstantPoolIndex(Idx)
+ .getInstr();
}
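loadImmediate returns a MachineBasicBlock::iterator (per the hunk header above), and getting there from a MachineInstrBuilder would need two user-defined conversions in a row (builder to MachineInstr*, then pointer to iterator), which C++ will not chain implicitly; hence the explicit getInstr() on each BuildMI chain. Spelled out as a fragment (sketch, assuming that return type):

    // Inside a function returning MachineBasicBlock::iterator:
    MachineInstr *NewMI = MIB.getInstr();        // explicit first hop
    return MachineBasicBlock::iterator(NewMI);   // pointer -> iterator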
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e0be96b6ad4d..60bb3f8c39af 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREINSTRUCTIONINFO_H
-#define XCOREINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H
#include "XCoreRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 00cb705c8d3b..8e9bb4525600 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -381,7 +381,7 @@ def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER",
// Three operand short
defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>;
defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">;
def LSS_3r : F3R_np<0b11000, "lss">;
def LSU_3r : F3R_np<0b11001, "lsu">;
@@ -412,7 +412,7 @@ def STW_l3r : _FL3R<0b000001100, (outs),
(ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
"stw $val, $addr[$offset]", []>;
-def STW_2rus : _F2RUS<0b0000, (outs),
+def STW_2rus : _F2RUS<0b00000, (outs),
(ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
"stw $val, $addr[$offset]", []>;
}
@@ -432,7 +432,7 @@ def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst),
[(set GRRegs:$dst,
(ldawf GRRegs:$addr, GRRegs:$offset))]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst),
(ins GRRegs:$addr, i32imm:$offset),
"ldaw $dst, $addr[$offset]", []>;
@@ -443,7 +443,7 @@ def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst),
[(set GRRegs:$dst,
(ldawb GRRegs:$addr, GRRegs:$offset))]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst),
(ins GRRegs:$addr, i32imm:$offset),
"ldaw $dst, $addr[-$offset]", []>;
@@ -538,7 +538,7 @@ def LMUL_l6r : _FL6R<
// Register - U6
//let Uses = [DP] in ...
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, dp[$b]", []>;
@@ -564,7 +564,7 @@ def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b),
[(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
//let Uses = [CP] in ..
-let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, isReMaterializable = 1, hasSideEffects = 0 in {
def LDWCP_ru6 : _FRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b),
"ldw $a, cp[$b]", []>;
def LDWCP_lru6: _FLRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b),
@@ -593,7 +593,7 @@ def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
[(set RRegs:$a, (XCoreLdwsp immU16:$b))]>;
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def LDAWSP_ru6 : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, sp[$b]", []>;
@@ -628,7 +628,7 @@ defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">;
// U6
let Defs = [SP], Uses = [SP] in {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">;
let mayStore = 1 in
@@ -639,7 +639,7 @@ defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>;
}
}
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">;
let Uses = [R11], isCall=1 in
@@ -656,7 +656,7 @@ def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
}
//let Uses = [CP] in ...
-let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
+let Defs = [R11], hasSideEffects = 0, isReMaterializable = 1 in
def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]",
[]>;
@@ -690,17 +690,17 @@ defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
// U10
let Defs = [R11], isReMaterializable = 1 in {
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LDAPF_u10 : _FU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", []>;
def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a",
[(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LDAPB_u10 : _FU10<0b110111, (outs), (ins pcrel_imm_neg:$a), "ldap r11, $a",
[]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def LDAPB_lu10 : _FLU10<0b110111, (outs), (ins pcrel_imm_neg:$a),
"ldap r11, $a",
[(set R11, (pcrelwrapper tglobaladdr:$a))]>;
@@ -729,7 +729,7 @@ def BLRB_lu10 : _FLU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>;
}
let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ hasSideEffects = 0 in {
def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>;
def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]",
@@ -772,7 +772,7 @@ def ANDNOT_2r :
[(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
}
-let isReMaterializable = 1, neverHasSideEffects = 1 in
+let isReMaterializable = 1, hasSideEffects = 0 in
def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size),
"mkmsk $dst, $size", []>;
@@ -902,7 +902,7 @@ def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
"byterev $dst, $src",
[(set GRRegs:$dst, (bswap GRRegs:$src))]>;
-def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+def CLZ_l2r : _FL2R<0b0000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
"clz $dst, $src",
[(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
@@ -972,13 +972,13 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BRU_1r : _F1R<0b001010, (outs), (ins GRRegs:$a), "bru $a", []>;
-let Defs=[SP], neverHasSideEffects=1 in
+let Defs=[SP], hasSideEffects=0 in
def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
-let neverHasSideEffects=1 in
+let hasSideEffects=0 in
def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
-let neverHasSideEffects=1 in
+let hasSideEffects=0 in
def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
let hasCtrlDep = 1 in
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index 28e702bb9884..569147872f23 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMCINSTLOWER_H
-#define XCOREMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
+#define LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 212a5cfecc30..078ffde18fb9 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMACHINEFUNCTIONINFO_H
-#define XCOREMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -103,4 +103,4 @@ public:
};
} // End llvm namespace
-#endif // XCOREMACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 316c82c66a47..5c666ae59fb5 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -98,7 +99,7 @@ static void InsertFPConstInst(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchOffset);
+ RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
switch (MI.getOpcode()) {
@@ -170,12 +171,12 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II,
unsigned ScratchBase;
if (OpCode==XCore::STWFI) {
ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchBase);
+ RS->setRegUsed(ScratchBase);
} else
ScratchBase = Reg;
BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0);
unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchOffset);
+ RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
switch (OpCode) {
@@ -221,7 +222,7 @@ const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF
XCore::R8, XCore::R9,
0
};
- const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
if (TFI->hasFP(*MF))
return CalleeSavedRegsFP;
return CalleeSavedRegs;
@@ -229,7 +230,7 @@ const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
Reserved.set(XCore::CP);
Reserved.set(XCore::DP);
@@ -267,9 +268,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
int StackSize = MF.getFrameInfo()->getStackSize();
@@ -323,7 +324,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
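Aside from the getSubtarget migrations, the only rename in this file is RegisterScavenger::setUsed becoming setRegUsed; the scavenging idiom itself is unchanged. Isolated for clarity, with names as in the hunks above (sketch):

    // Grab a scratch GRReg, then reserve it so a second scavenge in the
    // same sequence cannot hand back the same register.
    unsigned Scratch = RS->scavengeRegister(&XCore::GRRegsRegClass, II, /*SPAdj=*/0);
    RS->setRegUsed(Scratch);
    TII.loadImmediate(MBB, II, Scratch, Offset);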
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index aa617a0106d0..5d7721ca2fa0 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREREGISTERINFO_H
-#define XCOREREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREREGISTERINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 91b33fd6559c..a34884480cea 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -33,7 +33,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
// Call __memcpy_4 if the src, dst and size are all 4 byte aligned.
if (!AlwaysInline && (Align & 3) == 0 &&
DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) {
- const TargetLowering &TLI = *DAG.getTarget().getTargetLowering();
+ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 0079de1798b2..cfd80b3f3172 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORESELECTIONDAGINFO_H
-#define XCORESELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORESELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCORESELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 1e9810bb89ef..695578d5bf33 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORESUBTARGET_H
-#define XCORESUBTARGET_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORESUBTARGET_H
+#define LLVM_LIB_TARGET_XCORE_XCORESUBTARGET_H
#include "XCoreFrameLowering.h"
#include "XCoreISelLowering.h"
@@ -48,14 +48,20 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const XCoreFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; }
- const XCoreSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const TargetRegisterInfo *getRegisterInfo() const {
+ const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const XCoreFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const XCoreTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const XCoreSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const DataLayout *getDataLayout() const { return &DL; }
+ const DataLayout *getDataLayout() const override { return &DL; }
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 8d8bb3800ea5..21ebf455607d 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
#include "XCore.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Module.h"
@@ -26,10 +27,13 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+XCoreTargetMachine::~XCoreTargetMachine() {}
+
namespace {
/// XCore Code Generator Pass Configuration Options.
class XCorePassConfig : public TargetPassConfig {
@@ -41,9 +45,10 @@ public:
return getTM<XCoreTargetMachine>();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
- bool addPreEmitPass() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -51,6 +56,12 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
}
+void XCorePassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getXCoreTargetMachine()));
+
+ TargetPassConfig::addIRPasses();
+}
+
bool XCorePassConfig::addPreISel() {
addPass(createXCoreLowerThreadLocalPass());
return false;
@@ -61,9 +72,8 @@ bool XCorePassConfig::addInstSelector() {
return false;
}
-bool XCorePassConfig::addPreEmitPass() {
- addPass(createXCoreFrameToArgsOffsetEliminationPass());
- return false;
+void XCorePassConfig::addPreEmitPass() {
+ addPass(createXCoreFrameToArgsOffsetEliminationPass(), false);
}
// Force static initialization.
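addPreEmitPass no longer reports whether it added anything, and the extra false argument is taken here to be the verifyAfter flag of this vintage of TargetPassConfig::addPass, suppressing the machine verifier after this pass. The signature below is an assumption about the surrounding API, not stated in the patch:

    // Assumed contemporary signature (sketch):
    //   void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true);
    addPass(createXCoreFrameToArgsOffsetEliminationPass(), /*verifyAfter=*/false);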
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 14c43bf151f4..8ff9269821d3 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETMACHINE_H
-#define XCORETARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
#include "XCoreSubtarget.h"
#include "llvm/Target/TargetMachine.h"
@@ -20,37 +20,24 @@
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
XCoreSubtarget Subtarget;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~XCoreTargetMachine() override;
- const XCoreInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const XCoreFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const XCoreTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const TargetRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index cfd3302481e7..86d0de654e44 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -145,9 +145,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
if (Kind.isMergeableConst16()) return MergeableConst16Section;
}
Type *ObjType = GV->getType()->getPointerElementType();
- if (TM.getCodeModel() == CodeModel::Small ||
- !ObjType->isSized() ||
- TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
+ if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ObjType) <
+ CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
: DataRelROSection;
if (Kind.isBSS() || Kind.isCommon())return BSSSection;
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index d389e55ae399..7d3f49d222d6 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
-#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 0a394da98a0e..48bf0fafb480 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETSTREAMER_H
-#define XCORETARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETSTREAMER_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
index 80d193d1c26e..da232dad6336 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -43,17 +43,17 @@ public:
initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
pushTTIStack(this);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
static char ID;
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;