summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp6
-rw-r--r--lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp2
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp6
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp6
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp2
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp4
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp2
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.cpp2
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.cpp4
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.cpp2
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp2
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td33
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp12
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp10
-rw-r--r--lib/Target/AMDGPU/AMDGPUMCInstLower.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp20
-rw-r--r--lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp353
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h17
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp18
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp106
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp7
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h2
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.cpp2
-rw-r--r--lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h422
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp197
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp32
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h12
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--lib/Target/AMDGPU/Processors.td75
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--lib/Target/AMDGPU/R600EmitClauseMarkers.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp2
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp4
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.cpp4
-rw-r--r--lib/Target/AMDGPU/R600Packetizer.cpp2
-rw-r--r--lib/Target/AMDGPU/SIDebuggerInsertNops.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp3
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp6
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp2
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/SILowerControlFlow.cpp2
-rw-r--r--lib/Target/AMDGPU/SILowerI1Copies.cpp2
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h2
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp97
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp62
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h4
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp43
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h7
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td9
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp16
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp9
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td9
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp70
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp2
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp33
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp4
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp15
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp6
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp19
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp66
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp6
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp20
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp2
-rw-r--r--lib/Target/AVR/AVR.h2
-rw-r--r--lib/Target/AVR/AVRAsmPrinter.cpp2
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.cpp2
-rw-r--r--lib/Target/AVR/AVRSubtarget.cpp2
-rw-r--r--lib/Target/AVR/AVRSubtarget.h3
-rw-r--r--lib/Target/AVR/AVRTargetMachine.cpp4
-rw-r--r--lib/Target/AVR/AVRTargetObjectFile.cpp2
-rw-r--r--lib/Target/AVR/AsmParser/AVRAsmParser.cpp6
-rw-r--r--lib/Target/AVR/Disassembler/AVRDisassembler.cpp4
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp2
-rw-r--r--lib/Target/BPF/BPFAsmPrinter.cpp4
-rw-r--r--lib/Target/BPF/BPFInstrInfo.cpp2
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.cpp6
-rw-r--r--lib/Target/BPF/BPFTargetMachine.cpp6
-rw-r--r--lib/Target/BPF/Disassembler/BPFDisassembler.cpp4
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h2
-rw-r--r--lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp6
-rw-r--r--lib/Target/Hexagon/BitTracker.cpp2
-rw-r--r--lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonBlockRanges.h2
-rw-r--r--lib/Target/Hexagon/HexagonCommonGEP.cpp16
-rw-r--r--lib/Target/Hexagon/HexagonConstPropagation.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonCopyToCombine.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonEarlyIfConv.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp3
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonGenInsert.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonGenMux.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp57
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td89
-rw-r--r--lib/Target/Hexagon/HexagonSplitDouble.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp36
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp6
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.cpp2
-rw-r--r--lib/Target/Hexagon/RDFGraph.cpp2
-rw-r--r--lib/Target/Hexagon/RDFLiveness.cpp2
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--lib/Target/Lanai/LanaiTargetObjectFile.cpp2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp2
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp10
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp4
-rw-r--r--lib/Target/Mips/Mips.td3
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp4
-rw-r--r--lib/Target/Mips/MipsCCState.cpp61
-rw-r--r--lib/Target/Mips/MipsCCState.h33
-rw-r--r--lib/Target/Mips/MipsCallingConv.td10
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp2
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td5
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp2
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp81
-rw-r--r--lib/Target/Mips/MipsISelLowering.h27
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td18
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td6
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp8
-rw-r--r--lib/Target/Mips/MipsOptimizePICCall.cpp2
-rw-r--r--lib/Target/Mips/MipsOs16.cpp2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp4
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp2
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp6
-rw-r--r--lib/Target/Mips/MipsSubtarget.h5
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerArgs.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp2
-rw-r--r--lib/Target/NVPTX/NVVMIntrRange.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp8
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCBoolRetToInt.cpp42
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp2
-rw-r--r--lib/Target/PowerPC/PPCEarlyReturn.cpp2
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp4
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp65
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp10
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td51
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTOCRegDeps.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXCopy.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXFMAMutate.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp2
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp2
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp2
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h2
-rw-r--r--lib/Target/RISCV/RISCVTargetMachine.cpp2
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp4
-rw-r--r--lib/Target/Sparc/Disassembler/SparcDisassembler.cpp4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp1
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--lib/Target/Sparc/SparcMCInstLower.cpp2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp4
-rw-r--r--lib/Target/Sparc/SparcTargetObjectFile.cpp2
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.h2
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp25
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h2
-rw-r--r--lib/Target/SystemZ/SystemZTDC.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp4
-rw-r--r--lib/Target/Target.cpp4
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp16
-rw-r--r--lib/Target/TargetMachineC.cpp4
-rw-r--r--lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp2
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp11
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFastISel.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyStoreResults.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp4
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp4
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h2
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp2
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp8
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp6
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp2
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp193
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td41
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp3400
-rw-r--r--lib/Target/X86/X86InstrSSE.td10
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp16
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp2
-rw-r--r--lib/Target/X86/X86SchedHaswell.td33
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td25
-rw-r--r--lib/Target/X86/X86Schedule.td4
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td32
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td27
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp4
-rw-r--r--lib/Target/X86/X86Subtarget.cpp2
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp2
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp4
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp13
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h4
-rw-r--r--lib/Target/X86/X86WinEHState.cpp2
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp4
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h2
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.cpp2
326 files changed, 5474 insertions, 1525 deletions
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 981fd22c213c..5ce57926cc03 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
@@ -35,11 +35,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 30e2b2310456..544f67433fd5 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 33fec74998d6..160107cd7e2b 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -14,9 +14,9 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 9ac7ecb9cdb4..e8fcf1a0e9b7 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2827,7 +2827,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
return false;
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
- if (SrcVT == MVT::f128)
+ if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
return false;
unsigned Opc;
@@ -2854,6 +2854,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
+ // Let regular ISEL handle FP16
+ if (DestVT == MVT::f16)
+ return false;
+
assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
"Unexpected value type.");
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b18fb30eb2d4..8c2c0a564c30 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2566,7 +2566,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
// pstatefield for the MSR (immediate) instruction, we also require that an
// immediate value has been provided as an argument, we know that this is
// the case as it has been ensured by semantic checking.
- auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());;
+ auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
if (PMapper) {
assert (isa<ConstantSDNode>(N->getOperand(2))
&& "Expected a constant integer expression.");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index f798010906cc..059556a560c0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
-#include "AArch64ISelLowering.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
@@ -22,9 +22,9 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -51,10 +51,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index faf39be9b41e..eea012382150 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -17,8 +17,8 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 6e6daf812295..01196817f311 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 976498aa70d6..9243eb91cc1a 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,10 +16,10 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 038162c6f54a..fe4ef4b40ece 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -17,8 +17,8 @@
#define DEBUG_TYPE "aarch64-pbqp"
-#include "AArch64.h"
#include "AArch64PBQPRegAlloc.h"
+#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 789270c2a34b..9b3899e0681c 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -15,13 +15,13 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 12a2e9a867f0..4bc2c060a068 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -9,12 +9,12 @@
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetMachine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 43569af04347..a4328682b93c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -9,8 +9,8 @@
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 449d732a8d44..e841fb894519 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -15,8 +15,8 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index ebf05ae303dd..43a6fa9ce089 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -11,8 +11,9 @@
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -22,7 +23,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 10e7241da709..f7dda92fb551 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -15,11 +15,11 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 271263507ae1..031aa8b81e35 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -30,7 +31,6 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 3d296ba4806b..19b2576f6895 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -23,7 +24,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index b50e8d1d659e..6ab2b9ef0459 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
Implies
>;
+def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
+ [FeatureSouthernIslands,
+ FeatureFastFMAF32,
+ HalfRate64Ops,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+ [FeatureSouthernIslands,
+ FeatureLDSBankCount32]>;
+
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
FeatureLDSBankCount32]>;
@@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
[FeatureSeaIslands,
FeatureLDSBankCount16]>;
+def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+ [FeatureSeaIslands,
+ FeatureLDSBankCount16]>;
+
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
@@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
FeatureLDSBankCount16,
FeatureXNACK]>;
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
-def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+ [FeatureGFX9,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,
+ [FeatureGFX9,
+ FeatureLDSBankCount32,
+ FeatureXNACK]>;
+
+def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+ [FeatureGFX9,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3,
+ [FeatureGFX9,
+ FeatureLDSBankCount32,
+ FeatureXNACK]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 3c99f48e818a..faa424eb0a64 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,15 +10,15 @@
/// This is the AMGPU address space based alias analysis pass.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 91b3649f5c39..3c788fa1dcea 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -19,8 +19,8 @@
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0959014812d8..83ad1a5c6ee3 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,25 +17,25 @@
//
#include "AMDGPUAsmPrinter.h"
-#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index e5adeeb465e1..0a58ce06704d 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,8 +15,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
-#include "AMDKernelCodeT.h"
#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index e67ae092fdda..515cc07dd449 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -18,8 +18,8 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
-#include "SIRegisterInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index d923cb117c12..b312dbc8d14d 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -25,13 +25,13 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 19fce064783d..251c2f9bb25a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 723e8a7b54e2..5586b513b5fc 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -21,6 +21,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +31,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/KnownBits.h"
-#include "SIInstrInfo.h"
using namespace llvm;
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 12caa5118342..41cc7d7093ec 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,8 +17,8 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#include "AMDGPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c87102e55dfb..ef845f44d365 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -15,9 +15,9 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#include "AMDGPU.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 267f4807a788..b889788c3426 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,10 +14,10 @@
#include "AMDGPULegalizerInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
@@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
setAction({G_GEP, P2}, Legal);
setAction({G_GEP, 1, S64}, Legal);
+ setAction({G_ICMP, S1}, Legal);
+ setAction({G_ICMP, 1, S32}, Legal);
+
setAction({G_LOAD, P1}, Legal);
setAction({G_LOAD, P2}, Legal);
setAction({G_LOAD, S32}, Legal);
setAction({G_LOAD, 1, P1}, Legal);
setAction({G_LOAD, 1, P2}, Legal);
+ setAction({G_SELECT, S32}, Legal);
+ setAction({G_SELECT, 1, S1}, Legal);
+
setAction({G_STORE, S32}, Legal);
setAction({G_STORE, 1, P1}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f1ef6281c90f..63dd0d726d91 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -38,7 +38,6 @@ using namespace llvm;
#include "AMDGPUGenMCPseudoLowering.inc"
-
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st,
const AsmPrinter &ap):
Ctx(ctx), ST(st), AP(ap) { }
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6d2785ba1c60..2071b6f157cd 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "SIInstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8bfeb67ad4ec..99bb61b21db0 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 07f92918a43f..625c9b77e2de 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -33,11 +33,11 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
switch (Inst->getOpcode()) {
case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(Inst);
- return !LI->isVolatile();
+ // Currently only handle the case where the Pointer Operand is a GEP so check for that case.
+ return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
}
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
return true;
case Instruction::Store: {
- // Must be the stored pointer operand, not a stored value.
+ // Must be the stored pointer operand, not a stored value, plus
+ // since it should be canonical form, the User should be a GEP.
StoreInst *SI = cast<StoreInst>(Inst);
- return (SI->getPointerOperand() == User) && !SI->isVolatile();
+ return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile();
}
default:
return false;
@@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
// FIXME: There is no reason why we can't support larger arrays, we
// are just being conservative for now.
+ // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
+ // could also be promoted but we don't currently handle this case
if (!AllocaTy ||
AllocaTy->getElementType()->isVectorTy() ||
+ AllocaTy->getElementType()->isArrayTy() ||
AllocaTy->getNumElements() > 4 ||
AllocaTy->getNumElements() < 2) {
DEBUG(dbgs() << " Cannot convert type to vector\n");
@@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
switch (Inst->getOpcode()) {
case Instruction::Load: {
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
- Value *Ptr = Inst->getOperand(0);
+ Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
case Instruction::Store: {
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
- Value *Ptr = Inst->getOperand(1);
+ StoreInst *SI = cast<StoreInst>(Inst);
+ Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
- Inst->getOperand(0),
+ SI->getValueOperand(),
Index);
Builder.CreateStore(NewVecValue, BitCast);
Inst->eraseFromParent();
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
new file mode 100644
index 000000000000..36d88f52910d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
+
+#ifdef AMDGPU_REG_ASM_NAMES
+
+static const char *const VGPR32RegNames[] = {
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
+ "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
+ "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
+ "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
+ "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
+ "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
+ "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
+ "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
+ "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
+ "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
+ "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
+ "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
+ "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
+ "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
+ "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
+ "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
+ "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
+ "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
+ "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
+ "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
+ "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
+ "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
+ "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
+ "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
+ "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
+ "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
+ "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
+ "v252", "v253", "v254", "v255"
+};
+
+static const char *const SGPR32RegNames[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
+ "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19",
+ "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
+ "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39",
+ "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
+ "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59",
+ "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69",
+ "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79",
+ "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89",
+ "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99",
+ "s100", "s101", "s102", "s103"
+};
+
+static const char *const VGPR64RegNames[] = {
+ "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]",
+ "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]",
+ "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]",
+ "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]",
+ "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]",
+ "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]",
+ "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]",
+ "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]",
+ "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]",
+ "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]",
+ "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]",
+ "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]",
+ "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]",
+ "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]",
+ "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]",
+ "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]",
+ "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]",
+ "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]",
+ "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]",
+ "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]",
+ "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
+ "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
+ "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
+ "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
+ "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
+ "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
+ "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
+ "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
+ "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
+ "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
+ "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
+ "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
+ "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
+ "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
+ "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
+ "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
+ "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
+ "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
+ "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
+ "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
+ "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
+ "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
+ "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
+ "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
+ "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
+ "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
+ "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
+ "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
+ "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
+ "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
+ "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
+};
+
+static const char *const VGPR96RegNames[] = {
+ "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]",
+ "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]",
+ "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]",
+ "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]",
+ "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]",
+ "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]",
+ "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]",
+ "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]",
+ "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]",
+ "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]",
+ "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]",
+ "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]",
+ "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]",
+ "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]",
+ "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]",
+ "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]",
+ "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]",
+ "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]",
+ "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]",
+ "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]",
+ "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
+ "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
+ "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
+ "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
+ "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
+ "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
+ "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
+ "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
+ "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
+ "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
+ "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
+ "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
+ "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
+ "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
+ "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
+ "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
+ "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
+ "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
+ "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
+ "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
+ "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
+ "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
+ "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
+ "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
+ "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
+ "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
+ "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
+ "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
+ "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
+ "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
+ "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
+};
+
+static const char *const VGPR128RegNames[] = {
+ "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]",
+ "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]",
+ "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]",
+ "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]",
+ "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]",
+ "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]",
+ "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]",
+ "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]",
+ "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]",
+ "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]",
+ "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]",
+ "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]",
+ "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]",
+ "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]",
+ "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]",
+ "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]",
+ "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]",
+ "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]",
+ "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]",
+ "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]",
+ "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
+ "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
+ "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
+ "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
+ "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
+ "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
+ "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
+ "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
+ "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
+ "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
+ "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
+ "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
+ "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
+ "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
+ "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
+ "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
+ "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
+ "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
+ "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
+ "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
+ "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
+ "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
+ "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
+ "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
+ "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
+ "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
+ "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
+ "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
+ "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
+ "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
+ "v[250:253]", "v[251:254]", "v[252:255]"
+};
+
+static const char *const VGPR256RegNames[] = {
+ "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]",
+ "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]",
+ "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]",
+ "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]",
+ "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]",
+ "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]",
+ "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]",
+ "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]",
+ "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]",
+ "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]",
+ "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]",
+ "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]",
+ "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]",
+ "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]",
+ "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]",
+ "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]",
+ "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]",
+ "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]",
+ "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]",
+ "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]",
+ "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
+ "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
+ "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
+ "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
+ "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
+ "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
+ "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
+ "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
+ "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
+ "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
+ "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
+ "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
+ "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
+ "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
+ "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
+ "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
+ "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
+ "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
+ "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
+ "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
+ "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
+ "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
+ "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
+ "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
+ "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
+ "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
+ "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
+ "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
+ "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
+ "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
+};
+
+static const char *const VGPR512RegNames[] = {
+ "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]",
+ "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]",
+ "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]",
+ "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]",
+ "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]",
+ "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]",
+ "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]",
+ "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]",
+ "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]",
+ "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]",
+ "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]",
+ "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]",
+ "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]",
+ "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]",
+ "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]",
+ "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]",
+ "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]",
+ "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]",
+ "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]",
+ "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]",
+ "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
+ "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
+ "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
+ "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
+ "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
+ "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
+ "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
+ "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
+ "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
+ "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
+ "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
+ "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
+ "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
+ "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
+ "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
+ "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
+ "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
+ "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
+ "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
+ "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
+ "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
+ "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
+ "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
+ "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
+ "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
+ "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
+ "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
+ "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
+ "v[240:255]"
+};
+
+static const char *const SGPR64RegNames[] = {
+ "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]",
+ "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]",
+ "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]",
+ "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]",
+ "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]",
+ "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]",
+ "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]",
+ "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]",
+ "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
+};
+
+static const char *const SGPR128RegNames[] = {
+ "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]",
+ "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
+ "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
+ "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
+ "s[96:99]", "s[100:103]"
+};
+
+static const char *const SGPR256RegNames[] = {
+ "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]",
+ "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
+ "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
+ "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
+ "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
+};
+
+static const char *const SGPR512RegNames[] = {
+ "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]",
+ "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]",
+ "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]",
+ "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
+};
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7c198a1b8a3f..201fdc1974c6 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -36,7 +36,6 @@ protected:
#define GET_TARGET_REGBANK_CLASS
#include "AMDGPUGenRegisterBank.inc"
-
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const SIRegisterInfo *TRI;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index b2867fcc49f9..ff58aa5741a1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
-
// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
const MachineFunction *MF) const {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed9cbb994fad..5f4f20316a6b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,12 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600ISelLowering.h"
#include "R600FrameLowering.h"
-#include "SIInstrInfo.h"
-#include "SIISelLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
#include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
@@ -57,9 +57,12 @@ public:
enum {
ISAVersion0_0_0,
+ ISAVersion6_0_0,
+ ISAVersion6_0_1,
ISAVersion7_0_0,
ISAVersion7_0_1,
ISAVersion7_0_2,
+ ISAVersion7_0_3,
ISAVersion8_0_0,
ISAVersion8_0_1,
ISAVersion8_0_2,
@@ -67,7 +70,9 @@ public:
ISAVersion8_0_4,
ISAVersion8_1_0,
ISAVersion9_0_0,
- ISAVersion9_0_1
+ ISAVersion9_0_1,
+ ISAVersion9_0_2,
+ ISAVersion9_0_3
};
enum TrapHandlerAbi {
@@ -787,7 +792,7 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 404598ff4738..b644eba536fa 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,26 +28,26 @@
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
#include <memory>
using namespace llvm;
@@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
- addPass(createSIShrinkInstructionsPass());
if (EnableSDWAPeephole) {
addPass(&SIPeepholeSDWAID);
addPass(&MachineLICMID);
@@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
}
+ addPass(createSIShrinkInstructionsPass());
}
bool GCNPassConfig::addILPOpts() {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c96761c0b04e..6c1885e67fcb 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index beafebc1284a..dee3d2856701 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cc68c971b249..16e3b7b4ebee 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,18 +11,19 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
-#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -40,12 +41,11 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -814,14 +814,8 @@ private:
bool ParseDirectiveCodeObjectMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
- bool ParseSectionDirectiveHSAText();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
bool ParseDirectiveAMDGPUHsaKernel();
- bool ParseDirectiveAMDGPUHsaModuleGlobal();
- bool ParseDirectiveAMDGPUHsaProgramGlobal();
- bool ParseSectionDirectiveHSADataGlobalAgent();
- bool ParseSectionDirectiveHSADataGlobalProgram();
- bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
unsigned RegNum);
@@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
return false;
}
-bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSATextSection(getContext()));
- return false;
-}
-
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected symbol name");
@@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
return false;
}
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected symbol name");
-
- StringRef GlobalName = Parser.getTok().getIdentifier();
-
- getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
- Lex();
- return false;
-}
-
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected symbol name");
-
- StringRef GlobalName = Parser.getTok().getIdentifier();
-
- getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
- Lex();
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSADataGlobalAgentSection(getContext()));
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSADataGlobalProgramSection(getContext()));
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
- return false;
-}
-
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
- if (IDVal == ".hsatext")
- return ParseSectionDirectiveHSAText();
-
if (IDVal == ".amdgpu_hsa_kernel")
return ParseDirectiveAMDGPUHsaKernel();
- if (IDVal == ".amdgpu_hsa_module_global")
- return ParseDirectiveAMDGPUHsaModuleGlobal();
-
- if (IDVal == ".amdgpu_hsa_program_global")
- return ParseDirectiveAMDGPUHsaProgramGlobal();
-
- if (IDVal == ".hsadata_global_agent")
- return ParseSectionDirectiveHSADataGlobalAgent();
-
- if (IDVal == ".hsadata_global_program")
- return ParseSectionDirectiveHSADataGlobalProgram();
-
- if (IDVal == ".hsarodata_readonly_agent")
- return ParseSectionDirectiveHSARodataReadonlyAgent();
-
return true;
}
@@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().isNot(AsmToken::Integer))
return true;
+ SMLoc ValLoc = Parser.getTok().getLoc();
if (getParser().parseAbsoluteExpression(CntVal))
return true;
@@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
}
- // To improve diagnostics, do not skip delimiters on errors
- if (!Failed) {
- if (getLexer().isNot(AsmToken::RParen)) {
- return true;
- }
- Parser.Lex();
- if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
- const AsmToken NextToken = getLexer().peekTok();
- if (NextToken.is(AsmToken::Identifier)) {
- Parser.Lex();
- }
+ if (Failed) {
+ Error(ValLoc, "too large value for " + CntName);
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ return true;
+ }
+
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
+ const AsmToken NextToken = getLexer().peekTok();
+ if (NextToken.is(AsmToken::Identifier)) {
+ Parser.Lex();
}
}
- return Failed;
+ return false;
}
OperandMatchResultTy
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index cafce0164fa9..e30844f082cd 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUISelLowering.cpp
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
+ AMDGPURegAsmNames.inc.cpp
AMDGPURegisterInfo.cpp
AMDGPUUnifyDivergentExitNodes.cpp
GCNHazardRecognizer.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9b3cde7c4df6..88c92b9582fd 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -20,21 +20,20 @@
#include "AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
-
using namespace llvm;
#define DEBUG_TYPE "amdgpu-disassembler"
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 0ff405a71e9b..5fa3cf1a223f 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -20,8 +20,8 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-#include <cstdint>
#include <algorithm>
+#include <cstdint>
#include <memory>
namespace llvm {
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 80fc4ac9d2a3..cd9e7fb04f16 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 523eea41897e..b84640230eee 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,8 +9,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
-#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index f3266fe82955..0a9c2b94c1ee 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
deleted file mode 100644
index 816e8c744b27..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
+++ /dev/null
@@ -1,422 +0,0 @@
-//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU Code Object Metadata definitions and in-memory
-/// representations.
-///
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-
-#include <cstdint>
-#include <string>
-#include <system_error>
-#include <vector>
-
-namespace llvm {
-namespace AMDGPU {
-
-//===----------------------------------------------------------------------===//
-// Code Object Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeObject {
-
-/// \brief Code object metadata major version.
-constexpr uint32_t MetadataVersionMajor = 1;
-/// \brief Code object metadata minor version.
-constexpr uint32_t MetadataVersionMinor = 0;
-
-/// \brief Code object metadata beginning assembler directive.
-constexpr char MetadataAssemblerDirectiveBegin[] =
- ".amdgpu_code_object_metadata";
-/// \brief Code object metadata ending assembler directive.
-constexpr char MetadataAssemblerDirectiveEnd[] =
- ".end_amdgpu_code_object_metadata";
-
-/// \brief Access qualifiers.
-enum class AccessQualifier : uint8_t {
- Default = 0,
- ReadOnly = 1,
- WriteOnly = 2,
- ReadWrite = 3,
- Unknown = 0xff
-};
-
-/// \brief Address space qualifiers.
-enum class AddressSpaceQualifier : uint8_t {
- Private = 0,
- Global = 1,
- Constant = 2,
- Local = 3,
- Generic = 4,
- Region = 5,
- Unknown = 0xff
-};
-
-/// \brief Value kinds.
-enum class ValueKind : uint8_t {
- ByValue = 0,
- GlobalBuffer = 1,
- DynamicSharedPointer = 2,
- Sampler = 3,
- Image = 4,
- Pipe = 5,
- Queue = 6,
- HiddenGlobalOffsetX = 7,
- HiddenGlobalOffsetY = 8,
- HiddenGlobalOffsetZ = 9,
- HiddenNone = 10,
- HiddenPrintfBuffer = 11,
- HiddenDefaultQueue = 12,
- HiddenCompletionAction = 13,
- Unknown = 0xff
-};
-
-/// \brief Value types.
-enum class ValueType : uint8_t {
- Struct = 0,
- I8 = 1,
- U8 = 2,
- I16 = 3,
- U16 = 4,
- F16 = 5,
- I32 = 6,
- U32 = 7,
- F32 = 8,
- I64 = 9,
- U64 = 10,
- F64 = 11,
- Unknown = 0xff
-};
-
-//===----------------------------------------------------------------------===//
-// Kernel Metadata.
-//===----------------------------------------------------------------------===//
-namespace Kernel {
-
-//===----------------------------------------------------------------------===//
-// Kernel Attributes Metadata.
-//===----------------------------------------------------------------------===//
-namespace Attrs {
-
-namespace Key {
-/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize.
-constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
-/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint.
-constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
-/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
-constexpr char VecTypeHint[] = "VecTypeHint";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel attributes metadata.
-struct Metadata final {
- /// \brief 'reqd_work_group_size' attribute. Optional.
- std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
- /// \brief 'work_group_size_hint' attribute. Optional.
- std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
- /// \brief 'vec_type_hint' attribute. Optional.
- std::string mVecTypeHint = std::string();
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel attributes metadata is empty, false otherwise.
- bool empty() const {
- return mReqdWorkGroupSize.empty() &&
- mWorkGroupSizeHint.empty() &&
- mVecTypeHint.empty();
- }
-
- /// \returns True if kernel attributes metadata is not empty, false otherwise.
- bool notEmpty() const {
- return !empty();
- }
-};
-
-} // end namespace Attrs
-
-//===----------------------------------------------------------------------===//
-// Kernel Argument Metadata.
-//===----------------------------------------------------------------------===//
-namespace Arg {
-
-namespace Key {
-/// \brief Key for Kernel::Arg::Metadata::mSize.
-constexpr char Size[] = "Size";
-/// \brief Key for Kernel::Arg::Metadata::mAlign.
-constexpr char Align[] = "Align";
-/// \brief Key for Kernel::Arg::Metadata::mValueKind.
-constexpr char ValueKind[] = "ValueKind";
-/// \brief Key for Kernel::Arg::Metadata::mValueType.
-constexpr char ValueType[] = "ValueType";
-/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
-constexpr char PointeeAlign[] = "PointeeAlign";
-/// \brief Key for Kernel::Arg::Metadata::mAccQual.
-constexpr char AccQual[] = "AccQual";
-/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
-constexpr char AddrSpaceQual[] = "AddrSpaceQual";
-/// \brief Key for Kernel::Arg::Metadata::mIsConst.
-constexpr char IsConst[] = "IsConst";
-/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
-constexpr char IsPipe[] = "IsPipe";
-/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
-constexpr char IsRestrict[] = "IsRestrict";
-/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
-constexpr char IsVolatile[] = "IsVolatile";
-/// \brief Key for Kernel::Arg::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Arg::Metadata::mTypeName.
-constexpr char TypeName[] = "TypeName";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel argument metadata.
-struct Metadata final {
- /// \brief Size in bytes. Required.
- uint32_t mSize = 0;
- /// \brief Alignment in bytes. Required.
- uint32_t mAlign = 0;
- /// \brief Value kind. Required.
- ValueKind mValueKind = ValueKind::Unknown;
- /// \brief Value type. Required.
- ValueType mValueType = ValueType::Unknown;
- /// \brief Pointee alignment in bytes. Optional.
- uint32_t mPointeeAlign = 0;
- /// \brief Access qualifier. Optional.
- AccessQualifier mAccQual = AccessQualifier::Unknown;
- /// \brief Address space qualifier. Optional.
- AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
- /// \brief True if 'const' qualifier is specified. Optional.
- bool mIsConst = false;
- /// \brief True if 'pipe' qualifier is specified. Optional.
- bool mIsPipe = false;
- /// \brief True if 'restrict' qualifier is specified. Optional.
- bool mIsRestrict = false;
- /// \brief True if 'volatile' qualifier is specified. Optional.
- bool mIsVolatile = false;
- /// \brief Name. Optional.
- std::string mName = std::string();
- /// \brief Type name. Optional.
- std::string mTypeName = std::string();
-
- /// \brief Default constructor.
- Metadata() = default;
-};
-
-} // end namespace Arg
-
-//===----------------------------------------------------------------------===//
-// Kernel Code Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeProps {
-
-namespace Key {
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
-constexpr char KernargSegmentSize[] = "KernargSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
-constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
-constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
-constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
-constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
-constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
-constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
-constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
-constexpr char WavefrontSize[] = "WavefrontSize";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel code properties metadata.
-struct Metadata final {
- /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
- /// holds the values of the arguments to the kernel. Optional.
- uint64_t mKernargSegmentSize = 0;
- /// \brief Size in bytes of the group segment memory required by a workgroup.
- /// This value does not include any dynamically allocated group segment memory
- /// that may be added when the kernel is dispatched. Optional.
- uint32_t mWorkgroupGroupSegmentSize = 0;
- /// \brief Size in bytes of the private segment memory required by a workitem.
- /// Private segment memory includes arg, spill and private segments. Optional.
- uint32_t mWorkitemPrivateSegmentSize = 0;
- /// \brief Total number of SGPRs used by a wavefront. Optional.
- uint16_t mWavefrontNumSGPRs = 0;
- /// \brief Total number of VGPRs used by a workitem. Optional.
- uint16_t mWorkitemNumVGPRs = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// kernarg memory segment. Expressed as a power of two. Optional.
- uint8_t mKernargSegmentAlign = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// group memory segment. Expressed as a power of two. Optional.
- uint8_t mGroupSegmentAlign = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// private memory segment. Expressed as a power of two. Optional.
- uint8_t mPrivateSegmentAlign = 0;
- /// \brief Wavefront size. Expressed as a power of two. Optional.
- uint8_t mWavefrontSize = 0;
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel code properties metadata is empty, false
- /// otherwise.
- bool empty() const {
- return !notEmpty();
- }
-
- /// \returns True if kernel code properties metadata is not empty, false
- /// otherwise.
- bool notEmpty() const {
- return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
- mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
- mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
- mPrivateSegmentAlign || mWavefrontSize;
- }
-};
-
-} // end namespace CodeProps
-
-//===----------------------------------------------------------------------===//
-// Kernel Debug Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace DebugProps {
-
-namespace Key {
-/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
-constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
-constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
-constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
-/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
-constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
-/// \brief Key for
-/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
-constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
- "WavefrontPrivateSegmentOffsetSGPR";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel debug properties metadata.
-struct Metadata final {
- /// \brief Debugger ABI version. Optional.
- std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>();
- /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if
- /// mDebuggerABIVersion is not set. Optional.
- uint16_t mReservedNumVGPRs = 0;
- /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if
- /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional.
- uint16_t mReservedFirstVGPR = uint16_t(-1);
- /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used
- /// for the entire kernel execution. Must be uint16_t(-1) if
- /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional.
- uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
- /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
- /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
- /// or SGPR is not used or not known. Optional.
- uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel debug properties metadata is empty, false
- /// otherwise.
- bool empty() const {
- return !notEmpty();
- }
-
- /// \returns True if kernel debug properties metadata is not empty, false
- /// otherwise.
- bool notEmpty() const {
- return !mDebuggerABIVersion.empty();
- }
-};
-
-} // end namespace DebugProps
-
-namespace Key {
-/// \brief Key for Kernel::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Metadata::mLanguage.
-constexpr char Language[] = "Language";
-/// \brief Key for Kernel::Metadata::mLanguageVersion.
-constexpr char LanguageVersion[] = "LanguageVersion";
-/// \brief Key for Kernel::Metadata::mAttrs.
-constexpr char Attrs[] = "Attrs";
-/// \brief Key for Kernel::Metadata::mArgs.
-constexpr char Args[] = "Args";
-/// \brief Key for Kernel::Metadata::mCodeProps.
-constexpr char CodeProps[] = "CodeProps";
-/// \brief Key for Kernel::Metadata::mDebugProps.
-constexpr char DebugProps[] = "DebugProps";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel metadata.
-struct Metadata final {
- /// \brief Name. Required.
- std::string mName = std::string();
- /// \brief Language. Optional.
- std::string mLanguage = std::string();
- /// \brief Language version. Optional.
- std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
- /// \brief Attributes metadata. Optional.
- Attrs::Metadata mAttrs = Attrs::Metadata();
- /// \brief Arguments metadata. Optional.
- std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
- /// \brief Code properties metadata. Optional.
- CodeProps::Metadata mCodeProps = CodeProps::Metadata();
- /// \brief Debug properties metadata. Optional.
- DebugProps::Metadata mDebugProps = DebugProps::Metadata();
-
- /// \brief Default constructor.
- Metadata() = default;
-};
-
-} // end namespace Kernel
-
-namespace Key {
-/// \brief Key for CodeObject::Metadata::mVersion.
-constexpr char Version[] = "Version";
-/// \brief Key for CodeObject::Metadata::mPrintf.
-constexpr char Printf[] = "Printf";
-/// \brief Key for CodeObject::Metadata::mKernels.
-constexpr char Kernels[] = "Kernels";
-} // end namespace Key
-
-/// \brief In-memory representation of code object metadata.
-struct Metadata final {
- /// \brief Code object metadata version. Required.
- std::vector<uint32_t> mVersion = std::vector<uint32_t>();
- /// \brief Printf metadata. Optional.
- std::vector<std::string> mPrintf = std::vector<std::string>();
- /// \brief Kernels metadata. Optional.
- std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \brief Converts \p YamlString to \p CodeObjectMetadata.
- static std::error_code fromYamlString(std::string YamlString,
- Metadata &CodeObjectMetadata);
-
- /// \brief Converts \p CodeObjectMetadata to \p YamlString.
- static std::error_code toYamlString(Metadata CodeObjectMetadata,
- std::string &YamlString);
-};
-
-} // end namespace CodeObject
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
index 647017d5061d..4e828a791e09 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -13,20 +13,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "AMDGPU.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::CodeObject;
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata(
"amdgpu-verify-comd",
cl::desc("Verify AMDGPU Code Object Metadata"));
-namespace yaml {
-
-template <>
-struct ScalarEnumerationTraits<AccessQualifier> {
- static void enumeration(IO &YIO, AccessQualifier &EN) {
- YIO.enumCase(EN, "Default", AccessQualifier::Default);
- YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
- YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
- YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<AddressSpaceQualifier> {
- static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
- YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
- YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
- YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
- YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
- YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
- YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueKind> {
- static void enumeration(IO &YIO, ValueKind &EN) {
- YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
- YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
- YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
- YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
- YIO.enumCase(EN, "Image", ValueKind::Image);
- YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
- YIO.enumCase(EN, "Queue", ValueKind::Queue);
- YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
- YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
- YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
- YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
- YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
- YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
- YIO.enumCase(EN, "HiddenCompletionAction",
- ValueKind::HiddenCompletionAction);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueType> {
- static void enumeration(IO &YIO, ValueType &EN) {
- YIO.enumCase(EN, "Struct", ValueType::Struct);
- YIO.enumCase(EN, "I8", ValueType::I8);
- YIO.enumCase(EN, "U8", ValueType::U8);
- YIO.enumCase(EN, "I16", ValueType::I16);
- YIO.enumCase(EN, "U16", ValueType::U16);
- YIO.enumCase(EN, "F16", ValueType::F16);
- YIO.enumCase(EN, "I32", ValueType::I32);
- YIO.enumCase(EN, "U32", ValueType::U32);
- YIO.enumCase(EN, "F32", ValueType::F32);
- YIO.enumCase(EN, "I64", ValueType::I64);
- YIO.enumCase(EN, "U64", ValueType::U64);
- YIO.enumCase(EN, "F64", ValueType::F64);
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Attrs::Metadata> {
- static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
- YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
- MD.mReqdWorkGroupSize, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
- MD.mWorkGroupSizeHint, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
- MD.mVecTypeHint, std::string());
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Arg::Metadata> {
- static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
- YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
- YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
- YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
- YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
- YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
- uint32_t(0));
- YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
- AccessQualifier::Unknown);
- YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
- AddressSpaceQualifier::Unknown);
- YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
- YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
- YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
- YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
- YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
- YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
- }
-};
-
-template <>
-struct MappingTraits<Kernel::CodeProps::Metadata> {
- static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
- YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
- MD.mKernargSegmentSize, uint64_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
- MD.mWorkgroupGroupSegmentSize, uint32_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
- MD.mWorkitemPrivateSegmentSize, uint32_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
- MD.mWavefrontNumSGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
- MD.mWorkitemNumVGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
- MD.mKernargSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
- MD.mGroupSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
- MD.mPrivateSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
- MD.mWavefrontSize, uint8_t(0));
- }
-};
-
-template <>
-struct MappingTraits<Kernel::DebugProps::Metadata> {
- static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
- YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
- MD.mDebuggerABIVersion, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
- MD.mReservedNumVGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
- MD.mReservedFirstVGPR, uint16_t(-1));
- YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
- MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
- YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
- MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Metadata> {
- static void mapping(IO &YIO, Kernel::Metadata &MD) {
- YIO.mapRequired(Kernel::Key::Name, MD.mName);
- YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
- YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
- std::vector<uint32_t>());
- if (!MD.mAttrs.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
- if (!MD.mArgs.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
- if (!MD.mCodeProps.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
- if (!MD.mDebugProps.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
- }
-};
-
-template <>
-struct MappingTraits<CodeObject::Metadata> {
- static void mapping(IO &YIO, CodeObject::Metadata &MD) {
- YIO.mapRequired(Key::Version, MD.mVersion);
- YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
- if (!MD.mKernels.empty() || !YIO.outputting())
- YIO.mapOptional(Key::Kernels, MD.mKernels);
- }
-};
-
-} // end namespace yaml
-
namespace AMDGPU {
-
-/* static */
-std::error_code CodeObject::Metadata::fromYamlString(
- std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
- yaml::Input YamlInput(YamlString);
- YamlInput >> CodeObjectMetadata;
- return YamlInput.error();
-}
-
-/* static */
-std::error_code CodeObject::Metadata::toYamlString(
- CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
- raw_string_ostream YamlStream(YamlString);
- yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
- YamlOutput << CodeObjectMetadata;
- return std::error_code();
-}
-
namespace CodeObject {
void MetadataStreamer::dump(StringRef YamlString) const {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
index 8d4c51763f63..c6681431d74d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -17,9 +17,9 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#include "AMDGPU.h"
-#include "AMDGPUCodeObjectMetadata.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
#include "llvm/Support/ErrorOr.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 073d19422e86..6abe7f3d37d5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8dc863f723e2..2a0032fc9adc 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPU.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -25,7 +26,6 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
namespace llvm {
@@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
}
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
- StringRef GlobalName) {
- OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
-}
-
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
- StringRef GlobalName) {
- OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
-}
-
bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
if (!VerifiedYamlString)
@@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
- StringRef GlobalName) {
-
- MCSymbolELF *Symbol = cast<MCSymbolELF>(
- getStreamer().getContext().getOrCreateSymbol(GlobalName));
- Symbol->setType(ELF::STT_OBJECT);
- Symbol->setBinding(ELF::STB_LOCAL);
-}
-
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
- StringRef GlobalName) {
-
- MCSymbolELF *Symbol = cast<MCSymbolELF>(
- getStreamer().getContext().getOrCreateSymbol(GlobalName));
- Symbol->setType(ELF::STT_OBJECT);
- Symbol->setBinding(ELF::STB_GLOBAL);
-}
-
bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
if (!VerifiedYamlString)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 5c588bbded9c..968128e94d0b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -44,10 +44,6 @@ public:
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
- virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
-
- virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
-
virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
virtual void EmitKernelCodeObjectMetadata(
@@ -74,10 +70,6 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
- void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
@@ -105,10 +97,6 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
- void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 6015ec190fd4..eab90e1d344c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -14,10 +14,10 @@
//
//===----------------------------------------------------------------------===//
-#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 0e4eda982139..f6f2582aa11b 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"SI", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx600", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]>;
+
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]
+>;
+
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]
>;
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]
>;
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>;
//===----------------------------------------------------------------------===//
// Sea Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
[FeatureISAVersion7_0_0]
>;
-def : ProcessorModel<"kabini", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_2]
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
[FeatureISAVersion7_0_0]
>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel,
+def : ProcessorModel<"gfx701", SIFullSpeedModel,
[FeatureISAVersion7_0_1]
>;
-def : ProcessorModel<"mullins", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_2]>;
-
-def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_0]
->;
-
-def : ProcessorModel<"gfx701", SIFullSpeedModel,
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
[FeatureISAVersion7_0_1]
>;
@@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel,
[FeatureISAVersion7_0_2]
>;
+def : ProcessorModel<"gfx703", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"kabini", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"mullins", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]>;
+
//===----------------------------------------------------------------------===//
// Volcanic Islands
//===----------------------------------------------------------------------===//
@@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
[FeatureISAVersion8_1_0]
>;
-def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
- [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+//===----------------------------------------------------------------------===//
+// GFX9
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_0]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_1]
+>;
+
+def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_2]
>;
-def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
- [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+def : ProcessorModel<"gfx903", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_3]
>;
+
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 09b328765604..6993e8a62a9c 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,15 +12,14 @@
/// computing their address on the fly ; it also sets STACK_SIZE info.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +29,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 03fc1aff5ec1..0d8ccd088ec4 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -15,10 +15,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 5c30a0734f0d..66def2d29caf 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -15,11 +15,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 1f01ad732e00..37787b3c5f72 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -10,8 +10,8 @@
#include "R600FrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 60b913cfd39a..c55878f8bff0 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
Mask = DAG.getConstant(0xff, DL, MVT::i32);
} else if (Store->getMemoryVT() == MVT::i16) {
assert(Store->getAlignment() >= 2);
- Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
+ Mask = DAG.getConstant(0xffff, DL, MVT::i32);
} else {
llvm_unreachable("Unsupported private trunc store");
}
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 2422d57269eb..c5da5e404200 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
-#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitVector.h"
@@ -35,8 +35,8 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
-#include <cstring>
#include <cstdint>
+#include <cstring>
#include <iterator>
#include <utility>
#include <vector>
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index db18e5bd1afa..47fda1c8fa82 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 3e957126b497..1cb40938cee7 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
index 62ebef8e91af..b5c439b21b89 100644
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -19,8 +19,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SIInstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 3cca815d8773..5f5f25103c02 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,10 +65,10 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/DenseSet.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index dfac068d1f69..e10f1ed3762e 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
// Make sure sources are identical.
const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+ if (!Src0->isReg() || !Src1->isReg() ||
+ Src0->getSubReg() != Src1->getSubReg() ||
Src0->getSubReg() != AMDGPU::NoSubRegister)
return nullptr;
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 97bb0f0c0656..b1bd14e421f0 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,10 +8,10 @@
//==-----------------------------------------------------------------------===//
#include "SIFrameLowering.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b48b23911105..599ee942d738 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,12 +17,12 @@
#define _USE_MATH_DEFINES
#endif
+#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
-#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
- return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c10badba88f3..0f009a48754a 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -229,7 +229,7 @@ public:
MachineInstr &MI);
BlockWaitcntBrackets()
- : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+ : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false),
LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 36d29b8ecf06..58c05cf16f15 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 35d3a93d8710..5f1c7f1fc42f 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -60,8 +60,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 3680e02da576..ba616ada0c9c 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 348bb4fa0260..9fdb8caac6f2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,8 +15,8 @@
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
-#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 9d4e677400e6..bb17dbbdfbd6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "SIMachineScheduler.h"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
-#include "SIMachineScheduler.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index fae249b04492..f4ddf1891683 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -20,13 +20,12 @@
///
//===----------------------------------------------------------------------===//
-
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include <unordered_map>
@@ -129,7 +128,8 @@ public:
bool getNeg() const { return Neg; }
bool getSext() const { return Sext; }
- uint64_t getSrcMods() const;
+ uint64_t getSrcMods(const SIInstrInfo *TII,
+ const MachineOperand *SrcOp) const;
};
class SDWADstOperand : public SDWAOperand {
@@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg,
return SuperMask.all();
}
-uint64_t SDWASrcOperand::getSrcMods() const {
+uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
+ const MachineOperand *SrcOp) const {
uint64_t Mods = 0;
+ const auto *MI = SrcOp->getParent();
+ if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
+ if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+ Mods = Mod->getImm();
+ }
+ } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
+ if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
+ Mods = Mod->getImm();
+ }
+ }
if (Abs || Neg) {
assert(!Sext &&
"Float and integer src modifiers can't be set simulteniously");
Mods |= Abs ? SISrcMods::ABS : 0;
- Mods |= Neg ? SISrcMods::NEG : 0;
+ Mods ^= Neg ? SISrcMods::NEG : 0;
} else if (Sext) {
Mods |= SISrcMods::SEXT;
}
@@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
copyRegOperand(*Src, *getTargetOperand());
SrcSel->setImm(getSrcSel());
- SrcMods->setImm(getSrcMods());
+ SrcMods->setImm(getSrcMods(TII, Src));
getTargetOperand()->setIsKill(false);
return true;
}
@@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
switch (Opcode) {
case AMDGPU::V_LSHRREV_B32_e32:
case AMDGPU::V_ASHRREV_I32_e32:
- case AMDGPU::V_LSHLREV_B32_e32: {
+ case AMDGPU::V_LSHLREV_B32_e32:
+ case AMDGPU::V_LSHRREV_B32_e64:
+ case AMDGPU::V_ASHRREV_I32_e64:
+ case AMDGPU::V_LSHLREV_B32_e64: {
// from: v_lshrrev_b32_e32 v1, 16/24, v0
// to SDWA src:v0 src_sel:WORD_1/BYTE_3
@@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
TRI->isPhysicalRegister(Dst->getReg()))
break;
- if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
+ Opcode == AMDGPU::V_LSHLREV_B32_e64) {
auto SDWADst = make_unique<SDWADstOperand>(
Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
} else {
auto SDWASrc = make_unique<SDWASrcOperand>(
Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
- Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+ Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
+ Opcode != AMDGPU::V_LSHRREV_B32_e64);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
case AMDGPU::V_LSHRREV_B16_e32:
case AMDGPU::V_ASHRREV_I16_e32:
- case AMDGPU::V_LSHLREV_B16_e32: {
+ case AMDGPU::V_LSHLREV_B16_e32:
+ case AMDGPU::V_LSHRREV_B16_e64:
+ case AMDGPU::V_ASHRREV_I16_e64:
+ case AMDGPU::V_LSHLREV_B16_e64: {
// from: v_lshrrev_b16_e32 v1, 8, v0
// to SDWA src:v0 src_sel:BYTE_1
@@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
TRI->isPhysicalRegister(Dst->getReg()))
break;
- if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
+ Opcode == AMDGPU::V_LSHLREV_B16_e64) {
auto SDWADst =
make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
} else {
auto SDWASrc = make_unique<SDWASrcOperand>(
Src1, Dst, BYTE_1, false, false,
- Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+ Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
+ Opcode != AMDGPU::V_LSHRREV_B16_e64);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
++NumSDWAPatternsFound;
break;
}
- case AMDGPU::V_AND_B32_e32: {
+ case AMDGPU::V_AND_B32_e32:
+ case AMDGPU::V_AND_B32_e64: {
// e.g.:
// from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
// to SDWA src:v0 src_sel:WORD_0/BYTE_0
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ auto ValSrc = Src1;
auto Imm = foldToImm(*Src0);
- if (!Imm)
- break;
- if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+ if (!Imm) {
+ Imm = foldToImm(*Src1);
+ ValSrc = Src0;
+ }
+
+ if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
break;
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (TRI->isPhysicalRegister(Src1->getReg()) ||
@@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
break;
auto SDWASrc = make_unique<SDWASrcOperand>(
- Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+ ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
// Check if this instruction has opcode that supports SDWA
- return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
+ unsigned Opc = MI.getOpcode();
+ if (AMDGPU::getSDWAOp(Opc) != -1)
+ return true;
+ int Opc32 = AMDGPU::getVOPe32(Opc);
+ if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
+ return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ return false;
}
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {
// Convert to sdwa
int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+ if (SDWAOpcode == -1)
+ SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);
+ // Copy dst, if it is present in original then should also be present in SDWA
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (!Dst && !TII->isVOPC(MI))
+ return false;
+
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
- // Copy dst, if it is present in original then should also be present in SDWA
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
- } else {
- assert(TII->isVOPC(MI));
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
Src0 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
- SDWAInst.addImm(0);
+ if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
+ SDWAInst.addImm(Mod->getImm());
+ else
+ SDWAInst.addImm(0);
SDWAInst.add(*Src0);
// Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
assert(
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
- SDWAInst.addImm(0);
+ if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
+ SDWAInst.addImm(Mod->getImm());
+ else
+ SDWAInst.addImm(0);
SDWAInst.add(*Src1);
- } else {
- assert(TII->isVOP1(MI));
}
if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
@@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
PotentialMatches.clear();
SDWAOperands.clear();
+ bool Ret = !ConvertedInstructions.empty();
while (!ConvertedInstructions.empty())
legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
- return false;
+ return Ret;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6fb01a09fe13..b611f28fcabd 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
+StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
+ #define AMDGPU_REG_ASM_NAMES
+ #include "AMDGPURegAsmNames.inc.cpp"
+
+ #define REG_RANGE(BeginReg, EndReg, RegTable) \
+ if (Reg >= BeginReg && Reg <= EndReg) { \
+ unsigned Index = Reg - BeginReg; \
+ assert(Index < array_lengthof(RegTable)); \
+ return RegTable[Index]; \
+ }
+
+ REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
+ REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
+ REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
+ REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
+ VGPR96RegNames);
+
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
+ AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR128RegNames);
+ REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+ AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR128RegNames);
+
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
+ AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR256RegNames);
+
+ REG_RANGE(
+ AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
+ AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR512RegNames);
+
+ REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
+ AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR256RegNames);
+
+ REG_RANGE(
+ AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
+ AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR512RegNames
+ );
+
+#undef REG_RANGE
+
+ // FIXME: Rename flat_scr so we don't need to special case this.
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR:
+ return "flat_scratch";
+ case AMDGPU::FLAT_SCR_LO:
+ return "flat_scratch_lo";
+ case AMDGPU::FLAT_SCR_HI:
+ return "flat_scratch_hi";
+ default:
+ // For the special named registers the default is fine.
+ return TargetRegisterInfo::getRegAsmName(Reg);
+ }
+}
+
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index a648c178101a..8fed6d5f9710 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
-#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -118,6 +118,8 @@ public:
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
int FI, RegScavenger *RS) const;
+ StringRef getRegAsmName(unsigned Reg) const override;
+
unsigned getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 630f469eabf0..f581e69980c7 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,11 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -27,7 +28,6 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@@ -38,7 +38,6 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
@@ -104,6 +103,11 @@ namespace AMDGPU {
namespace IsaInfo {
IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // SI.
+ if (Features.test(FeatureISAVersion6_0_0))
+ return {6, 0, 0};
+ if (Features.test(FeatureISAVersion6_0_1))
+ return {6, 0, 1};
// CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
@@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {7, 0, 1};
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ if (Features.test(FeatureISAVersion7_0_3))
+ return {7, 0, 3};
// VI.
if (Features.test(FeatureISAVersion8_0_0))
@@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {9, 0, 0};
if (Features.test(FeatureISAVersion9_0_1))
return {9, 0, 1};
+ if (Features.test(FeatureISAVersion9_0_2))
+ return {9, 0, 2};
+ if (Features.test(FeatureISAVersion9_0_3))
+ return {9, 0, 3};
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
return {0, 0, 0};
@@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
-MCSection *getHSATextSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_EXECINSTR |
- ELF::SHF_AMDGPU_HSA_AGENT |
- ELF::SHF_AMDGPU_HSA_CODE);
-}
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_AMDGPU_HSA_GLOBAL |
- ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_AMDGPU_HSA_GLOBAL);
-}
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
- ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19888ad7556a..eff0230d21f5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
-MCSection *getHSATextSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 77fc9551cff9..a8ca593f14ed 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>;
+def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
@@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
let SubtargetPredicate = isCIVI in {
+let Constraints = "@earlyclobber $vdst" in {
def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
let isCommutable = 1 in {
def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 14e197f477f1..f9da036c7e46 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,6 +23,8 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -43,9 +45,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f8b65573f9cd..8715657ad5e2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -21,9 +21,9 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b18ed509ed23..b4fb292c0116 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a33d025d114e..a7ac9a1dca6e 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -122,8 +123,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};
- MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0);
- MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32);
+ MIRBuilder.buildUnmerge(NewRegs, Arg.Reg);
bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
if (!IsLittle)
@@ -339,7 +339,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);
- MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32});
+ MIRBuilder.buildMerge(Arg.Reg, NewRegs);
return 1;
}
@@ -461,7 +461,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const auto &TLI = *getTLI<ARMTargetLowering>();
const auto &DL = MF.getDataLayout();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const auto &STI = MF.getSubtarget();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
if (MF.getSubtarget<ARMSubtarget>().genLongCalls())
@@ -473,6 +474,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// registers, but don't insert it yet.
auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask(
TRI->getCallPreservedMask(MF, CallConv));
+ if (Callee.isReg()) {
+ auto CalleeReg = Callee.getReg();
+ if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
+ MIB->getOperand(0).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
+ *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0));
+ }
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : OrigArgs) {
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 6434df317aa8..667337dc9267 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -21,10 +21,10 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4f6a73b5980d..384f80356cc8 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -26,8 +26,8 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c2b2502843c0..16b54e8848c2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -20,9 +20,9 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 949d821e36b2..5b2d093e8f0d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
@@ -29,13 +29,13 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -61,7 +61,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -103,8 +102,8 @@
#include <cstdlib>
#include <iterator>
#include <limits>
-#include <tuple>
#include <string>
+#include <tuple>
#include <utility>
#include <vector>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 0f225156d4ca..817b567db767 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0,
IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index b1f059835ff5..2ae3bad4076b 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -127,34 +127,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
-static bool selectSequence(MachineInstrBuilder &MIB,
- const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP");
-
- // We only support G_SEQUENCE as a way to stick together two scalar GPRs
+static bool selectMergeValues(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+
+ // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
// into one DPR.
unsigned VReg0 = MIB->getOperand(0).getReg();
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
- "Unsupported operand for G_SEQUENCE");
+ "Unsupported operand for G_MERGE_VALUES");
unsigned VReg1 = MIB->getOperand(1).getReg();
(void)VReg1;
assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- "Unsupported operand for G_SEQUENCE");
- unsigned VReg2 = MIB->getOperand(3).getReg();
+ "Unsupported operand for G_MERGE_VALUES");
+ unsigned VReg2 = MIB->getOperand(2).getReg();
(void)VReg2;
assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- "Unsupported operand for G_SEQUENCE");
-
- // Remove the operands corresponding to the offsets.
- MIB->RemoveOperand(4);
- MIB->RemoveOperand(2);
+ "Unsupported operand for G_MERGE_VALUES");
MIB->setDesc(TII.get(ARM::VMOVDRR));
MIB.add(predOps(ARMCC::AL));
@@ -162,30 +158,32 @@ static bool selectSequence(MachineInstrBuilder &MIB,
return true;
}
-static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP");
+static bool selectUnmergeValues(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
- // We only support G_EXTRACT as a way to break up one DPR into two GPRs.
+ // We only support G_UNMERGE_VALUES as a way to break up one DPR into two
+ // GPRs.
unsigned VReg0 = MIB->getOperand(0).getReg();
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- "Unsupported operand for G_EXTRACT");
+ "Unsupported operand for G_UNMERGE_VALUES");
unsigned VReg1 = MIB->getOperand(1).getReg();
(void)VReg1;
- assert(MRI.getType(VReg1).getSizeInBits() == 64 &&
- RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID &&
- "Unsupported operand for G_EXTRACT");
- assert(MIB->getOperand(2).getImm() % 32 == 0 &&
- "Unsupported operand for G_EXTRACT");
-
- // Remove the operands corresponding to the offsets.
- MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32);
+ assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_UNMERGE_VALUES");
+ unsigned VReg2 = MIB->getOperand(2).getReg();
+ (void)VReg2;
+ assert(MRI.getType(VReg2).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_UNMERGE_VALUES");
- MIB->setDesc(TII.get(ARM::VGETLNi32));
+ MIB->setDesc(TII.get(ARM::VMOVRRD));
MIB.add(predOps(ARMCC::AL));
return true;
@@ -407,13 +405,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
MIB.addImm(0).add(predOps(ARMCC::AL));
break;
}
- case G_SEQUENCE: {
- if (!selectSequence(MIB, TII, MRI, TRI, RBI))
+ case G_MERGE_VALUES: {
+ if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
return false;
break;
}
- case G_EXTRACT: {
- if (!selectExtract(MIB, TII, MRI, TRI, RBI))
+ case G_UNMERGE_VALUES: {
+ if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
return false;
break;
}
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 5bf6c7aed6b8..2d490b7c303e 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -45,7 +45,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, 1, p0}, Legal);
}
- for (unsigned Op : {G_ADD, G_SUB, G_MUL}) {
+ for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) {
for (auto Ty : {s1, s8, s16})
setAction({Op, Ty}, WidenScalar);
setAction({Op, s32}, Legal);
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 72fcf7cd6a4f..7a452d4a2095 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -33,7 +34,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 9e9c1ba6c114..13acea3c28a9 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -25,9 +25,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index a20997c95cd9..f59b075e6dd9 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -221,6 +221,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_ADD:
case G_SUB:
case G_MUL:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
case G_SDIV:
case G_UDIV:
case G_SEXT:
@@ -252,30 +255,32 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
- case G_SEQUENCE: {
- // We only support G_SEQUENCE for creating a double precision floating point
- // value out of two GPRs.
+ case G_MERGE_VALUES: {
+ // We only support G_MERGE_VALUES for creating a double precision floating
+ // point value out of two GPRs.
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
- LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
Ty2.getSizeInBits() != 32)
return getInvalidInstructionMapping();
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
- &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
- &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
break;
}
- case G_EXTRACT: {
- // We only support G_EXTRACT for splitting a double precision floating point
- // value into two GPRs.
+ case G_UNMERGE_VALUES: {
+ // We only support G_UNMERGE_VALUES for splitting a double precision
+ // floating point value into two GPRs.
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
- if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 ||
- MI.getOperand(2).getImm() % 32 != 0)
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
+ Ty2.getSizeInBits() != 64)
return getInvalidInstructionMapping();
- OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
- &ARM::ValueMappings[ARM::DPR3OpsIdx],
- nullptr, nullptr});
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::DPR3OpsIdx]});
break;
}
default:
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b8a708a20a95..d9d0c27c6304 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -28,10 +28,10 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <string>
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index f5e4043882ff..c0506cfda612 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
@@ -389,6 +390,20 @@ public:
return getTM<ARMBaseTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ // add DAG Mutations here.
+ return DAG;
+ }
+
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ // add DAG Mutations here.
+ return DAG;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index edbf2b99126c..a5b27abeb27f 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
-#include "ARMTargetObjectFile.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ada816c16389..19fba3033bb2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -39,10 +41,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetParser.h"
@@ -1026,6 +1026,15 @@ public:
ARM_AM::getSOImmVal(-Value) != -1);
}
bool isT2SOImm() const {
+ // If we have an immediate that's not a constant, treat it as an expression
+ // needing a fixup.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) {
+ // We want to avoid matching :upper16: and :lower16: as we want these
+ // expressions to match in isImm0_65535Expr()
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(getImm());
+ return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16));
+ }
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -8404,7 +8413,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// wide encoding wasn't explicit.
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
- (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ (Inst.getOperand(2).isImm() &&
+ (unsigned)Inst.getOperand(2).getImm() > 255) ||
((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
(inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
@@ -8556,7 +8566,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// If we can use the 16-bit encoding and the user didn't explicitly
// request the 32-bit variant, transform it here.
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- (unsigned)Inst.getOperand(1).getImm() <= 255 &&
+ (Inst.getOperand(1).isImm() &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255) &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR) ||
(inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e812d32cc76f..585726208a8d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -20,8 +20,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b0d1d3fb9ef0..716492ea2566 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMAsmBackendDarwin.h"
#include "MCTargetDesc/ARMAsmBackendELF.h"
#include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -31,10 +33,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movt_hi16", 0, 20, 0},
{"fixup_t2_movw_lo16", 0, 20, 0},
{"fixup_arm_mod_imm", 0, 12, 0},
+ {"fixup_t2_so_imm", 0, 26, 0},
};
const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movt_hi16", 12, 20, 0},
{"fixup_t2_movw_lo16", 12, 20, 0},
{"fixup_arm_mod_imm", 20, 12, 0},
+ {"fixup_t2_so_imm", 26, 6, 0},
};
if (Kind < FirstTargetFixupKind)
@@ -693,6 +695,23 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return 0;
}
return Value;
+ case ARM::fixup_t2_so_imm: {
+ Value = ARM_AM::getT2SOImmVal(Value);
+ if ((int64_t)Value < 0) {
+ Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ return 0;
+ }
+ // Value will contain a 12-bit value broken up into a 4-bit shift in bits
+ // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate
+ // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit
+ // 10 of the upper half-word and imm3 is placed at 14:12 of the lower
+ // half-word.
+ uint64_t EncValue = 0;
+ EncValue |= (Value & 0x800) << 15;
+ EncValue |= (Value & 0x700) << 4;
+ EncValue |= (Value & 0xff);
+ return swapHalfWords(EncValue, IsLittleEndian);
+ }
}
}
@@ -704,16 +723,17 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
+ const unsigned FixupKind = Fixup.getKind() ;
// MachO (the only user of "Value") tries to make .o files that look vaguely
// pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
// into the addend if possible. Other relocation types don't want this bit
// though (branches couldn't encode it if it *was* present, and no other
// relocations exist) and it can interfere with checking valid expressions.
- if ((unsigned)Fixup.getKind() == FK_Data_4 ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 ||
- (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) {
+ if (FixupKind == FK_Data_4 ||
+ FixupKind == ARM::fixup_arm_movw_lo16 ||
+ FixupKind == ARM::fixup_arm_movt_hi16 ||
+ FixupKind == ARM::fixup_t2_movw_lo16 ||
+ FixupKind == ARM::fixup_t2_movt_hi16) {
if (Sym) {
if (Asm.isThumbFunc(Sym))
Value |= 1;
@@ -729,23 +749,27 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// linker can handle it. GNU AS produces an error in this case.
if (Sym->isExternal() || Value >= 0x400004)
IsResolved = false;
- // When an ARM function is called from a Thumb function, produce a
- // relocation so the linker will use the correct branch instruction for ELF
- // binaries.
- if (Sym->isELF()) {
- unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
- if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) &&
- !Asm.isThumbFunc(Sym))
+ }
+ // Create relocations for unconditional branches to function symbols with
+ // different execution mode in ELF binaries.
+ if (Sym && Sym->isELF()) {
+ unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
+ if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
+ if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
+ IsResolved = false;
+ if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
+ FixupKind == ARM::fixup_arm_thumb_bl ||
+ FixupKind == ARM::fixup_t2_uncondbranch))
IsResolved = false;
}
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
// symbol's thumb-ness to get interworking right.
- if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ if (A && (FixupKind == ARM::fixup_arm_thumb_blx ||
+ FixupKind == ARM::fixup_arm_blx ||
+ FixupKind == ARM::fixup_arm_uncondbl ||
+ FixupKind == ARM::fixup_arm_condbl))
IsResolved = false;
}
@@ -792,6 +816,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_so_imm:
return 4;
case FK_SecRel_2:
@@ -844,6 +869,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_arm_mod_imm:
+ case ARM::fixup_t2_so_imm:
// Instruction size is 4 bytes.
return 4;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 09dc0173ade6..bd729fabedf5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -11,7 +11,7 @@
#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
#include "ARMAsmBackend.h"
-#include "llvm/Support/MachO.h"
+#include "llvm/BinaryFormat/MachO.h"
namespace llvm {
class ARMAsmBackendDarwin : public ARMAsmBackend {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index e1fa24571820..59f31be69d58 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -9,12 +9,12 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 4d6c52f3cd49..93f4006cee87 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -43,12 +44,11 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 3fe2302bdd37..9f6c5d7bf920 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -110,6 +110,9 @@ enum Fixups {
// fixup_arm_mod_imm - Fixup for mod_imm
fixup_arm_mod_imm,
+ // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand
+ fixup_t2_so_imm,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index d9df2c6da7ec..f1f35f409900 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -339,7 +339,17 @@ public:
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- unsigned SoImm = MI.getOperand(Op).getImm();
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // Support for fixups (MCFixup)
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // Fixups resolve to plain values that need to be encoded.
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+ unsigned SoImm = MO.getImm();
unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
return Encoded;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 477755157040..b8a8b1f7619a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMMCTargetDesc.h"
#include "ARMBaseInfo.h"
#include "ARMMCAsmInfo.h"
-#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCELFStreamer.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 34c770440e1b..5516a1bdb03d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -9,10 +9,10 @@
#include "ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm-c/Disassembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm-c/Disassembler.h"
using namespace llvm;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b77181f29b2d..4a8139dea668 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 7ae2f864d79d..00505a103e00 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -9,13 +9,13 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index f10427e2ed57..0b6574c37de1 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -11,26 +11,26 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1FrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCDwarf.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 0ebf55924647..3a3920a2db32 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2e2dfe035e26..9125be96a07b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,16 +11,26 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb2InstrInfo.h"
-#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
using namespace llvm;
@@ -30,7 +40,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI() {}
+ : ARMBaseInstrInfo(STI) {}
/// Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoop(MCInst &NopInst) const {
@@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Add cc_out operand if the original instruction did not have one.
if (!HasCCOut)
MI.addOperand(MachineOperand::CreateReg(0, false));
-
} else {
-
// AddrMode4 and AddrMode6 cannot handle any offset.
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
return false;
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index c90475c28db7..d911dd97b1ac 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -14,10 +14,10 @@
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h
index 8e5cc5360ad4..5eadf7bdcef6 100644
--- a/lib/Target/AVR/AVR.h
+++ b/lib/Target/AVR/AVR.h
@@ -15,8 +15,8 @@
#ifndef LLVM_AVR_H
#define LLVM_AVR_H
-#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index d6491ce5c3bf..f0c7b11895b4 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -18,8 +18,8 @@
#include "InstPrinter/AVRInstPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 11a47bad78ba..55f3f5cf428a 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -51,7 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
// Reserve the intermediate result registers r1 and r2
// The result of instructions like 'mul' is always stored here.
@@ -269,4 +268,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg,
}
} // end of namespace llvm
-
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index c228d051d771..556d69ec5234 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -13,7 +13,7 @@
#include "AVRSubtarget.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "AVR.h"
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index a37849c3f3f7..b0e634f86168 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -14,10 +14,9 @@
#ifndef LLVM_AVR_SUBTARGET_H
#define LLVM_AVR_SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "AVRFrameLowering.h"
#include "AVRISelLowering.h"
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index 2ab0b1080c6a..91d2a8737b87 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -15,12 +15,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
-#include "AVRTargetObjectFile.h"
#include "AVR.h"
+#include "AVRTargetObjectFile.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
namespace llvm {
diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp
index af14d9292f27..0cebb0f043f9 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -9,12 +9,12 @@
#include "AVRTargetObjectFile.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
#include "AVR.h"
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 5b0398c0ca34..cf52e552978f 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -18,12 +18,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index d2a21fb64635..e69accfa9393 100644
--- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -16,11 +16,11 @@
#include "AVRSubtarget.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 713754821005..1e61eccf775f 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -1,8 +1,8 @@
#include "AVRELFStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
#include "AVRMCTargetDesc.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index 400296b8409b..085afd23a83c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -9,11 +9,11 @@
#include "AVRMCExpr.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmLayout.h"
namespace llvm {
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index a4fa5c0a9310..826430e94b9c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "AVRMCTargetDesc.h"
#include "AVRELFStreamer.h"
#include "AVRMCAsmInfo.h"
-#include "AVRMCTargetDesc.h"
#include "AVRTargetStreamer.h"
#include "InstPrinter/AVRInstPrinter.h"
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index c5201465e074..fcd903b7a4a8 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -18,10 +18,10 @@
#include "BPFTargetMachine.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp
index e38facead922..5351cfa95020 100644
--- a/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/lib/Target/BPF/BPFInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
#include "BPFInstrInfo.h"
+#include "BPF.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 7925bee9c587..273843e92701 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
#include "BPFRegisterInfo.h"
+#include "BPF.h"
#include "BPFSubtarget.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "BPFGenRegisterInfo.inc"
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index cf8e73540904..d84b0a80fc0c 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
#include "BPFTargetMachine.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "BPF.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index b98621ca4749..a1d732c339e5 100644
--- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -15,10 +15,10 @@
#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index ffd29f3ea991..64e986fe0f04 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
#include "BPFInstPrinter.h"
+#include "BPF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index ebe9abd8ffac..d5e1d7706edc 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index b58409730de0..797904e1c976 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "BPF.h"
#include "InstPrinter/BPFInstPrinter.h"
-#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 3df673eaeb4b..d1c97c9987e1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class MCAsmBackend;
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index b0b2644fffbe..c19e636d79ca 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -17,11 +17,12 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonShuffler.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -42,13 +43,12 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cctype>
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 07767d1037a9..5b02aa3ca3ae 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -65,9 +65,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <iterator>
#include <cassert>
#include <cstdint>
+#include <iterator>
using namespace llvm;
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 87c212b6163f..586220dfec26 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -12,12 +12,12 @@
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -25,8 +25,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index bb5128e7500f..e689483a0999 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
@@ -23,6 +23,7 @@
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -43,7 +44,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 8502bf24c02f..14c682c6df4b 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -13,8 +13,8 @@
#include "HexagonTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index af0f8b265bda..730026121d3b 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonBitTracker.h"
+#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetMachine.h"
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h
index 717480314d16..769ec7044a0e 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -14,8 +14,8 @@
#include <cassert>
#include <map>
#include <set>
-#include <vector>
#include <utility>
+#include <vector>
namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index a07ba77e6f3e..b5b46f2b7d19 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -175,7 +175,8 @@ namespace {
None = 0,
Root = 0x01,
Internal = 0x02,
- Used = 0x04
+ Used = 0x04,
+ InBounds = 0x08
};
uint32_t Flags;
@@ -231,6 +232,11 @@ namespace {
OS << ',';
OS << "used";
}
+ if (GN.Flags & GepNode::InBounds) {
+ if (Comma)
+ OS << ',';
+ OS << "inbounds";
+ }
OS << "} ";
if (GN.Flags & GepNode::Root)
OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')';
@@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
GepNode *N = new (*Mem) GepNode;
Value *PtrOp = GepI->getPointerOperand();
+ uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0;
ValueToNodeMap::iterator F = NM.find(PtrOp);
if (F == NM.end()) {
N->BaseVal = PtrOp;
- N->Flags |= GepNode::Root;
+ N->Flags |= GepNode::Root | InBounds;
} else {
// If PtrOp was a GEP instruction, it must have already been processed.
// The ValueToNodeMap entry for it is the last gep node in the generated
@@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
Value *Op = *OI;
GepNode *Nx = new (*Mem) GepNode;
Nx->Parent = PN; // Link Nx to the previous node.
- Nx->Flags |= GepNode::Internal;
+ Nx->Flags |= GepNode::Internal | InBounds;
Nx->PTy = PtrTy;
Nx->Idx = Op;
Nodes.push_back(Nx);
@@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
GepNode *RN = NA[0];
assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
- Value *NewInst = nullptr;
+ GetElementPtrInst *NewInst = nullptr;
Value *Input = RN->BaseVal;
Value **IdxList = new Value*[Num+1];
unsigned nax = 0;
@@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
Type *InpTy = Input->getType();
Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
+ NewInst->setIsInBounds(RN->Flags & GepNode::InBounds);
DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
Input = NewInst;
} while (nax <= Num);
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 783b916e04b0..aa68f6cfdfc1 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2276,7 +2276,7 @@ Undetermined:
goto Undetermined;
uint32_t Props = PredC.properties();
- bool CTrue = false, CFalse = false;;
+ bool CTrue = false, CFalse = false;
if (Props & ConstantProperties::Zero)
CFalse = true;
else if (Props & ConstantProperties::NonZero)
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 8118c8eb149d..6b4f53428256 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 67af947e089d..03c4a83594b3 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -65,9 +65,9 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 015d3b840e6f..23d4e2610d9a 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -12,10 +12,9 @@
// form.
//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 25018b9ed510..18e49c69b8e3 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -10,8 +10,8 @@
#define DEBUG_TYPE "hexagon-pei"
-#include "HexagonBlockRanges.h"
#include "HexagonFrameLowering.h"
+#include "HexagonBlockRanges.h"
#include "HexagonInstrInfo.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index c99ad5130aef..7c6de6d513e8 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -14,10 +14,10 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index 54d99d399f88..bf31e1699284 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -34,8 +34,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 85222944c77c..3c37d9ebb0eb 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -40,8 +40,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <limits>
#include <iterator>
+#include <limits>
#include <utility>
using namespace llvm;
@@ -235,8 +235,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned DR = MI->getOperand(0).getReg();
if (isRegPair(DR))
continue;
+ MachineOperand &PredOp = MI->getOperand(1);
+ if (PredOp.isUndef())
+ continue;
- unsigned PR = MI->getOperand(1).getReg();
+ unsigned PR = PredOp.getReg();
unsigned Idx = I2X.lookup(MI);
CondsetMap::iterator F = CM.find(DR);
bool IfTrue = HII->isPredicatedTrue(Opc);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 4c6c6eeafbe0..afed894cfb9a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonISelLowering.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
@@ -26,8 +26,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index b76da727237c..f43101fa456d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonInstrInfo.h"
#include "Hexagon.h"
#include "HexagonHazardRecognizer.h"
-#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -57,9 +57,9 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
-#include "HexagonGenInstrInfo.inc"
-#include "HexagonGenDFAPacketizer.inc"
#include "HexagonDepTimingClasses.h"
+#include "HexagonGenDFAPacketizer.inc"
+#include "HexagonGenInstrInfo.inc"
cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 9aa185fc85a6..b748b58bc0ae 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -23,11 +23,11 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <array>
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 324108284a9a..4602de979024 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump(
}
#endif
-/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledPred(SUnit *SU) {
- SUnit *OnlyAvailablePred = nullptr;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit &Pred = *I->getSUnit();
- if (!Pred.isScheduled) {
- // We found an available, but not scheduled, predecessor. If it's the
- // only one we have found, keep track of it... otherwise give up.
- if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
- return nullptr;
- OnlyAvailablePred = &Pred;
- }
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+ if (SU->NumPredsLeft == 0)
+ return false;
+
+ for (auto &Pred : SU->Preds) {
+ // We found an available, but not scheduled, predecessor.
+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+ return false;
}
- return OnlyAvailablePred;
+
+ return true;
}
-/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
- SUnit *OnlyAvailableSucc = nullptr;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- SUnit &Succ = *I->getSUnit();
- if (!Succ.isScheduled) {
- // We found an available, but not scheduled, successor. If it's the
- // only one we have found, keep track of it... otherwise give up.
- if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
- return nullptr;
- OnlyAvailableSucc = &Succ;
- }
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+ if (SU->NumSuccsLeft == 0)
+ return false;
+
+ for (auto &Succ : SU->Succs) {
+ // We found an available, but not scheduled, successor.
+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+ return false;
}
- return OnlyAvailableSucc;
+ return true;
}
// Constants used to denote relative importance of
@@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Count the number of nodes that
// this node is the sole unscheduled node for.
for (const SDep &SI : SU->Succs)
- if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
++NumNodesBlocking;
} else {
// How many unscheduled predecessors block this node?
for (const SDep &PI : SU->Preds)
- if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
++NumNodesBlocking;
}
ResCount += (NumNodesBlocking * ScaleTwo);
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 70ed123bc898..f269b74fc447 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,3 +1,12 @@
+//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
// Pattern fragment that combines the value type and the register class
// into a single parameter.
@@ -345,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
(M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
(M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1),
(M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
(M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
@@ -674,6 +683,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs),
defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
+def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)),
+ (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>;
def: Pat<(sra I64:$src, u6_0ImmPred:$u6),
(S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
def: Pat<(srl I64:$src, u6_0ImmPred:$u6),
@@ -786,27 +797,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)),
def: Pat<(i64 (sext_inreg I64:$src1, i8)),
(A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
-// We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a J2_jumpf.
-def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
- (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
- (J2_jumpt PredRegs:$src1, bb:$offset)>;
+def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset),
+ (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>;
+def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset),
+ (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>;
+def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$Pu, bb:$offset)>;
+def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$Pu, bb:$offset)>;
// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
- (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)),
- bb:$offset)>;
+def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>;
+
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes; so emulate with combines.
@@ -860,15 +863,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)),
def: Pat<(i1 (setne I64:$src1, I64:$src2)),
(C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
-// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setge I32:$src1, I32:$src2)),
- (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;
+// rs >= rt -> rt <= rs
+def: Pat<(i1 (setge I32:$Rs, I32:$Rt)),
+ (C4_cmplte I32:$Rt, I32:$Rs)>;
-// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
let AddedComplexity = 30 in
-def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
- (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)),
+ (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
@@ -1634,9 +1635,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6),
(M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
+def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6),
+ (HexagonCONST32 tglobaladdr:$global)),
+ (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>;
def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6),
(M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
-
+def: Pat<(add (mul I32:$Rs, I32:$Rt),
+ (HexagonCONST32 tglobaladdr:$global)),
+ (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)),
(M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>;
def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)),
@@ -2129,6 +2135,11 @@ let AddedComplexity = 30 in {
def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
+ def: Storea_pat<store, I64, u32_0ImmPred, PS_storerdabs>;
+
+ def: Stoream_pat<truncstorei8, I64, u32_0ImmPred, LoReg, PS_storerbabs>;
+ def: Stoream_pat<truncstorei16, I64, u32_0ImmPred, LoReg, PS_storerhabs>;
+ def: Stoream_pat<truncstorei32, I64, u32_0ImmPred, LoReg, PS_storeriabs>;
}
let AddedComplexity = 30 in {
@@ -2137,6 +2148,19 @@ let AddedComplexity = 30 in {
def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
+ def: Loada_pat<load, i64, u32_0ImmPred, PS_loadrdabs>;
+
+ def: Loadam_pat<extloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>;
+ def: Loadam_pat<sextloadi8, i64, u32_0ImmPred, ToSext64, PS_loadrbabs>;
+ def: Loadam_pat<zextloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>;
+
+ def: Loadam_pat<extloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>;
+ def: Loadam_pat<sextloadi16, i64, u32_0ImmPred, ToSext64, PS_loadrhabs>;
+ def: Loadam_pat<zextloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>;
+
+ def: Loadam_pat<extloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>;
+ def: Loadam_pat<sextloadi32, i64, u32_0ImmPred, ToSext64, PS_loadriabs>;
+ def: Loadam_pat<zextloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>;
}
// Indexed store word - global address.
@@ -2707,6 +2731,15 @@ def: Pat<(fneg F64:$Rs),
(S2_togglebit_i (HiReg $Rs), 31), isub_hi,
(i32 (LoReg $Rs)), isub_lo)>;
+def: Pat<(mul I64:$Rss, I64:$Rtt),
+ (A2_combinew
+ (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))),
+ (LoReg $Rss),
+ (HiReg $Rtt)),
+ (LoReg $Rtt),
+ (HiReg $Rss)),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>;
+
def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
return isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 471e32221b29..db268b78cd73 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -13,8 +13,8 @@
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 14ecf297d351..c757b6ecdd00 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -23,8 +23,8 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -276,27 +276,27 @@ bool HexagonPassConfig::addInstSelector() {
if (!NoOpt) {
// Create logical operations on predicate registers.
if (EnableGenPred)
- addPass(createHexagonGenPredicate(), false);
+ addPass(createHexagonGenPredicate());
// Rotate loops to expose bit-simplification opportunities.
if (EnableLoopResched)
- addPass(createHexagonLoopRescheduling(), false);
+ addPass(createHexagonLoopRescheduling());
// Split double registers.
if (!DisableHSDR)
addPass(createHexagonSplitDoubleRegs());
// Bit simplification.
if (EnableBitSimplify)
- addPass(createHexagonBitSimplify(), false);
+ addPass(createHexagonBitSimplify());
addPass(createHexagonPeephole());
printAndVerify("After hexagon peephole pass");
// Constant propagation.
if (!DisableHCP) {
- addPass(createHexagonConstPropagationPass(), false);
- addPass(&UnreachableMachineBlockElimID, false);
+ addPass(createHexagonConstPropagationPass());
+ addPass(&UnreachableMachineBlockElimID);
}
if (EnableGenInsert)
- addPass(createHexagonGenInsert(), false);
+ addPass(createHexagonGenInsert());
if (EnableEarlyIf)
- addPass(createHexagonEarlyIfConversion(), false);
+ addPass(createHexagonEarlyIfConversion());
}
return false;
@@ -307,9 +307,9 @@ void HexagonPassConfig::addPreRegAlloc() {
if (EnableExpandCondsets)
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
if (!DisableStoreWidening)
- addPass(createHexagonStoreWidening(), false);
+ addPass(createHexagonStoreWidening());
if (!DisableHardwareLoops)
- addPass(createHexagonHardwareLoops(), false);
+ addPass(createHexagonHardwareLoops());
}
if (TM->getOptLevel() >= CodeGenOpt::Default)
addPass(&MachinePipelinerID);
@@ -320,16 +320,16 @@ void HexagonPassConfig::addPostRegAlloc() {
if (EnableRDFOpt)
addPass(createHexagonRDFOpt());
if (!DisableHexagonCFGOpt)
- addPass(createHexagonCFGOptimizer(), false);
+ addPass(createHexagonCFGOptimizer());
if (!DisableAModeOpt)
- addPass(createHexagonOptAddrMode(), false);
+ addPass(createHexagonOptAddrMode());
}
}
void HexagonPassConfig::addPreSched2() {
- addPass(createHexagonCopyToCombine(), false);
+ addPass(createHexagonCopyToCombine());
if (getOptLevel() != CodeGenOpt::None)
- addPass(&IfConverterID, false);
+ addPass(&IfConverterID);
addPass(createHexagonSplitConst32AndConst64());
}
@@ -337,17 +337,17 @@ void HexagonPassConfig::addPreEmitPass() {
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
- addPass(createHexagonNewValueJump(), false);
+ addPass(createHexagonNewValueJump());
- addPass(createHexagonBranchRelaxation(), false);
+ addPass(createHexagonBranchRelaxation());
// Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
- addPass(createHexagonFixupHwLoops(), false);
+ addPass(createHexagonFixupHwLoops());
// Generate MUX from pairs of conditional transfers.
if (EnableGenMux)
- addPass(createHexagonGenMux(), false);
+ addPass(createHexagonGenMux());
addPass(createHexagonPacketizer(), false);
}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index c9c4f95dbaaa..4dacb1501392 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalObject.h"
@@ -28,7 +29,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index fa08afe4019d..7667bfb7a0eb 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -16,10 +16,10 @@
// prune the dependence.
//
//===----------------------------------------------------------------------===//
+#include "HexagonVLIWPacketizer.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "HexagonVLIWPacketizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 904403543e18..545c8b6b2acd 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -12,9 +12,9 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index dd790fd41257..1929152129fa 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonAsmPrinter.h"
#include "HexagonInstPrinter.h"
+#include "HexagonAsmPrinter.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 70410ff03a64..50f00d1aaeac 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonFixupKinds.h"
-#include "MCTargetDesc/HexagonMCCodeEmitter.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 9e1ff9ca35d7..47007e08a2ff 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -29,7 +30,6 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index aece36790486..b2c7f1569380 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "hexagon-shuffle"
+#include "MCTargetDesc/HexagonMCShuffler.h"
#include "Hexagon.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bb98c2bbef6d..1a361548f938 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "Hexagon.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -27,10 +28,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <new>
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 9aa8ad68e07e..60a12dcf2f03 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,9 +9,9 @@
//
// RDF-based generic dead code elimination.
+#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
-#include "RDFDeadCode.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index 7a2895aa4e8c..8d1272370899 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -10,8 +10,8 @@
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 9d8a3881797b..83e8968086d8 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -23,8 +23,8 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
-#include "RDFGraph.h"
#include "RDFLiveness.h"
+#include "RDFGraph.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 1d6c07974beb..72e471f5766e 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -28,8 +28,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index 7475dbd68ae4..38e75108ba16 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -10,13 +10,13 @@
#include "LanaiSubtarget.h"
#include "LanaiTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index e02bba529bd5..64cd3342ac18 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -9,8 +9,8 @@
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index 10254677a5ad..c3727416ecb9 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -19,8 +19,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index a47ff9ff3d61..bcbde2b8b794 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "LanaiMCAsmInfo.h"
#include "LanaiMCTargetDesc.h"
#include "InstPrinter/LanaiInstPrinter.h"
+#include "LanaiMCAsmInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index abf062fe86ae..f39c21fc8aa2 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430.h"
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d855d3e7f778..694c201cbe8d 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -9,17 +9,18 @@
#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetStreamer.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -40,13 +41,12 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index ecdf6b0de6e7..b0b994323036 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -17,14 +17,14 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 1e2eb7dbec3e..6d3d4db03603 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
//
-#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsAsmBackend.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 1a1c613cfce0..d116ac3471bc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -10,13 +10,13 @@
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index ae3278322311..f658aadff22f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -10,12 +10,12 @@
#include "MipsELFStreamer.h"
#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index a35eb2a8e03a..0330824fd614 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsMCCodeEmitter.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsMCCodeEmitter.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index be04480044d4..aad6bf378ea0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "MipsMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 74d5e4cc9841..2d84528e7469 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -7,15 +7,15 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsOptionRecord.h"
#include "MipsABIInfo.h"
#include "MipsELFStreamer.h"
-#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
#include <cassert>
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 2d4083b27ed1..0cd4aebe4d16 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -11,19 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsTargetStreamer.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsELFStreamer.h"
#include "MipsMCExpr.h"
#include "MipsMCTargetDesc.h"
#include "MipsTargetObjectFile.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 9615bc38bfce..f24761d7d101 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -185,6 +185,9 @@ def FeatureUseTCCInDIV : SubtargetFeature<
"UseTCCInDIV", "false",
"Force the assembler to use trapping">;
+def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
+ "Disable 4-operand madd.fmt and related instructions">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index a222080f6b81..09e41e1423ae 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips16FrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips16InstrInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRegisterInfo.h"
@@ -25,10 +25,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include <cassert>
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 9cdbf510737f..f7ff7c3dc7bb 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -12,17 +12,18 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsAsmPrinter.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
#include "Mips.h"
-#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
#include "MipsTargetMachine.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,7 +44,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index cb9f676c237a..6a03ee9927d7 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) {
return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
}
+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+ if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+ return true;
+
+ return false;
+}
+
+/// Return true if the original type was vXfXX / vXfXX.
+static bool originalTypeIsVectorFloat(const Type * Ty) {
+ if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+ return true;
+
+ return false;
+}
+
MipsCCState::SpecialCallingConvType
MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
const MipsSubtarget &Subtarget) {
@@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128(
}
}
-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
void MipsCCState::PreAnalyzeReturnForF128(
const SmallVectorImpl<ISD::OutputArg> &Outs) {
const MachineFunction &MF = getMachineFunction();
@@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128(
}
}
-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lower values that originated from vXfXX and record
/// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+ const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
+ }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
+/// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ ISD::OutputArg Out = Outs[i];
+ OriginalRetWasFloatVector.push_back(
+ originalEVTTypeIsVectorFloat(Out.ArgVT));
+ }
+}
+
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
void MipsCCState::PreAnalyzeCallOperands(
const SmallVectorImpl<ISD::OutputArg> &Outs,
std::vector<TargetLowering::ArgListEntry> &FuncArgs,
const char *Func) {
for (unsigned i = 0; i < Outs.size(); ++i) {
- OriginalArgWasF128.push_back(
- originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func));
- OriginalArgWasFloat.push_back(
- FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+ TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+ OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func));
+ OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+ OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
CallOperandIsFixed.push_back(Outs[i].IsFixed);
}
}
-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
void MipsCCState::PreAnalyzeFormalArgumentsForF128(
const SmallVectorImpl<ISD::InputArg> &Ins) {
const MachineFunction &MF = getMachineFunction();
@@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
if (Ins[i].Flags.isSRet()) {
OriginalArgWasF128.push_back(false);
OriginalArgWasFloat.push_back(false);
+ OriginalArgWasFloatVector.push_back(false);
continue;
}
@@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));
OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+ // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the
+ // first argument is actually an SRet pointer to a vector, then the next
+ // argument slot is $a2.
+ OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
}
}
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 77ecc65b2eee..27901699480b 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -45,16 +45,33 @@ private:
const char *Func);
/// Identify lowered values that originated from f128 arguments and record
- /// this.
+ /// this for use by RetCC_MipsN.
void
PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+ void
+ PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins,
+ const Type *RetTy);
+
+ void PreAnalyzeFormalArgumentsForVectorFloat(
+ const SmallVectorImpl<ISD::InputArg> &Ins);
+
+ void
+ PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
/// Records whether the value has been lowered from an f128.
SmallVector<bool, 4> OriginalArgWasF128;
/// Records whether the value has been lowered from float.
SmallVector<bool, 4> OriginalArgWasFloat;
+ /// Records whether the value has been lowered from a floating point vector.
+ SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+ /// Records whether the return value has been lowered from a floating point
+ /// vector.
+ SmallVector<bool, 4> OriginalRetWasFloatVector;
+
/// Records whether the value was a fixed argument.
/// See ISD::OutputArg::IsFixed,
SmallVector<bool, 4> CallOperandIsFixed;
@@ -78,6 +95,7 @@ public:
CCState::AnalyzeCallOperands(Outs, Fn);
OriginalArgWasF128.clear();
OriginalArgWasFloat.clear();
+ OriginalArgWasFloatVector.clear();
CallOperandIsFixed.clear();
}
@@ -96,31 +114,38 @@ public:
CCState::AnalyzeFormalArguments(Ins, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn, const Type *RetTy,
const char *Func) {
PreAnalyzeCallResultForF128(Ins, RetTy, Func);
+ PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
CCState::AnalyzeCallResult(Ins, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
PreAnalyzeReturnForF128(Outs);
+ PreAnalyzeReturnForVectorFloat(Outs);
CCState::AnalyzeReturn(Outs, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
CCAssignFn Fn) {
PreAnalyzeReturnForF128(ArgsFlags);
+ PreAnalyzeReturnForVectorFloat(ArgsFlags);
bool Return = CCState::CheckReturn(ArgsFlags, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
return Return;
}
@@ -128,6 +153,12 @@ public:
bool WasOriginalArgFloat(unsigned ValNo) {
return OriginalArgWasFloat[ValNo];
}
+ bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+ return OriginalArgWasFloatVector[ValNo];
+ }
+ bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+ return OriginalRetWasFloatVector[ValNo];
+ }
bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
};
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index a57cb7badc17..b5df78f89a6b 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A>
class CCIfArgIsVarArg<CCAction A>
: CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
+/// Match if the return was a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+ : CCIf<"!static_cast<MipsCCState *>(&State)"
+ "->WasOriginalRetVectorFloat(ValNo)", A>;
/// Match if the special calling conv is the specified value.
class CCIfSpecialCallingConv<string CC, CCAction A>
@@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[
// Promote i1/i8/i16 return values to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- // i32 are returned in registers V0, V1, A0, A1
- CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+ // i32 are returned in registers V0, V1, A0, A1, unless the original return
+ // type was a vector of floats.
+ CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+ CCAssignToReg<[V0, V1, A0, A1]>>>,
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 026f66a1c0e1..ff43a3950610 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -24,10 +24,10 @@
#include "Mips16InstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index ac9a81b1bb2f..c238a65378e2 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
// Mips-specific dsp nodes
def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
@@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8,
immZExt8, NoItinerary, DSPROpnd>;
-class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10,
- immZExt10, NoItinerary, DSPROpnd>;
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10,
+ immSExt10, NoItinerary, DSPROpnd>;
class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
NoItinerary, DSPROpnd, GPR32Opnd>;
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index b83f44a74d5b..f79cb0e67200 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -17,8 +17,8 @@
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsCCState.h"
-#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 760630c41176..f2193013b7aa 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -22,12 +22,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
@@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used is dependant on the ABI used.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+ if (VT.isVector() && Subtarget.hasMSA())
+ return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+ return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (VT.isVector()) {
+ if (Subtarget.isABI_O32()) {
+ return MVT::i32;
+ } else {
+ return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+ }
+ }
+ return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (VT.isVector())
+ return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+ 1U);
+ return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+ // Break down vector types to either 2 i64s or 4 i32s.
+ RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+ ? VT.getVectorNumElements()
+ : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+ return NumIntermediates;
+}
+
SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@@ -470,8 +512,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
!Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() &&
!Subtarget.inMicroMipsMode();
- // Disable if we don't generate PIC or the ABI isn't O32.
- if (!TM.isPositionIndependent() || !TM.getABI().IsO32())
+ // Disable if either of the following is true:
+ // We do not generate PIC, the ABI is not O32, LargeGOT is being used.
+ if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT)
UseFastISel = false;
return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr;
@@ -2551,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
// not used, it must be shadowed. If only A3 is available, shadow it and
// go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+// with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+// spilling the remainder to the stack.
//
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
//===----------------------------------------------------------------------===//
@@ -2562,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
State.getMachineFunction().getSubtarget());
static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+ const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
+ static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
// Do not process byval args here.
if (ArgFlags.isByVal())
return true;
@@ -2601,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
State.getFirstUnallocated(F32Regs) != ValNo;
unsigned OrigAlign = ArgFlags.getOrigAlign();
bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
-
- if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
+
+ // The MIPS vector ABI for floats passes them in a pair of registers
+ if (ValVT == MVT::i32 && isVectorFloat) {
+ // This is the start of an vector that was scalarized into an unknown number
+ // of components. It doesn't matter how many there are. Allocate one of the
+ // notional 8 byte aligned registers which map onto the argument stack, and
+ // shadow the register lost to alignment requirements.
+ if (ArgFlags.isSplit()) {
+ Reg = State.AllocateReg(FloatVectorIntRegs);
+ if (Reg == Mips::A2)
+ State.AllocateReg(Mips::A1);
+ else if (Reg == 0)
+ State.AllocateReg(Mips::A3);
+ } else {
+ // If we're an intermediate component of the split, we can just attempt to
+ // allocate a register directly.
+ Reg = State.AllocateReg(IntRegs);
+ }
+ } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
Reg = State.AllocateReg(IntRegs);
// If this is the first part of an i64 arg,
// the allocated register must be either A0 or A2.
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 2dcafd51061a..0e47ed38f420 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -248,6 +248,33 @@ namespace llvm {
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ /// Return the register type for a given MVT, ensuring vectors are treated
+ /// as a series of gpr sized integers.
+ virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+ /// Return the register type for a given MVT, ensuring vectors are treated
+ /// as a series of gpr sized integers.
+ virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const override;
+
+ /// Return the number of registers for a given MVT, ensuring vectors are
+ /// treated as a series of gpr sized integers.
+ virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const override;
+
+ /// Break down vectors to the correct number of gpr sized integers.
+ virtual unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+ /// Return the correct alignment for the current calling convention.
+ virtual unsigned
+ getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+ if (ArgTy->isVectorTy())
+ return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+ return DL.getABITypeAlignment(ArgTy);
+ }
+
ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index d81a769d7fd9..94f3a74be98b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -557,11 +557,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+ MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+ MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4;
-let AdditionalPredicates = [NoNaNsFPMath] in {
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
@@ -569,11 +569,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in {
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4;
-let AdditionalPredicates = [NoNaNsFPMath] in {
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
@@ -582,12 +582,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in {
let DecoderNamespace = "Mips64" in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4;
}
-let AdditionalPredicates = [NoNaNsFPMath],
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4],
DecoderNamespace = "Mips64" in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8761946b8dbb..40078fb77144 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -238,6 +238,8 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">,
AssemblerPredicate<"FeatureEVA,FeatureMips32r2">;
def HasMSA : Predicate<"Subtarget->hasMSA()">,
AssemblerPredicate<"FeatureMSA">;
+def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
+ AssemblerPredicate<"!FeatureMadd4">;
//===----------------------------------------------------------------------===//
@@ -390,6 +392,10 @@ class ASE_NOT_DSP {
list<Predicate> InsnPredicates = [NotDSP];
}
+class MADD4 {
+ list<Predicate> AdditionalPredicates = [HasMadd4];
+}
+
//===----------------------------------------------------------------------===//
class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 63034ecab93b..e01c03db2227 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -40,7 +40,11 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
const TargetRegisterClass *RC =
STI.inMips16Mode()
? &Mips::CPU16RegsRegClass
- : static_cast<const MipsTargetMachine &>(MF.getTarget())
+ : STI.inMicroMipsMode()
+ ? STI.hasMips64()
+ ? &Mips::GPRMM16_64RegClass
+ : &Mips::GPRMM16RegClass
+ : static_cast<const MipsTargetMachine &>(MF.getTarget())
.getABI()
.IsN64()
? &Mips::GPR64RegClass
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 94a1965f9ffb..79c8395d9dcc 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips.h"
#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/ScopedHashTable.h"
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 70ead5cde6fa..7ee45c28a7d0 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Instructions.h"
#include "Mips.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 65be350f259d..de3389b5a6bf 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
- << "stackSize : " << stackSize << "\n");
+ << "stackSize : " << stackSize << "\n"
+ << "alignment : "
+ << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n");
eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index e765b4625206..102ebb21609a 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsSEFrameLowering.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSEFrameLowering.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index c9cf9363b8c9..49ae6dd4cd39 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -24,11 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 1f4e933db2a2..154d5825427b 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
#include "Mips.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
- HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(),
+ HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index b4d15ee361ff..625a652a0ca0 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -144,6 +144,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasEVA -- supports EVA ASE.
bool HasEVA;
+
+ // nomadd4 - disables generation of 4-operand madd.s, madd.d and
+ // related instructions.
+ bool DisableMadd4;
InstrItineraryData InstrItins;
@@ -253,6 +257,7 @@ public:
bool hasDSPR2() const { return HasDSPR2; }
bool hasDSPR3() const { return HasDSPR3; }
bool hasMSA() const { return HasMSA; }
+ bool disableMadd4() const { return DisableMadd4; }
bool hasEVA() const { return HasEVA; }
bool useSmallSection() const { return UseSmallSection; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index a9d6ab055892..330ae19ecd0f 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
@@ -18,7 +19,6 @@
#include "MipsSEISelDAGToDAG.h"
#include "MipsSubtarget.h"
#include "MipsTargetObjectFile.h"
-#include "MipsTargetMachine.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index c5d6a05d6611..4d73c3991035 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -10,13 +10,13 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 58cb7793d040..0139646fc3f7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXAsmPrinter.h"
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
-#include "NVPTXAsmPrinter.h"
#include "NVPTXMCExpr.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXRegisterInfo.h"
@@ -73,8 +73,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 390776212ce7..916b0e115664 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "NVPTX.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ebaaf42bc64e..f26b9a7cb8dd 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
-#include "NVPTXISelLowering.h"
#include "NVPTXSection.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 0f6c2e53e60a..da563f0531d4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
+#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index e858b37e1843..139dc7fbeeda 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -90,8 +90,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "NVPTXUtilities.h"
#include "NVPTXTargetMachine.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index e10b046f7c97..4e902c0fb507 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -36,8 +36,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8dfbfece9b8e..2b6ba8c85d4d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
-#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp
index 9c71a2ee165b..11277f5ba596 100644
--- a/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -15,8 +15,8 @@
#include "NVPTX.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 84bb9ec56800..baf5902ddf58 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "PPCInstPrinter.h"
-#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 4863ac542736..028c2cb562f8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,8 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -18,9 +20,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index fd279c60f3f5..1488bd5b0be6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 6b97d4c1456b..54f664314578 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCFixupKinds.h"
#include "PPCMCExpr.h"
+#include "PPCFixupKinds.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2d686f227919..e8f220ea5457 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCTargetStreamer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
@@ -30,11 +31,10 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 1f38a8c947e7..6d591ca964a6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -18,7 +19,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE,
// The bitfield offsets that work (as determined by trial-and-error)
// are different than what is documented in the mach-o manuals.
// This appears to be an endianness issue; reversing the order of the
- // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but
// breaks x86/ARM assembly).
MRE.r_word1 = ((Index << 8) | // was << 0
(IsPCRel << 7) | // was << 24
@@ -222,7 +222,7 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
report_fatal_error("symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
- // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1f181d007f63..841b8c514464 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -16,11 +16,11 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "PPCInstrInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
@@ -29,6 +29,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -55,11 +57,9 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 93c201d03869..55e105dad0e5 100644
--- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
// profitably allocated as GPRs rather than CRs. This pass will become totally
// unnecessary if Register Bank Allocation and Global Instruction Selection ever
// go upstream.
//
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
// transitive closure of their uses includes only PHINodes, CallInsts, and
// ReturnInsts. The rational is that arguments are generally passed and returned
-// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will
// actually save casts at the Machine Instruction level.
//
// It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,11 +33,12 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -50,8 +51,9 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Pass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Casting.h"
#include <cassert>
using namespace llvm;
@@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass {
return Defs;
}
- // Translate a i1 value to an equivalent i32 value:
- static Value *translate(Value *V) {
- Type *Int32Ty = Type::getInt32Ty(V->getContext());
+ // Translate a i1 value to an equivalent i32/i64 value:
+ Value *translate(Value *V) {
+ Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+ : Type::getInt32Ty(V->getContext());
+
if (auto *C = dyn_cast<Constant>(V))
- return ConstantExpr::getZExt(C, Int32Ty);
+ return ConstantExpr::getZExt(C, IntTy);
if (auto *P = dyn_cast<PHINode>(V)) {
// Temporarily set the operands to 0. We'll fix this later in
// runOnUse.
- Value *Zero = Constant::getNullValue(Int32Ty);
+ Value *Zero = Constant::getNullValue(IntTy);
PHINode *Q =
- PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+ PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
for (unsigned i = 0; i < P->getNumOperands(); ++i)
Q->addIncoming(Zero, P->getIncomingBlock(i));
return Q;
@@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass {
auto InstPt =
A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
- return new ZExtInst(V, Int32Ty, "", InstPt);
+ return new ZExtInst(V, IntTy, "", InstPt);
}
typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
@@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass {
if (skipFunction(F))
return false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<PPCTargetMachine>();
+ ST = TM.getSubtargetImpl(F);
+
PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
B2IMap Bool2IntMap;
bool Changed = false;
@@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass {
return Changed;
}
- static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+ bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
B2IMap &BoolToIntMap) {
auto Defs = findAllDefs(U);
@@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass {
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ const PPCSubtarget *ST;
};
} // end anonymous namespace
char PPCBoolRetToInt::ID = 0;
INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
- "Convert i1 constants to i32 if they are returned",
+ "Convert i1 constants to i32/i64 if they are returned",
false, false)
FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index b7d3154d0000..d0b66f9bca09 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 70c4170653ae..24bc027f8106 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -23,7 +23,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
@@ -43,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 6bd229625fc3..811e4dd9dfe1 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 2fc8654deeab..bc9957194f6d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCCallingConv.h"
+#include "PPC.h"
#include "PPCCCState.h"
+#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 54414457388d..28d496ee9ca1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -21,9 +21,10 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -54,7 +55,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/Statistic.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -2824,6 +2824,20 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
ShiftOps), 0);
}
+ case ISD::SETNE: {
+ // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
+ // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+ getI32Imm(1, dl)), 0);
+ }
}
}
@@ -2850,6 +2864,27 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi,
getI32Imm(63, dl)), 0);
}
+ case ISD::SETNE: {
+ // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
+ // flip the bit, finally take 2's complement.
+ // (sext (setcc %a, %b, setne)) ->
+ // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
+ // Same as above, but the first xor is not needed.
+ // (sext (setcc %a, 0, setne)) ->
+ // (neg (xor (lshr (ctlz %a), 5), 1))
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] =
+ { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) };
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+ SDValue Xori =
+ SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+ getI32Imm(1, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
+ }
}
}
@@ -2872,6 +2907,19 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS,
getI64Imm(58, dl), getI64Imm(63, dl)),
0);
}
+ case ISD::SETNE: {
+ // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+ // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
+ // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+ // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue AC =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+ Xor, getI32Imm(~0U, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
+ Xor, AC.getValue(1)), 0);
+ }
}
}
@@ -2896,6 +2944,19 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS,
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
Addic, Addic.getValue(1)), 0);
}
+ case ISD::SETNE: {
+ // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
+ // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
+ // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
+ // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+ SDValue SC =
+ SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
+ Xor, getI32Imm(0, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
+ SC, SC.getValue(1)), 0);
+ }
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 41ff9d903aa0..bda4e5e81734 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
-#include "PPCCallingConv.h"
#include "PPCCCState.h"
+#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
@@ -28,11 +28,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -52,8 +52,8 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -61,9 +61,9 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index c4139ca8b7bd..e214d26c063b 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2717,6 +2717,40 @@ def DblToFlt {
dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
}
+
+def ByteToWord {
+ dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+ dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+ dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+ dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+}
+
+def ByteToDWord {
+ dag A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+ dag A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+}
+
+def HWordToWord {
+ dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+ dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+ dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+ dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+}
+
+def HWordToDWord {
+ dag A0 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+ dag A1 = (i64 (sext_inreg
+ (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+}
+
+def WordToDWord {
+ dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+ dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+}
+
def FltToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
}
@@ -2969,4 +3003,21 @@ let AddedComplexity = 400 in {
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
}
+ // P9 Altivec instructions that can be used to build vectors.
+ // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+ // with complexities of existing build vector patterns in this file.
+ let Predicates = [HasP9Altivec] in {
+ def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+ (v2i64 (VEXTSW2D $A))>;
+ def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+ (v2i64 (VEXTSH2D $A))>;
+ def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
+ HWordToWord.A2, HWordToWord.A3)),
+ (v4i32 (VEXTSH2W $A))>;
+ def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
+ ByteToWord.A2, ByteToWord.A3)),
+ (v4i32 (VEXTSB2W $A))>;
+ def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+ (v2i64 (VEXTSB2D $A))>;
+ }
}
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 541b98e01b99..b310493587ae 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPC.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index c6d2c3ebcc0f..ff5f17c7628f 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -19,9 +19,9 @@
//
//===---------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index c7aa4cb78b7a..31c50785c2ee 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -21,9 +21,9 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 7c53a5601790..17345b6ca8d3 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -61,8 +61,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b9004cc8a9f5..5a226b23ff96 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
-#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index f3a0290da054..93fe3230ab81 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f6d20ced15a0..a57484e5abdf 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d3434b77be8a..491eaf326a50 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -42,9 +42,9 @@
//
//===---------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index d6f2672271e9..d9a71893afee 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -12,10 +12,10 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index b2ed13758d41..9309d493cef4 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -13,13 +13,13 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index ddc3bf350452..7c98b1c8f321 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
+#include "llvm/Config/config.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Config/config.h"
namespace llvm {
class MCAsmBackend;
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index efdde04c582d..744d7b8aaa3a 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -13,10 +13,10 @@
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 7e6dff6b7894..087c037614a9 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -9,8 +9,8 @@
#include "MCTargetDesc/SparcMCExpr.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
@@ -28,8 +28,8 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index da7e0b737e78..8e298e8316da 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -14,11 +14,11 @@
#include "Sparc.h"
#include "SparcRegisterInfo.h"
#include "SparcSubtarget.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index cc07547ede2c..d1d1334163a2 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/SparcFixupKinds.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 21df60237d96..50e8825b15e8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -14,10 +14,10 @@
#include "SparcMCAsmInfo.h"
#include "SparcMCExpr.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/Dwarf.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index e85a8cd5e339..a77f760d9eff 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -19,7 +19,6 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Object/ELF.h"
-
using namespace llvm;
#define DEBUG_TYPE "sparcmcexpr"
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 31a128a5f271..19fb94534b25 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "InstPrinter/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
#include "SparcTargetStreamer.h"
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index a3cedcbf9dd1..a784124ff688 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "Sparc.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 49c67e0819f7..c7a1ca262d2c 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
-#include "SparcTargetObjectFile.h"
-#include "Sparc.h"
#include "LeonPasses.h"
+#include "Sparc.h"
+#include "SparcTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index 8fdde15d8d27..627e49a95f3c 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -9,8 +9,8 @@
#include "SparcTargetObjectFile.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index efcf6696fd50..ad05779a9f64 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 23b7d5b5d501..fd1fd7bc40dc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 3de570bf30cc..df0a8161e6e7 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -9,11 +9,11 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 8fa54ee434cf..0c755c9ad1b9 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -25,10 +25,10 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
#include "SystemZSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index ae141dbcad34..ac4c3f6db684 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -5367,12 +5367,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
if (Invert)
CCMask ^= CCValid;
+
+ // ISel pattern matching also adds a load memory operand of the same
+ // address, so take special care to find the storing memory operand.
+ MachineMemOperand *MMO = nullptr;
+ for (auto *I : MI.memoperands())
+ if (I->isStore()) {
+ MMO = I;
+ break;
+ }
+
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
- .addReg(SrcReg)
- .add(Base)
- .addImm(Disp)
- .addImm(CCValid)
- .addImm(CCMask);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .addMemOperand(MMO);
+
MI.eraseFromParent();
return MBB;
}
@@ -5950,7 +5962,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
.addImm(DestDisp)
.addImm(ThisLength)
.add(SrcBase)
- .addImm(SrcDisp);
+ .addImm(SrcDisp)
+ ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b34c181124de..66a5ff12be46 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZInstrInfo.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index ec8ce6e911fa..3a0e01da42f0 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZTargetMachine.h"
#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 6ef8000d6f43..d14a0fb0b0b2 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
+#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 263aff8b7bfb..7391df8342ef 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 36e51921bf2f..be480f03c572 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -19,8 +19,8 @@
#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSelectionDAGInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
index 96a9ef82c125..5dbd23d420a3 100644
--- a/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -47,10 +47,10 @@
#include "SystemZ.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include <deque>
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f30d52f859d7..cb81c0e5276e 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZTargetMachine.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "SystemZMachineScheduler.h"
-#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 5d1616d03779..42d92622d6c8 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,12 +14,12 @@
#include "llvm-c/Target.h"
#include "llvm-c/Initialization.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include <cstring>
using namespace llvm;
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 91cc97e38b3d..f941891f3183 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -24,7 +25,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
@@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal(
if (GO->hasSection())
return getExplicitSectionGlobal(GO, Kind, TM);
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+ auto Attrs = GVar->getAttributes();
+ if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) ||
+ (Attrs.hasAttribute("data-section") && Kind.isData()) ||
+ (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) {
+ return getExplicitSectionGlobal(GO, Kind, TM);
+ }
+ }
+
+ if (auto *F = dyn_cast<Function>(GO)) {
+ if (F->hasFnAttribute("implicit-section-name"))
+ return getExplicitSectionGlobal(GO, Kind, TM);
+ }
+
// Use default section depending on the 'type' of global
return SelectSectionForGlobal(GO, Kind, TM);
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index df12e0e88e3b..01f14939864f 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/TargetMachine.h"
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGenCWrappers.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index b5f53114d3e1..9be11da9afac 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -15,8 +15,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index c6158720d62f..b1de84d7e8e6 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -16,9 +16,9 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/Wasm.h"
namespace llvm {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 7c78285fbda4..4f20096c1583 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 544cd653fd72..c56c591def36 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 795658ca96b4..0ba700a86b74 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -15,9 +15,9 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Wasm.h"
namespace llvm {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 68d6747298df..ddf964e7dbb7 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -16,9 +16,9 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Wasm.h"
namespace llvm {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 2846ec5e9337..27c01cb8acf7 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -13,14 +13,14 @@
///
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Wasm.h"
using namespace llvm;
namespace {
@@ -54,6 +54,11 @@ static bool IsFunctionExpr(const MCExpr *Expr) {
return false;
}
+static bool IsFunctionType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX;
+}
+
unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
@@ -71,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
case WebAssembly::fixup_code_sleb128_i64:
llvm_unreachable("fixup_sleb128_i64 not implemented yet");
case WebAssembly::fixup_code_uleb128_i32:
+ if (IsFunctionType(Target))
+ return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
if (IsFunction)
return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB;
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 40e1928197bc..1691808d05a0 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -17,8 +17,8 @@
///
////===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index bd11d1b46906..21e0f6b23777 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -18,8 +18,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index bc6360aafd61..b2330a232093 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -22,8 +22,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 53698ff09b10..09338a4898e0 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -16,8 +16,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 2bbf7a2b42f9..41f315c2825b 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -26,8 +26,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 257f1d110aa2..4f3ae57733e5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 744a3ed427af..576b71dd7966 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -15,8 +15,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 473dcb7a33fd..1462c49aa9fd 100644
--- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -19,8 +19,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index e3470825940c..766ab456a8e6 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 57d454746b06..6650191807dc 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -20,8 +20,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 9e944df637d9..878ffd08d228 100644
--- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -19,8 +19,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 2441ead7cb27..b1385f409fd3 100644
--- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
index a9aa781610ce..8173364fa880 100644
--- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -24,8 +24,8 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index d9b2b8743649..7b05f671bdcb 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -12,9 +12,9 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyTargetMachine.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyTargetObjectFile.h"
#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 788fac62626b..f7e31de65f6d 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86AsmInstrumentation.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86Operand.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 33eff14b8215..0fba15cc692c 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -15,8 +15,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 7471373334f6..fc4adddc149b 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -11,7 +11,6 @@ tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
-tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables)
if(LLVM_BUILD_GLOBAL_ISEL)
tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 36ad23bb41c0..4ce908b1da64 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -74,8 +74,8 @@
//
//===----------------------------------------------------------------------===//
-#include "X86DisassemblerDecoder.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index b7f637e9a8cd..577b7a776c6d 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include <cstdarg> /* for va_*() */
-#include <cstdio> /* for vsnprintf() */
-#include <cstdlib> /* for exit() */
-#include <cstring> /* for memset() */
+#include <cstdarg> /* for va_*() */
+#include <cstdio> /* for vsnprintf() */
+#include <cstdlib> /* for exit() */
+#include <cstring> /* for memset() */
#include "X86DisassemblerDecoder.h"
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 6aa700306744..4d91300c7ede 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86ATTInstPrinter.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86InstComments.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index b5a926f915af..5e809c34325e 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -15,8 +15,8 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "Utils/X86ShuffleDecode.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index a8c631ae282f..d6af6712d5a1 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "X86IntelInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86InstComments.h"
-#include "X86IntelInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index a713af6aadb5..7a9e4f4468ec 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -10,6 +10,8 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -22,9 +24,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 0b73df3a2ff8..4da4eebec038 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -9,13 +9,13 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 9c35a251e480..1538a515f419 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -13,12 +13,12 @@
#include "X86MCAsmInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
using namespace llvm;
enum AsmWriterFlavorTy {
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 297926ddcfda..4097ef224d50 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -19,7 +20,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
using namespace llvm;
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index d6777fc8aa6a..105580c913a1 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -9,11 +9,11 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index e1825ca1eda1..dc15aeadaa61 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -34,7 +35,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3cfb924abd01..621505aaded9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+ else if (IsNonTemporal && Alignment >= 16)
+ return false; // Force split for X86::VMOVNTDQArm
else if (Alignment >= 32)
Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
else
@@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (IsNonTemporal && Alignment >= 16)
+ return false; // Force split for X86::VMOVNTDQArm
else if (Alignment >= 32)
Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
else
@@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (IsNonTemporal && Alignment >= 16)
+ return false; // Force split for X86::VMOVNTDQArm
else if (Alignment >= 32)
Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
else
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 328a80304602..2777fa89330f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -29,8 +29,8 @@
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5303d7a406ad..831e9bdab0e1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,4 +1,4 @@
-
+
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
@@ -81,6 +81,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
+static cl::opt<bool> MulConstantOptimization(
+ "mul-constant-optimization", cl::init(true),
+ cl::desc("Replace 'mul x, Const' with more effective instructions like "
+ "SHIFT, LEA, etc."),
+ cl::Hidden);
+
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@@ -5810,7 +5816,8 @@ static bool setTargetShuffleZeroElements(SDValue N,
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
- SmallVectorImpl<SDValue> &Ops) {
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAG &DAG) {
Mask.clear();
Ops.clear();
@@ -5868,8 +5875,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
}
- if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
- NumElts <= SrcExtract.getConstantOperandVal(1))
+ if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
return false;
SDValue SrcVec = SrcExtract.getOperand(0);
@@ -5877,8 +5883,12 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+ unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
+ if (NumSrcElts <= SrcIdx)
+ return false;
+
Ops.push_back(SrcVec);
- Mask.push_back(SrcExtract.getConstantOperandVal(1));
+ Mask.push_back(SrcIdx);
Mask.append(NumZeros, SM_SentinelZero);
Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
@@ -5915,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
+ case X86ISD::PACKSS: {
+ // If we know input saturation won't happen we can treat this
+ // as a truncation shuffle.
+ if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
+ DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+ return false;
+
+ Ops.push_back(N.getOperand(0));
+ Ops.push_back(N.getOperand(1));
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i * 2);
+ return true;
+ }
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
@@ -5989,9 +6012,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
/// Returns true if the target shuffle mask was decoded.
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
- SmallVectorImpl<int> &Mask) {
+ SmallVectorImpl<int> &Mask,
+ SelectionDAG &DAG) {
if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
- if (!getFauxShuffleMask(Op, Mask, Inputs))
+ if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -6391,6 +6415,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
@@ -6495,6 +6520,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
+ // Don't create 256-bit non-temporal aligned loads without AVX2 as these
+ // will lower to regular temporal loads and use the cache.
+ if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
+ VT.is256BitVector() && !Subtarget.hasInt256())
+ return SDValue();
+
if (IsConsecutiveLoad)
return CreateLoad(VT, LDBase);
@@ -7701,7 +7732,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// See if we can use a vector load to get all of the elements.
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
- if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false))
+ if (SDValue LD =
+ EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
return LD;
}
@@ -7825,24 +7857,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// Next, we iteratively mix elements, e.g. for v4f32:
- // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
- // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
- // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- unsigned EltStride = NumElems >> 1;
- while (EltStride != 0) {
- for (unsigned i = 0; i < EltStride; ++i) {
- // If Ops[i+EltStride] is undef and this is the first round of mixing,
- // then it is safe to just drop this shuffle: V[i] is already in the
- // right place, the one element (since it's the first round) being
- // inserted as undef can be dropped. This isn't safe for successive
- // rounds because they will permute elements within both vectors.
- if (Ops[i+EltStride].isUndef() &&
- EltStride == NumElems/2)
- continue;
-
- Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]);
- }
- EltStride >>= 1;
+ // Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
+ // : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
+ // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
+ for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
+ // Generate scaled UNPCKL shuffle mask.
+ SmallVector<int, 16> Mask;
+ for(unsigned i = 0; i != Scale; ++i)
+ Mask.push_back(i);
+ for (unsigned i = 0; i != Scale; ++i)
+ Mask.push_back(NumElems+i);
+ Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
+
+ for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
+ Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
}
return Ops[0];
}
@@ -17177,7 +17205,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cond == ISD::SETGE || Cond == ISD::SETUGE;
bool Invert = Cond == ISD::SETNE ||
(Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
- bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
+
+ // If both operands are known non-negative, then an unsigned compare is the
+ // same as a signed compare and there's no need to flip signbits.
+ // TODO: We could check for more general simplifications here since we're
+ // computing known bits.
+ bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
+ !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
@@ -26741,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
return Tmp;
}
+ case X86ISD::VSHLI: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (ShiftVal.uge(VTBits))
+ return VTBits; // Shifted all bits out --> zero.
+ if (ShiftVal.uge(Tmp))
+ return 1; // Shifted all sign bits out --> unknown.
+ return Tmp - ShiftVal.getZExtValue();
+ }
+
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
@@ -27889,7 +27934,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// Extract target shuffle mask and resolve sentinels and inputs.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
- if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
+ if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return false;
assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
@@ -28788,7 +28833,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
}
if (Elts.size() == VT.getVectorNumElements())
- if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
+ if (SDValue LD =
+ EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
return LD;
// For AVX2, we sometimes want to combine
@@ -29430,7 +29476,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
- if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+ if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
@@ -31017,6 +31063,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
}
}
+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+ EVT VT, SDLoc DL) {
+
+ auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+ SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(Mult, DL, VT));
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+ DAG.getConstant(Shift, DL, MVT::i8));
+ Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+ N->getOperand(0));
+ return Result;
+ };
+
+ auto combineMulMulAddOrSub = [&](bool isAdd) {
+ SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(9, DL, VT));
+ Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+ Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+ N->getOperand(0));
+ return Result;
+ };
+
+ switch (MulAmt) {
+ default:
+ break;
+ case 11:
+ // mul x, 11 => add ((shl (mul x, 5), 1), x)
+ return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
+ case 21:
+ // mul x, 21 => add ((shl (mul x, 5), 2), x)
+ return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
+ case 22:
+ // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
+ case 19:
+ // mul x, 19 => sub ((shl (mul x, 5), 2), x)
+ return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
+ case 13:
+ // mul x, 13 => add ((shl (mul x, 3), 2), x)
+ return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
+ case 23:
+ // mul x, 13 => sub ((shl (mul x, 3), 3), x)
+ return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
+ case 14:
+ // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
+ case 26:
+ // mul x, 26 => sub ((mul (mul x, 9), 3), x)
+ return combineMulMulAddOrSub(/*isAdd*/ false);
+ case 28:
+ // mul x, 28 => add ((mul (mul x, 9), 3), x)
+ return combineMulMulAddOrSub(/*isAdd*/ true);
+ case 29:
+ // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulMulAddOrSub(/*isAdd*/ true));
+ case 30:
+ // mul x, 30 => sub (sub ((shl x, 5), x), x)
+ return DAG.getNode(
+ ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(5, DL, MVT::i8)),
+ N->getOperand(0)),
+ N->getOperand(0));
+ }
+ return SDValue();
+}
+
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -31026,6 +31143,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
+ if (!MulConstantOptimization)
+ return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
@@ -31081,7 +31200,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
- }
+ } else if (!Subtarget.slowLEA())
+ NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
if (!NewMul) {
assert(MulAmt != 0 &&
@@ -32381,15 +32501,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
- // into two 16-byte operations.
+ // into two 16-byte operations. Also split non-temporal aligned loads on
+ // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
- TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
- AddressSpace, Alignment, &Fast) && !Fast) {
+ ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
+ (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
+ AddressSpace, Alignment, &Fast) && !Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
@@ -35097,7 +35219,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
SDValue Ops[] = {SubVec2, SubVec};
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
+ Subtarget, false))
return Ld;
}
}
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index e2e228f5544b..5224a16613cb 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -641,22 +641,37 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
+// Vector load wrappers to prevent folding of non-temporal aligned loads on
+// supporting targets.
+def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+ cast<LoadSDNode>(N)->getAlignment() < 16;
+}]>;
+def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+ cast<LoadSDNode>(N)->getAlignment() < 32;
+}]>;
+def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+ cast<LoadSDNode>(N)->getAlignment() < 64;
+}]>;
+
// 128-bit load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>;
// 256-bit load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>;
// 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -728,9 +743,13 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr),
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
+// Avoid non-temporal aligned loads on supported targets.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasSSEUnalignedMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 16;
+ return (Subtarget->hasSSEUnalignedMem() ||
+ cast<LoadSDNode>(N)->getAlignment() >= 16) &&
+ (!Subtarget->hasSSE41() ||
+ !(cast<LoadSDNode>(N)->getAlignment() >= 16 &&
+ cast<LoadSDNode>(N)->isNonTemporal()));
}]>;
// 128-bit memop pattern fragments
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0aee30081a35..ff5d90c4e78b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -121,8 +121,172 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
(STI.is64Bit() ? X86::RETQ : X86::RETL)),
Subtarget(STI), RI(STI.getTargetTriple()) {
-// Generated memory folding tables.
-#include "X86GenFoldTables.inc"
+ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
+ { X86::ADC32ri, X86::ADC32mi, 0 },
+ { X86::ADC32ri8, X86::ADC32mi8, 0 },
+ { X86::ADC32rr, X86::ADC32mr, 0 },
+ { X86::ADC64ri32, X86::ADC64mi32, 0 },
+ { X86::ADC64ri8, X86::ADC64mi8, 0 },
+ { X86::ADC64rr, X86::ADC64mr, 0 },
+ { X86::ADD16ri, X86::ADD16mi, 0 },
+ { X86::ADD16ri8, X86::ADD16mi8, 0 },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16rr, X86::ADD16mr, 0 },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri, X86::ADD32mi, 0 },
+ { X86::ADD32ri8, X86::ADD32mi8, 0 },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32mr, 0 },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32, X86::ADD64mi32, 0 },
+ { X86::ADD64ri8, X86::ADD64mi8, 0 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64mr, 0 },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri, X86::ADD8mi, 0 },
+ { X86::ADD8rr, X86::ADD8mr, 0 },
+ { X86::AND16ri, X86::AND16mi, 0 },
+ { X86::AND16ri8, X86::AND16mi8, 0 },
+ { X86::AND16rr, X86::AND16mr, 0 },
+ { X86::AND32ri, X86::AND32mi, 0 },
+ { X86::AND32ri8, X86::AND32mi8, 0 },
+ { X86::AND32rr, X86::AND32mr, 0 },
+ { X86::AND64ri32, X86::AND64mi32, 0 },
+ { X86::AND64ri8, X86::AND64mi8, 0 },
+ { X86::AND64rr, X86::AND64mr, 0 },
+ { X86::AND8ri, X86::AND8mi, 0 },
+ { X86::AND8rr, X86::AND8mr, 0 },
+ { X86::DEC16r, X86::DEC16m, 0 },
+ { X86::DEC32r, X86::DEC32m, 0 },
+ { X86::DEC64r, X86::DEC64m, 0 },
+ { X86::DEC8r, X86::DEC8m, 0 },
+ { X86::INC16r, X86::INC16m, 0 },
+ { X86::INC32r, X86::INC32m, 0 },
+ { X86::INC64r, X86::INC64m, 0 },
+ { X86::INC8r, X86::INC8m, 0 },
+ { X86::NEG16r, X86::NEG16m, 0 },
+ { X86::NEG32r, X86::NEG32m, 0 },
+ { X86::NEG64r, X86::NEG64m, 0 },
+ { X86::NEG8r, X86::NEG8m, 0 },
+ { X86::NOT16r, X86::NOT16m, 0 },
+ { X86::NOT32r, X86::NOT32m, 0 },
+ { X86::NOT64r, X86::NOT64m, 0 },
+ { X86::NOT8r, X86::NOT8m, 0 },
+ { X86::OR16ri, X86::OR16mi, 0 },
+ { X86::OR16ri8, X86::OR16mi8, 0 },
+ { X86::OR16rr, X86::OR16mr, 0 },
+ { X86::OR32ri, X86::OR32mi, 0 },
+ { X86::OR32ri8, X86::OR32mi8, 0 },
+ { X86::OR32rr, X86::OR32mr, 0 },
+ { X86::OR64ri32, X86::OR64mi32, 0 },
+ { X86::OR64ri8, X86::OR64mi8, 0 },
+ { X86::OR64rr, X86::OR64mr, 0 },
+ { X86::OR8ri, X86::OR8mi, 0 },
+ { X86::OR8rr, X86::OR8mr, 0 },
+ { X86::ROL16r1, X86::ROL16m1, 0 },
+ { X86::ROL16rCL, X86::ROL16mCL, 0 },
+ { X86::ROL16ri, X86::ROL16mi, 0 },
+ { X86::ROL32r1, X86::ROL32m1, 0 },
+ { X86::ROL32rCL, X86::ROL32mCL, 0 },
+ { X86::ROL32ri, X86::ROL32mi, 0 },
+ { X86::ROL64r1, X86::ROL64m1, 0 },
+ { X86::ROL64rCL, X86::ROL64mCL, 0 },
+ { X86::ROL64ri, X86::ROL64mi, 0 },
+ { X86::ROL8r1, X86::ROL8m1, 0 },
+ { X86::ROL8rCL, X86::ROL8mCL, 0 },
+ { X86::ROL8ri, X86::ROL8mi, 0 },
+ { X86::ROR16r1, X86::ROR16m1, 0 },
+ { X86::ROR16rCL, X86::ROR16mCL, 0 },
+ { X86::ROR16ri, X86::ROR16mi, 0 },
+ { X86::ROR32r1, X86::ROR32m1, 0 },
+ { X86::ROR32rCL, X86::ROR32mCL, 0 },
+ { X86::ROR32ri, X86::ROR32mi, 0 },
+ { X86::ROR64r1, X86::ROR64m1, 0 },
+ { X86::ROR64rCL, X86::ROR64mCL, 0 },
+ { X86::ROR64ri, X86::ROR64mi, 0 },
+ { X86::ROR8r1, X86::ROR8m1, 0 },
+ { X86::ROR8rCL, X86::ROR8mCL, 0 },
+ { X86::ROR8ri, X86::ROR8mi, 0 },
+ { X86::SAR16r1, X86::SAR16m1, 0 },
+ { X86::SAR16rCL, X86::SAR16mCL, 0 },
+ { X86::SAR16ri, X86::SAR16mi, 0 },
+ { X86::SAR32r1, X86::SAR32m1, 0 },
+ { X86::SAR32rCL, X86::SAR32mCL, 0 },
+ { X86::SAR32ri, X86::SAR32mi, 0 },
+ { X86::SAR64r1, X86::SAR64m1, 0 },
+ { X86::SAR64rCL, X86::SAR64mCL, 0 },
+ { X86::SAR64ri, X86::SAR64mi, 0 },
+ { X86::SAR8r1, X86::SAR8m1, 0 },
+ { X86::SAR8rCL, X86::SAR8mCL, 0 },
+ { X86::SAR8ri, X86::SAR8mi, 0 },
+ { X86::SBB32ri, X86::SBB32mi, 0 },
+ { X86::SBB32ri8, X86::SBB32mi8, 0 },
+ { X86::SBB32rr, X86::SBB32mr, 0 },
+ { X86::SBB64ri32, X86::SBB64mi32, 0 },
+ { X86::SBB64ri8, X86::SBB64mi8, 0 },
+ { X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16r1, X86::SHL16m1, 0 },
+ { X86::SHL16rCL, X86::SHL16mCL, 0 },
+ { X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32r1, X86::SHL32m1, 0 },
+ { X86::SHL32rCL, X86::SHL32mCL, 0 },
+ { X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64r1, X86::SHL64m1, 0 },
+ { X86::SHL64rCL, X86::SHL64mCL, 0 },
+ { X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8r1, X86::SHL8m1, 0 },
+ { X86::SHL8rCL, X86::SHL8mCL, 0 },
+ { X86::SHL8ri, X86::SHL8mi, 0 },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
+ { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
+ { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
+ { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
+ { X86::SHR16r1, X86::SHR16m1, 0 },
+ { X86::SHR16rCL, X86::SHR16mCL, 0 },
+ { X86::SHR16ri, X86::SHR16mi, 0 },
+ { X86::SHR32r1, X86::SHR32m1, 0 },
+ { X86::SHR32rCL, X86::SHR32mCL, 0 },
+ { X86::SHR32ri, X86::SHR32mi, 0 },
+ { X86::SHR64r1, X86::SHR64m1, 0 },
+ { X86::SHR64rCL, X86::SHR64mCL, 0 },
+ { X86::SHR64ri, X86::SHR64mi, 0 },
+ { X86::SHR8r1, X86::SHR8m1, 0 },
+ { X86::SHR8rCL, X86::SHR8mCL, 0 },
+ { X86::SHR8ri, X86::SHR8mi, 0 },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
+ { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
+ { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
+ { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
+ { X86::SUB16ri, X86::SUB16mi, 0 },
+ { X86::SUB16ri8, X86::SUB16mi8, 0 },
+ { X86::SUB16rr, X86::SUB16mr, 0 },
+ { X86::SUB32ri, X86::SUB32mi, 0 },
+ { X86::SUB32ri8, X86::SUB32mi8, 0 },
+ { X86::SUB32rr, X86::SUB32mr, 0 },
+ { X86::SUB64ri32, X86::SUB64mi32, 0 },
+ { X86::SUB64ri8, X86::SUB64mi8, 0 },
+ { X86::SUB64rr, X86::SUB64mr, 0 },
+ { X86::SUB8ri, X86::SUB8mi, 0 },
+ { X86::SUB8rr, X86::SUB8mr, 0 },
+ { X86::XOR16ri, X86::XOR16mi, 0 },
+ { X86::XOR16ri8, X86::XOR16mi8, 0 },
+ { X86::XOR16rr, X86::XOR16mr, 0 },
+ { X86::XOR32ri, X86::XOR32mi, 0 },
+ { X86::XOR32ri8, X86::XOR32mi8, 0 },
+ { X86::XOR32rr, X86::XOR32mr, 0 },
+ { X86::XOR64ri32, X86::XOR64mi32, 0 },
+ { X86::XOR64ri8, X86::XOR64mi8, 0 },
+ { X86::XOR64rr, X86::XOR64mr, 0 },
+ { X86::XOR8ri, X86::XOR8mi, 0 },
+ { X86::XOR8rr, X86::XOR8mr, 0 }
+ };
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
@@ -131,11 +295,746 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
+ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
+ { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
+ { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
+ { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
+ { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
+ { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
+ { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
+ { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
+ { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
+ { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
+ { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
+ { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
+ { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
+ { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
+ { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
+ { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
+ { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
+ { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
+ { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
+ { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
+ { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
+ { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
+ { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
+ { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
+ { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
+ { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
+ { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
+ { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
+ { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
+ { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
+ { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
+ { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
+ { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
+ { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
+ { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
+ { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
+ { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
+ { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+ { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
+ { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
+ { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
+ { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
+ { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
+ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
+ { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
+ { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
+ { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
+ { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
+ { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
+ { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
+ { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
+ { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
+ { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
+ { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
+ { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
+ { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
+ { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
+ { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
+ { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
+ { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
+ { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
+ { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
+ { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
+ { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
+ { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
+ { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
+ { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
+ { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+
+ // AVX 128-bit versions of foldable instructions
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
+ { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+ { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
+ { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
+
+ // AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
+
+ // AVX-512 foldable instructions
+ { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
+ { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE },
+ { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE },
+
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+ { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE },
+
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
+
+ // F16C foldable instructions
+ { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
+ { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
+ };
+
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
}
+ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
+ { X86::BSF16rr, X86::BSF16rm, 0 },
+ { X86::BSF32rr, X86::BSF32rm, 0 },
+ { X86::BSF64rr, X86::BSF64rm, 0 },
+ { X86::BSR16rr, X86::BSR16rm, 0 },
+ { X86::BSR32rr, X86::BSR32rm, 0 },
+ { X86::BSR64rr, X86::BSR64rm, 0 },
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE },
+ { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
+ { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
+ { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
+ { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
+ { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
+ { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
+ { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
+ { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
+ { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
+ { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
+ { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
+ { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
+ { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
+ { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE },
+ { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE },
+ { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE },
+ { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE },
+ { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE },
+ { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE },
+ { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE },
+ { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE },
+ { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE },
+ { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE },
+ { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
+ { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
+ { X86::RCPSSr, X86::RCPSSm, 0 },
+ { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE },
+ { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
+ { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
+ { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
+ { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE },
+ { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
+ { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+
+ // MMX version of foldable instructions
+ { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
+ { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
+ { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
+ { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
+ { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
+ { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
+ { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
+ { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
+ { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
+ { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
+
+ // 3DNow! version of foldable instructions
+ { X86::PF2IDrr, X86::PF2IDrm, 0 },
+ { X86::PF2IWrr, X86::PF2IWrm, 0 },
+ { X86::PFRCPrr, X86::PFRCPrm, 0 },
+ { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
+ { X86::PI2FDrr, X86::PI2FDrm, 0 },
+ { X86::PI2FWrr, X86::PI2FWrm, 0 },
+ { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
+
+ // AVX 128-bit versions of foldable instructions
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
+ { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
+ { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 },
+ { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 },
+ { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
+ { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
+ { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
+ { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE },
+ { X86::VPABSBrr, X86::VPABSBrm, 0 },
+ { X86::VPABSDrr, X86::VPABSDrm, 0 },
+ { X86::VPABSWrr, X86::VPABSWrm, 0 },
+ { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
+ { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
+ { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
+ { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 },
+ { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE },
+ { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE },
+ { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE },
+ { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE },
+ { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE },
+ { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VPTESTrr, X86::VPTESTrm, 0 },
+ { X86::VRCPPSr, X86::VRCPPSm, 0 },
+ { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
+ { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
+ { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
+ { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
+ { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
+ { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+
+ // AVX 256-bit foldable instructions
+ { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE },
+ { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
+ { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
+ { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
+ { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
+ { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE },
+ { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
+ { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
+ { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 },
+ { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
+ { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
+ { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
+ { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
+
+ // AVX2 foldable instructions
+
+ // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
+ // VBROADCASTS{SD}rm memory instructions were available from AVX1.
+ // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
+ // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
+ // so they don't need an equivalent limitation.
+ { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+ { X86::VPABSBYrr, X86::VPABSBYrm, 0 },
+ { X86::VPABSDYrr, X86::VPABSDYrm, 0 },
+ { X86::VPABSWYrr, X86::VPABSWYrm, 0 },
+ { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
+ { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
+ { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
+ { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
+ { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
+ { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
+ { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+
+ // XOP foldable instructions
+ { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
+ { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 },
+ { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
+ { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 },
+ { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 },
+ { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 },
+ { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
+ { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
+ { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
+ { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
+ { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
+ { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
+ { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
+ { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
+ { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
+ { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
+ { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
+ { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
+ { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
+ { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
+ { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
+ { X86::VPROTBri, X86::VPROTBmi, 0 },
+ { X86::VPROTBrr, X86::VPROTBmr, 0 },
+ { X86::VPROTDri, X86::VPROTDmi, 0 },
+ { X86::VPROTDrr, X86::VPROTDmr, 0 },
+ { X86::VPROTQri, X86::VPROTQmi, 0 },
+ { X86::VPROTQrr, X86::VPROTQmr, 0 },
+ { X86::VPROTWri, X86::VPROTWmi, 0 },
+ { X86::VPROTWrr, X86::VPROTWmr, 0 },
+ { X86::VPSHABrr, X86::VPSHABmr, 0 },
+ { X86::VPSHADrr, X86::VPSHADmr, 0 },
+ { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
+ { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
+ { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
+ { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
+ { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
+ { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
+
+ // LWP foldable instructions
+ { X86::LWPINS32rri, X86::LWPINS32rmi, 0 },
+ { X86::LWPINS64rri, X86::LWPINS64rmi, 0 },
+ { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 },
+ { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 },
+
+ // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
+ { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+ { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
+ { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
+ { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
+ { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
+ { X86::BLCI32rr, X86::BLCI32rm, 0 },
+ { X86::BLCI64rr, X86::BLCI64rm, 0 },
+ { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
+ { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
+ { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
+ { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
+ { X86::BLCS32rr, X86::BLCS32rm, 0 },
+ { X86::BLCS64rr, X86::BLCS64rm, 0 },
+ { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
+ { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
+ { X86::BLSI32rr, X86::BLSI32rm, 0 },
+ { X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
+ { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
+ { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+ { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+ { X86::BLSR32rr, X86::BLSR32rm, 0 },
+ { X86::BLSR64rr, X86::BLSR64rm, 0 },
+ { X86::BZHI32rr, X86::BZHI32rm, 0 },
+ { X86::BZHI64rr, X86::BZHI64rm, 0 },
+ { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+ { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+ { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+ { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+ { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+ { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
+ { X86::RORX32ri, X86::RORX32mi, 0 },
+ { X86::RORX64ri, X86::RORX64mi, 0 },
+ { X86::SARX32rr, X86::SARX32rm, 0 },
+ { X86::SARX64rr, X86::SARX64rm, 0 },
+ { X86::SHRX32rr, X86::SHRX32rm, 0 },
+ { X86::SHRX64rr, X86::SHRX64rm, 0 },
+ { X86::SHLX32rr, X86::SHLX32rm, 0 },
+ { X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
+ { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
+ { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+ { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+ { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
+ { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
+ { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
+ { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+ { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
+ { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
+ { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VPABSBZrr, X86::VPABSBZrm, 0 },
+ { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
+ { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
+ { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
+ { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
+ { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
+ { X86::VPERMQZri, X86::VPERMQZmi, 0 },
+ { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
+ { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
+ { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
+ { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
+ { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
+ { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
+ { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
+ { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
+ { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
+ { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
+ { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 },
+ { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 },
+ { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
+ { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
+ { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
+ { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 },
+ { X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
+ { X86::VPSLLQZri, X86::VPSLLQZmi, 0 },
+ { X86::VPSLLWZri, X86::VPSLLWZmi, 0 },
+ { X86::VPSRADZri, X86::VPSRADZmi, 0 },
+ { X86::VPSRAQZri, X86::VPSRAQZmi, 0 },
+ { X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
+ { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 },
+ { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
+ { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
+ { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
+
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
+ { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
+ { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
+ { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
+ { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
+ { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
+ { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
+ { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
+ { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
+ { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
+ { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
+ { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
+ { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
+ { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
+ { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
+ { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
+ { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
+ { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
+ { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
+ { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 },
+ { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 },
+ { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 },
+ { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 },
+ { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
+ { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
+ { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
+ { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
+ { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
+
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
+ { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
+ { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
+ { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
+ { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
+ { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
+ { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
+ { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
+ { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
+ { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
+ { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
+ { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
+ { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 },
+ { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 },
+ { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 },
+ { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 },
+ { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
+ { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
+ { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
+ { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
+ { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
+
+ // F16C foldable instructions
+ { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
+ { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
+
+ // AES foldable instructions
+ { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
+ { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
+ };
+
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
@@ -143,6 +1042,1396 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
+ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
+ { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
+ { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
+ { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
+ { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
+ { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE },
+ { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
+ { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
+ { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
+ { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE },
+ { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 },
+ { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXCSDrr, X86::MAXCSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXCSSrr, X86::MAXCSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE },
+ { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 },
+ { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINCSDrr, X86::MINCSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINCSSrr, X86::MINCSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
+ { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
+ { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
+ { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
+ { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
+ { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
+ { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
+ { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
+ { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
+ { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
+ { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
+ { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
+ { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 },
+ { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
+ { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
+ { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
+ { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
+ { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
+ { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
+ { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
+ { X86::PINSRBrr, X86::PINSRBrm, 0 },
+ { X86::PINSRDrr, X86::PINSRDrm, 0 },
+ { X86::PINSRQrr, X86::PINSRQrm, 0 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 0 },
+ { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
+ { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
+ { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
+ { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
+ { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
+ { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
+ { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
+ { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
+ { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
+ { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 },
+ { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 },
+ { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 },
+ { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
+ { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
+ { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
+ { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
+ { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
+ { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
+ { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
+ { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
+ { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
+ { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
+ { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
+ { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE },
+ { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
+ { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
+ { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+
+ // MMX version of foldable instructions
+ { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
+ { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
+ { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
+ { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
+ { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
+ { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
+ { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
+ { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
+ { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
+ { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
+ { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
+ { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
+ { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
+ { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
+ { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
+ { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
+ { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
+ { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
+ { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
+ { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
+ { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
+ { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
+ { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
+ { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
+ { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
+ { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
+ { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
+ { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
+ { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
+ { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
+ { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+ { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
+ { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
+ { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
+ { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
+ { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
+ { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
+ { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
+ { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
+ { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
+ { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
+ { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
+ { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
+ { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
+ { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
+ { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
+ { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
+ { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
+ { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
+ { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
+ { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
+ { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
+ { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
+ { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
+ { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
+ { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
+ { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
+ { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
+ { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
+ { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
+ { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
+ { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
+ { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
+ { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
+ { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
+ { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
+ { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
+ { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
+ { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
+ { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+
+ // 3DNow! version of foldable instructions
+ { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
+ { X86::PFACCrr, X86::PFACCrm, 0 },
+ { X86::PFADDrr, X86::PFADDrm, 0 },
+ { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
+ { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
+ { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
+ { X86::PFMAXrr, X86::PFMAXrm, 0 },
+ { X86::PFMINrr, X86::PFMINrm, 0 },
+ { X86::PFMULrr, X86::PFMULrm, 0 },
+ { X86::PFNACCrr, X86::PFNACCrm, 0 },
+ { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
+ { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
+ { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
+ { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
+ { X86::PFSUBrr, X86::PFSUBrm, 0 },
+ { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
+ { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
+
+ // AVX 128-bit versions of foldable instructions
+ { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
+ { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
+ { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
+ { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
+ { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
+ { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
+ { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
+ { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
+ { X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPSrr, X86::VADDPSrm, 0 },
+ { X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
+ { X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
+ { X86::VANDPDrr, X86::VANDPDrm, 0 },
+ { X86::VANDPSrr, X86::VANDPSrm, 0 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
+ { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
+ { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
+ { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
+ { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
+ { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE },
+ { X86::VDPPDrri, X86::VDPPDrmi, 0 },
+ { X86::VDPPSrri, X86::VDPPSrmi, 0 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE },
+ { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
+ { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
+ { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
+ { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
+ { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
+ { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE },
+ { X86::VMINCPDrr, X86::VMINCPDrm, 0 },
+ { X86::VMINCPSrr, X86::VMINCPSrm, 0 },
+ { X86::VMINCSDrr, X86::VMINCSDrm, 0 },
+ { X86::VMINCSSrr, X86::VMINCSSrm, 0 },
+ { X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPSrr, X86::VMINPSrm, 0 },
+ { X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
+ { X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE },
+ { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPSrr, X86::VMULPSrm, 0 },
+ { X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
+ { X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE },
+ { X86::VORPDrr, X86::VORPDrm, 0 },
+ { X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
+ { X86::VPADDBrr, X86::VPADDBrm, 0 },
+ { X86::VPADDDrr, X86::VPADDDrm, 0 },
+ { X86::VPADDQrr, X86::VPADDQrm, 0 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
+ { X86::VPADDWrr, X86::VPADDWrm, 0 },
+ { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 },
+ { X86::VPANDNrr, X86::VPANDNrm, 0 },
+ { X86::VPANDrr, X86::VPANDrm, 0 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRBrr, X86::VPINSRBrm, 0 },
+ { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
+ { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
+ { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
+ { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
+ { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
+ { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
+ { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
+ { X86::VPORrr, X86::VPORrm, 0 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
+ { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 },
+ { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 },
+ { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
+ { X86::VPSRADrr, X86::VPSRADrm, 0 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
+ { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
+ { X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VRCPSSr, X86::VRCPSSm, 0 },
+ { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
+ { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
+ { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE },
+ { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
+ { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE },
+ { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
+ { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
+ { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
+ { X86::VXORPDrr, X86::VXORPDrm, 0 },
+ { X86::VXORPSrr, X86::VXORPSrm, 0 },
+
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 },
+ { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 },
+ { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
+ { X86::VORPDYrr, X86::VORPDYrm, 0 },
+ { X86::VORPSYrr, X86::VORPSYrm, 0 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
+
+ // AVX2 foldable instructions
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
+ { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
+ { X86::VPANDYrr, X86::VPANDYrm, 0 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
+ { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
+ { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
+ { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
+ { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
+ { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
+ { X86::VPORYrr, X86::VPORYrm, 0 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
+ { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 },
+ { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 },
+ { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
+ { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
+ { X86::VPXORYrr, X86::VPXORYrm, 0 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE },
+
+ // XOP foldable instructions
+ { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
+ { X86::VPCOMBri, X86::VPCOMBmi, 0 },
+ { X86::VPCOMDri, X86::VPCOMDmi, 0 },
+ { X86::VPCOMQri, X86::VPCOMQmi, 0 },
+ { X86::VPCOMWri, X86::VPCOMWmi, 0 },
+ { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
+ { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
+ { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
+ { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
+ { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
+ { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
+ { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
+ { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
+ { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
+ { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
+ { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
+ { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
+ { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
+ { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
+ { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
+ { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
+ { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
+ { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
+ { X86::VPPERMrrr, X86::VPPERMrmr, 0 },
+ { X86::VPROTBrr, X86::VPROTBrm, 0 },
+ { X86::VPROTDrr, X86::VPROTDrm, 0 },
+ { X86::VPROTQrr, X86::VPROTQrm, 0 },
+ { X86::VPROTWrr, X86::VPROTWrm, 0 },
+ { X86::VPSHABrr, X86::VPSHABrm, 0 },
+ { X86::VPSHADrr, X86::VPSHADrm, 0 },
+ { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
+ { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
+ { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
+ { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
+ { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
+ { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
+
+ // BMI/BMI2 foldable instructions
+ { X86::ANDN32rr, X86::ANDN32rm, 0 },
+ { X86::ANDN64rr, X86::ANDN64rm, 0 },
+ { X86::MULX32rr, X86::MULX32rm, 0 },
+ { X86::MULX64rr, X86::MULX64rm, 0 },
+ { X86::PDEP32rr, X86::PDEP32rm, 0 },
+ { X86::PDEP64rr, X86::PDEP64rm, 0 },
+ { X86::PEXT32rr, X86::PEXT32rm, 0 },
+ { X86::PEXT64rr, X86::PEXT64rm, 0 },
+
+ // ADX foldable instructions
+ { X86::ADCX32rr, X86::ADCX32rm, 0 },
+ { X86::ADCX64rr, X86::ADCX64rm, 0 },
+ { X86::ADOX32rr, X86::ADOX32rm, 0 },
+ { X86::ADOX64rr, X86::ADOX64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
+ { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
+ { X86::VADDSDZrr, X86::VADDSDZrm, 0 },
+ { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
+ { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
+ { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
+ { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
+ { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
+ { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
+ { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
+ { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
+ { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
+ { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
+ { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
+ { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
+ { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
+ { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
+ { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
+ { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
+ { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
+ { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
+ { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
+ { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
+ { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
+ { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
+ { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
+ { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
+ { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 },
+ { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 },
+ { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 },
+ { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 },
+ { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 },
+ { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
+ { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
+ { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
+ { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
+ { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
+ { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
+ { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
+ { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
+ { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
+ { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
+ { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+ { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+ { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
+ { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
+ { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
+ { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
+ { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
+ { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
+ { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
+ { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
+ { X86::VORPDZrr, X86::VORPDZrm, 0 },
+ { X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
+ { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
+ { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
+ { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
+ { X86::VPADDBZrr, X86::VPADDBZrm, 0 },
+ { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+ { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
+ { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 },
+ { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 },
+ { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 },
+ { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 },
+ { X86::VPADDWZrr, X86::VPADDWZrm, 0 },
+ { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 },
+ { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
+ { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
+ { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
+ { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
+ { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
+ { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
+ { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
+ { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
+ { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
+ { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 },
+ { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 },
+ { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 },
+ { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 },
+ { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
+ { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
+ { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
+ { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
+ { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
+ { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
+ { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
+ { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
+ { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
+ { X86::VPERMBZrr, X86::VPERMBZrm, 0 },
+ { X86::VPERMDZrr, X86::VPERMDZrm, 0 },
+ { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 },
+ { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 },
+ { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 },
+ { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+ { X86::VPERMQZrr, X86::VPERMQZrm, 0 },
+ { X86::VPERMWZrr, X86::VPERMWZrm, 0 },
+ { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 },
+ { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 },
+ { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 },
+ { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 },
+ { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
+ { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
+ { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
+ { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
+ { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
+ { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
+ { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
+ { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
+ { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
+ { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
+ { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
+ { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
+ { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
+ { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
+ { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
+ { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
+ { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
+ { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
+ { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
+ { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
+ { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
+ { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
+ { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
+ { X86::VPORDZrr, X86::VPORDZrm, 0 },
+ { X86::VPORQZrr, X86::VPORQZrm, 0 },
+ { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 },
+ { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
+ { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
+ { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
+ { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
+ { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+ { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 },
+ { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 },
+ { X86::VPSRADZrr, X86::VPSRADZrm, 0 },
+ { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 },
+ { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+ { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 },
+ { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 },
+ { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 },
+ { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 },
+ { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 },
+ { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
+ { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 },
+ { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 },
+ { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
+ { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
+ { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
+ { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 },
+ { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 },
+ { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
+ { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
+ { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
+ { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
+ { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
+ { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
+ { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 },
+ { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 },
+ { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 },
+ { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 },
+ { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
+ { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
+ { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
+ { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
+ { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
+ { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
+ { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
+ { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
+ { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
+ { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
+ { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
+ { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 },
+ { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 },
+ { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 },
+ { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 },
+ { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
+ { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
+
+ // AVX-512{F,VL} foldable instructions
+ { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
+ { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
+ { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
+ { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
+ { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 },
+ { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 },
+ { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 },
+ { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 },
+ { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 },
+ { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 },
+ { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 },
+ { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 },
+ { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 },
+ { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
+ { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
+ { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
+ { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
+ { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
+ { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
+ { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
+ { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
+ { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
+ { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
+ { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
+ { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 },
+ { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 },
+ { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 },
+ { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm, 0 },
+ { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
+ { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
+ { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
+ { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
+ { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
+ { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
+ { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
+ { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
+ { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
+ { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
+ { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
+ { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
+ { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
+ { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
+ { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
+ { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
+ { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
+ { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
+ { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
+ { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
+ { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
+ { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
+ { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
+ { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
+ { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
+ { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
+ { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
+ { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
+ { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
+ { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
+ { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
+ { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
+ { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
+ { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
+ { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
+ { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 },
+ { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 },
+ { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 },
+ { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 },
+ { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 },
+ { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 },
+ { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 },
+ { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 },
+ { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 },
+ { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 },
+ { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 },
+ { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 },
+ { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 },
+ { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 },
+ { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 },
+ { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 },
+ { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 },
+ { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 },
+ { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 },
+ { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 },
+ { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
+ { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
+ { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
+ { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
+ { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
+ { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
+ { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
+ { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
+ { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
+ { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
+ { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
+ { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 },
+ { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 },
+ { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 },
+ { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 },
+ { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 },
+ { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 },
+ { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 },
+ { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 },
+ { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 },
+ { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 },
+ { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 },
+ { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 },
+ { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 },
+ { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
+ { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
+ { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
+ { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
+ { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
+ { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
+ { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
+ { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
+ { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
+ { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
+ { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
+ { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
+ { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
+ { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
+ { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
+ { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 },
+ { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 },
+ { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 },
+ { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 },
+ { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 },
+ { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 },
+ { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 },
+ { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 },
+ { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 },
+ { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 },
+ { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 },
+ { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 },
+ { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 },
+ { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
+ { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
+ { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
+ { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
+ { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
+ { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
+ { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
+ { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
+ { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
+ { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
+ { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
+ { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
+ { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
+ { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
+ { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
+ { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
+ { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
+ { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
+ { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
+ { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
+ { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
+ { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
+ { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
+ { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
+ { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
+ { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
+ { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
+ { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
+ { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
+ { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
+ { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
+ { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
+ { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
+ { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
+ { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
+ { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
+ { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
+ { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
+ { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
+ { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
+ { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
+ { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
+ { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
+ { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
+ { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
+ { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
+ { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
+ { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
+ { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
+ { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
+ { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
+ { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
+ { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
+ { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
+ { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 },
+ { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 },
+ { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 },
+ { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 },
+ { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 },
+ { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 },
+ { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 },
+ { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 },
+ { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 },
+ { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 },
+ { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 },
+ { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 },
+ { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 },
+ { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 },
+ { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 },
+ { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 },
+ { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 },
+ { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 },
+ { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 },
+ { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 },
+ { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 },
+ { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 },
+ { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 },
+ { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 },
+ { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 },
+ { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 },
+ { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 },
+ { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 },
+ { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 },
+ { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 },
+ { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 },
+ { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 },
+ { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 },
+ { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 },
+ { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 },
+ { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
+ { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
+ { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
+ { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 },
+ { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 },
+ { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 },
+ { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 },
+ { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 },
+ { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 },
+ { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 },
+ { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 },
+ { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 },
+ { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 },
+ { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
+ { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
+ { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
+ { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
+ { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
+ { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
+ { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 },
+ { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 },
+ { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 },
+ { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 },
+ { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 },
+ { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 },
+ { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 },
+ { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 },
+ { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 },
+ { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 },
+ { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 },
+ { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 },
+ { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 },
+ { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 },
+ { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
+ { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
+ { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
+ { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 },
+ { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 },
+ { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 },
+ { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 },
+ { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
+ { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
+ { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
+ { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
+ { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 },
+ { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 },
+ { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 },
+ { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 },
+ { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 },
+ { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 },
+ { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 },
+ { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 },
+ { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 },
+ { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
+ { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
+ { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
+
+ // AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+ { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
+ { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
+ { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
+ { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
+ { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
+ { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
+ { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
+ { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
+ { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
+ { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
+ { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
+ { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
+ { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
+ { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
+ { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
+ { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
+ { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
+ { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
+ { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 },
+ { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 },
+ { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
+ { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
+ { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
+ { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 },
+ { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 },
+ { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 },
+ { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 },
+ { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 },
+ { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 },
+ { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
+ { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
+ { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
+ { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
+ { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
+ { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
+ { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
+ { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
+ { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
+ { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
+ { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
+ { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
+ { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
+ { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
+ { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
+ { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
+ { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
+ { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
+ { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
+ { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 },
+ { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 },
+ { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 },
+ { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 },
+ { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 },
+ { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 },
+ { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
+ { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
+ { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
+ { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
+ { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
+ { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
+ { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
+ { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
+ { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
+ { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
+ { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
+ { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 },
+ { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 },
+ { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 },
+ { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 },
+ { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 },
+ { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 },
+ { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
+ { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
+ { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
+
+ // AES foldable instructions
+ { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
+ { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
+ { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
+ { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
+ { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
+ { X86::VAESDECrr, X86::VAESDECrm, 0 },
+ { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
+ { X86::VAESENCrr, X86::VAESENCrm, 0 },
+
+ // SHA foldable instructions
+ { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
+ { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
+ { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
+ { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
+ { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
+ { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
+ { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
+ };
+
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
@@ -150,12 +2439,1105 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
+ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE },
+
+ // XOP foldable instructions
+ { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
+ { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
+ { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
+ { X86::VPPERMrrr, X86::VPPERMrrm, 0 },
+
+ // AVX-512 instructions with 3 source operands.
+ { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
+ { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
+ { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
+ { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
+ { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
+ { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 },
+ { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 },
+ { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 },
+ { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 },
+ { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
+ { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
+ { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
+ { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
+ { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
+
+ // AVX-512VL 256-bit instructions with 3 source operands.
+ { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
+ { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
+ { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
+ { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 },
+ { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 },
+ { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 },
+ { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 },
+ { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 },
+ { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 },
+ { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
+ { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
+ { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
+ { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
+ { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
+
+ // AVX-512VL 128-bit instructions with 3 source operands.
+ { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
+ { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
+ { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
+ { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 },
+ { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 },
+ { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 },
+ { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 },
+ { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 },
+ { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 },
+ { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
+ { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
+ { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
+ { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
+ { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
+
+ // AVX-512 masked instructions
+ { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
+ { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
+ { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
+ { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
+ { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
+ { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
+ { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
+ { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
+ { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
+ { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
+ { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
+ { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
+ { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
+ { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
+ { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
+ { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
+ { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
+ { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
+ { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
+ { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 },
+ { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 },
+ { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
+ { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
+ { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 },
+ { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 },
+ { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
+ { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
+ { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
+ { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
+ { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
+ { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
+ { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
+ { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
+ { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
+ { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
+ { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
+ { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
+ { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
+ { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 },
+ { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
+ { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
+ { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
+ { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
+ { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
+ { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
+ { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
+ { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
+ { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
+ { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 },
+ { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 },
+ { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 },
+ { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 },
+ { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
+ { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
+ { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
+ { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
+ { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
+ { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
+ { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
+ { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
+ { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
+ { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
+ { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
+ { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
+ { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
+ { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
+ { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
+ { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
+ { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
+ { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
+ { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
+ { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
+ { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
+ { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
+ { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
+ { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
+ { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
+ { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
+ { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
+ { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
+ { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
+ { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
+ { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
+ { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
+ { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
+ { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
+ { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
+ { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
+ { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
+ { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
+ { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
+ { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
+ { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
+ { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
+ { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
+ { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
+ { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
+ { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
+ { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
+ { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
+ { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
+ { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
+ { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
+ { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
+ { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
+ { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 },
+ { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 },
+ { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 },
+ { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 },
+ { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 },
+ { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 },
+ { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 },
+ { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
+ { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
+ { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
+ { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
+ { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
+ { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
+ { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
+ { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
+ { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 },
+ { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 },
+ { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 },
+
+ // AVX-512{F,VL} masked arithmetic instructions 256-bit
+ { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
+ { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
+ { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
+ { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 },
+ { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 },
+ { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
+ { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
+ { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
+ { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
+ { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
+ { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
+ { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
+ { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
+ { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
+ { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
+ { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
+ { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
+ { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
+ { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
+ { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
+ { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
+ { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
+ { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
+ { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
+ { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
+ { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
+ { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
+ { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
+ { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
+ { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
+ { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
+ { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
+ { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 },
+ { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 },
+ { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 },
+ { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 },
+ { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 },
+ { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 },
+ { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 },
+ { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
+ { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
+ { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
+ { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
+ { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
+ { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
+ { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
+ { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
+ { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 },
+ { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 },
+ { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 },
+ { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 },
+ { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
+ { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
+ { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
+ { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
+ { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
+ { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
+ { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
+ { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
+ { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
+ { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
+ { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
+ { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
+ { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
+ { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
+ { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
+ { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
+ { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
+ { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
+ { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
+ { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
+ { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
+ { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
+ { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
+ { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
+ { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
+ { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
+ { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
+ { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
+ { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
+ { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
+ { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
+ { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
+ { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
+ { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
+ { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
+ { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
+ { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
+ { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
+ { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
+ { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
+ { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
+ { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
+ { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
+ { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
+ { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
+ { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
+ { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
+ { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
+ { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 },
+ { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 },
+ { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 },
+ { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 },
+ { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 },
+ { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 },
+ { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 },
+ { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 },
+ { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 },
+ { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 },
+ { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 },
+ { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 },
+ { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
+ { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
+ { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
+ { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 },
+ { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 },
+ { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
+ { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
+ { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
+ { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 },
+ { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 },
+ { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 },
+ { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 },
+ { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 },
+
+ // AVX-512{F,VL} masked arithmetic instructions 128-bit
+ { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
+ { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
+ { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
+ { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 },
+ { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 },
+ { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
+ { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
+ { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
+ { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
+ { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
+ { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
+ { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
+ { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
+ { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
+ { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
+ { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
+ { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
+ { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
+ { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
+ { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
+ { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
+ { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
+ { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
+ { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
+ { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
+ { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
+ { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
+ { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
+ { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
+ { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 },
+ { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 },
+ { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 },
+ { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 },
+ { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 },
+ { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 },
+ { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 },
+ { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
+ { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
+ { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
+ { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
+ { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
+ { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
+ { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
+ { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
+ { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
+ { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
+ { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
+ { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
+ { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
+ { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
+ { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
+ { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
+ { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
+ { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
+ { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
+ { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
+ { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
+ { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
+ { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
+ { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
+ { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
+ { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
+ { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
+ { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
+ { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
+ { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
+ { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
+ { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
+ { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
+ { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
+ { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
+ { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
+ { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
+ { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
+ { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
+ { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
+ { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
+ { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
+ { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
+ { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
+ { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
+ { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
+ { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
+ { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
+ { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
+ { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
+ { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
+ { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
+ { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
+ { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
+ { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
+ { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
+ { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 },
+ { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 },
+ { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 },
+ { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 },
+ { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 },
+ { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 },
+ { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 },
+ { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 },
+ { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 },
+ { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 },
+ { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 },
+ { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 },
+ { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
+ { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
+ { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
+ { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
+ { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
+ { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
+ { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
+ { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
+ { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 },
+ { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 },
+ { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 },
+ { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
+ { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
+
+ // AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
+ { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
+ { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
+ { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
+ { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
+ { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
+ { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
+ { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
+ { X86::VPERMQZrik, X86::VPERMQZmik, 0 },
+ { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
+ { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
+ { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
+ { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
+ { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
+ { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
+ { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
+ { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
+ { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
+ { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
+ { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 },
+ { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 },
+ { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
+ { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
+ { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
+ { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 },
+ { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 },
+ { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 },
+ { X86::VPSRADZrik, X86::VPSRADZmik, 0 },
+ { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 },
+ { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 },
+ { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
+ { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
+ { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
+ { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
+ { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
+ { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
+ { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
+ { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
+ { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
+ { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
+ { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
+ { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
+ { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
+ { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
+ { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
+ { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
+ { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
+ { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
+ { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
+ { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 },
+ { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 },
+ { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 },
+ { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 },
+ { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 },
+ { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 },
+ { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
+ { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
+ { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
+ { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
+ { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
+ { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
+ { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
+ { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
+ { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
+ { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
+ { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
+ { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 },
+ { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 },
+ { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 },
+ { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 },
+ { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 },
+ { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 },
+ { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
+ { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
+ { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
+ };
+
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
// Index 3, folded load
Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
+ auto I = X86InstrFMA3Info::rm_begin();
+ auto E = X86InstrFMA3Info::rm_end();
+ for (; I != E; ++I) {
+ if (!I.getGroup()->isKMasked()) {
+ // Intrinsic forms need to pass TB_NO_REVERSE.
+ if (I.getGroup()->isIntrinsic()) {
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+ } else {
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
+ }
+ }
+ }
+
+ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
+ // AVX-512 foldable masked instructions
+ { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
+ { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
+ { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
+ { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
+ { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
+ { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
+ { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
+ { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
+ { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
+ { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
+ { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
+ { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
+ { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
+ { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
+ { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
+ { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
+ { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
+ { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
+ { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
+ { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 },
+ { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 },
+ { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
+ { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
+ { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 },
+ { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 },
+ { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VORPDZrrk, X86::VORPDZrmk, 0 },
+ { X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
+ { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
+ { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
+ { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
+ { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
+ { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
+ { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
+ { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
+ { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
+ { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
+ { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
+ { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
+ { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 },
+ { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
+ { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
+ { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
+ { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
+ { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
+ { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
+ { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
+ { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
+ { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
+ { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
+ { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
+ { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
+ { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
+ { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
+ { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
+ { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
+ { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
+ { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
+ { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
+ { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
+ { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
+ { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
+ { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
+ { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
+ { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
+ { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
+ { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
+ { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
+ { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
+ { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
+ { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
+ { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
+ { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
+ { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
+ { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
+ { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
+ { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
+ { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
+ { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
+ { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
+ { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
+ { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
+ { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
+ { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
+ { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
+ { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
+ { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
+ { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
+ { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
+ { X86::VPORDZrrk, X86::VPORDZrmk, 0 },
+ { X86::VPORQZrrk, X86::VPORQZrmk, 0 },
+ { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
+ { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
+ { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
+ { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
+ { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
+ { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
+ { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
+ { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
+ { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
+ { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
+ { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
+ { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
+ { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
+ { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
+ { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
+ { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
+ { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
+ { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
+ { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
+ { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
+ { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
+ { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
+ { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
+ { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
+ { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
+ { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
+ { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
+ { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
+ { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
+ { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
+ { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
+ { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 },
+ { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 },
+ { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 },
+ { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 },
+ { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
+ { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
+ { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
+ { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
+ { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
+ { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
+ { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
+ { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
+ { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 },
+ { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 },
+ { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
+
+ // AVX-512{F,VL} foldable masked instructions 256-bit
+ { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
+ { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
+ { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
+ { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 },
+ { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 },
+ { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
+ { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
+ { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
+ { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
+ { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
+ { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
+ { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
+ { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
+ { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
+ { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
+ { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
+ { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
+ { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
+ { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
+ { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
+ { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
+ { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
+ { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
+ { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
+ { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
+ { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
+ { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
+ { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
+ { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
+ { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
+ { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
+ { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
+ { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 },
+ { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 },
+ { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 },
+ { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 },
+ { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 },
+ { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 },
+ { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 },
+ { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
+ { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
+ { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
+ { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
+ { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
+ { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
+ { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
+ { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
+ { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
+ { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
+ { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
+ { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
+ { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
+ { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
+ { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
+ { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
+ { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
+ { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
+ { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
+ { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
+ { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
+ { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
+ { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
+ { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
+ { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
+ { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
+ { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
+ { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
+ { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
+ { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
+ { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
+ { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
+ { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
+ { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
+ { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
+ { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
+ { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
+ { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
+ { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
+ { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
+ { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
+ { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
+ { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
+ { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
+ { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
+ { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
+ { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
+ { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
+ { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
+ { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
+ { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
+ { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
+ { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
+ { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
+ { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
+ { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
+ { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
+ { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
+ { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
+ { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
+ { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
+ { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
+ { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
+ { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
+ { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
+ { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
+ { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
+ { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
+ { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
+ { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
+ { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
+ { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
+ { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 },
+ { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 },
+ { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 },
+ { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
+ { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
+ { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
+ { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
+ { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
+ { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
+ { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
+ { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 },
+ { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 },
+ { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 },
+ { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 },
+ { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
+ { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
+ { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
+ { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 },
+ { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
+ { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
+ { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
+ { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
+ { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 },
+ { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 },
+ { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 },
+ { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 },
+ { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 },
+
+ // AVX-512{F,VL} foldable instructions 128-bit
+ { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
+ { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
+ { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
+ { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 },
+ { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 },
+ { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
+ { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
+ { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
+ { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
+ { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
+ { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
+ { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
+ { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
+ { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
+ { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
+ { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
+ { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
+ { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
+ { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
+ { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
+ { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
+ { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
+ { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
+ { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
+ { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
+ { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
+ { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
+ { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
+ { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 },
+ { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 },
+ { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 },
+ { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 },
+ { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 },
+ { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 },
+ { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 },
+ { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
+ { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
+ { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
+ { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
+ { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
+ { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
+ { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
+ { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
+ { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
+ { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
+ { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
+ { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
+ { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
+ { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
+ { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
+ { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
+ { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
+ { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
+ { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
+ { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
+ { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
+ { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
+ { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
+ { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
+ { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
+ { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
+ { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
+ { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
+ { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
+ { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
+ { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
+ { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
+ { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
+ { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
+ { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
+ { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
+ { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
+ { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
+ { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
+ { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
+ { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
+ { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
+ { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
+ { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
+ { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
+ { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
+ { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
+ { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
+ { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
+ { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
+ { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
+ { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
+ { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
+ { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
+ { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
+ { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
+ { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
+ { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
+ { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
+ { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
+ { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
+ { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
+ { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
+ { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
+ { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
+ { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
+ { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
+ { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
+ { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 },
+ { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 },
+ { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 },
+ { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
+ { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
+ { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
+ { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
+ { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
+ { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
+ { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
+ { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 },
+ { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 },
+ { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 },
+ { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 },
+ { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
+ { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
+ { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
+ { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
+ { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
+ { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
+ { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
+ { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
+ { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 },
+ { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 },
+ { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
+ { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
+ { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
+
+ // 512-bit three source instructions with zero masking.
+ { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
+ { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
+ { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
+ { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
+ { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
+ { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
+ { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
+ { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
+ { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
+ { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
+ { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
+ { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
+ { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
+ { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
+
+ // 256-bit three source instructions with zero masking.
+ { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
+ { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
+ { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
+ { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
+ { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
+ { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
+ { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
+ { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
+ { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
+ { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
+ { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
+ { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
+ { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
+ { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
+
+ // 128-bit three source instructions with zero masking.
+ { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
+ { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
+ { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
+ { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
+ { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
+ { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
+ { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
+ { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
+ { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
+ { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
+ { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
+ { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
+ { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
+ { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
+ };
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
@@ -163,6 +3545,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 4, folded load
Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
+ for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) {
+ if (I.getGroup()->isKMasked()) {
+ // Intrinsics need to pass TB_NO_REVERSE.
+ if (I.getGroup()->isIntrinsic()) {
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+ } else {
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
+ }
+ }
+ }
}
void
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a3e677209305..8490b972eb5c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5183,14 +5183,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
- Sched<[WriteFAdd]>;
+ Sched<[WriteFHAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
- IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag,
@@ -5200,14 +5200,14 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
- Sched<[WriteFAdd]>;
+ Sched<[WriteFHAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
- IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -5310,7 +5310,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
-let Sched = WriteVecALU in {
+let Sched = WritePHAdd in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 598d88d8b9c3..33bc8e11a572 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,20 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#include "X86AsmPrinter.h"
-#include "X86RegisterInfo.h"
-#include "X86ShuffleDecodeConstantPool.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "InstPrinter/X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
+#include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
+#include "X86ShuffleDecodeConstantPool.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
@@ -38,13 +39,12 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index aabbf67a16b6..e6756b975c10 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -27,8 +27,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 677e82459766..03c8ccb53afe 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
//-- Arithmetic instructions --//
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [HWPort1, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [HWPort1, HWPort5, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [HWPort1, HWPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [HWPort1, HWPort5, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
+}
+
// PHADD|PHSUB (S) W/D.
// v <- v,v.
def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index eca65c2892b7..b8ec5883152c 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -157,6 +157,31 @@ def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
let ResourceCycles = [1, 1, 1, 1];
}
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [SBPort1]> {
+ let Latency = 3;
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [SBPort15]>;
+
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> {
+ let Latency = 5;
+ let ResourceCycles = [1, 1];
+}
+
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort015]> {
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 4eae6ca7abe3..a12fa68faf4f 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -77,6 +77,10 @@ defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+// Horizontal Add/Sub (float and integer)
+defm WriteFHAdd : X86SchedWritePair;
+defm WritePHAdd : X86SchedWritePair;
+
// Vector integer operations.
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index ce1ece34e431..6cb2a3694d92 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -320,6 +320,38 @@ def : WriteRes<WriteAESKeyGenLd, [JLAGU, JVIMUL]> {
}
////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFHAdd, [JFPU0]> {
+ let Latency = 3;
+}
+
+def : WriteRes<WriteFHAddLd, [JLAGU, JFPU0]> {
+ let Latency = 8;
+}
+
+def : WriteRes<WritePHAdd, [JFPU01]> {
+ let ResourceCycles = [1];
+}
+def : WriteRes<WritePHAddLd, [JLAGU, JFPU01 ]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteFHAddY: SchedWriteRes<[JFPU0]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>;
+
+def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>;
+
+////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index f95d4fa04177..03ed2db2350d 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -137,6 +137,33 @@ defm : SMWriteResPair<WriteShuffle, FPC_RSV0, 1>;
defm : SMWriteResPair<WriteBlend, FPC_RSV0, 1>;
defm : SMWriteResPair<WriteMPSAD, FPC_RSV0, 7>;
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+
+def : WriteRes<WriteFHAdd, [FPC_RSV01]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+}
+
+def : WriteRes<WriteFHAddLd, [FPC_RSV01, MEC_RSV]> {
+ let Latency = 6;
+ let ResourceCycles = [2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+def : WriteRes<WritePHAdd, [FPC_RSV01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : WriteRes<WritePHAddLd, [FPC_RSV01, MEC_RSV]> {
+ let Latency = 4;
+ let ResourceCycles = [1, 1];
+}
+
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index d4b2392eb1f5..c67aa04aebea 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "X86InstrInfo.h"
+#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
-#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 84ec98484f8e..e36a47506ba0 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Subtarget.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Attributes.h"
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index cb21f1bd7706..278b57eb00b7 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -24,8 +24,8 @@
#include "X86TargetObjectFile.h"
#include "X86TargetTransformInfo.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 7f70829cb6c6..4fd95717478e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -9,6 +9,8 @@
#include "X86TargetObjectFile.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCContext.h"
@@ -16,8 +18,6 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index fe94079fd869..11ba7025e1b7 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1383,6 +1383,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
+unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
+
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
@@ -2176,6 +2178,17 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
}
+bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2) {
+ // X86 specific here are "instruction number 1st priority".
+ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+ C1.NumIVMuls, C1.NumBaseAdds,
+ C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+ C2.NumIVMuls, C2.NumBaseAdds,
+ C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
+
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
Type *ScalarTy = DataTy->getScalarType();
int DataWidth = isa<PointerType>(ScalarTy) ?
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 9bef9e80c395..09ce2c90498d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -76,6 +76,8 @@ public:
int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);
+ unsigned getAtomicMemIntrinsicMaxElementSize() const;
+
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX);
@@ -99,6 +101,8 @@ public:
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
+ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2);
bool isLegalMaskedLoad(Type *DataType);
bool isLegalMaskedStore(Type *DataType);
bool isLegalMaskedGather(Type *DataType);
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 3ee14a0ff7b1..0c3b34341476 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 5fc58d831319..dd27e7ca30aa 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "InstPrinter/XCoreInstPrinter.h"
#include "MCTargetDesc/XCoreMCAsmInfo.h"
-#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "XCoreTargetStreamer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDwarf.h"
@@ -23,8 +23,8 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index b35aa0b95821..8f7c8a82380a 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
#include "InstPrinter/XCoreInstPrinter.h"
+#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMCInstLower.h"
#include "XCoreSubtarget.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 1a1cbd474888..cb23399995da 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "XCoreTargetMachine.h"
#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "XCore.h"
-#include "XCoreTargetMachine.h"
#include "XCoreTargetObjectFile.h"
#include "XCoreTargetTransformInfo.h"
#include "llvm/ADT/Optional.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2b53f01a996d..a047b3c9d9fc 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,9 +15,9 @@
#define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
#include "XCoreSubtarget.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index ad8693fd325e..c60a262e719c 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -9,10 +9,10 @@
#include "XCoreTargetObjectFile.h"
#include "XCoreSubtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;