Diffstat (limited to 'contrib/llvm-project/llvm/lib')
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp | 126
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 139
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 211
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Globals.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Metadata.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Type.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Verifier.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCContext.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCExpr.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp | 69
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/PassRegistry.def | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/CommandLine.cpp | 78
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp | 83
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Signals.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp | 163
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp | 179
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td | 126
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td | 75
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 229
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 79
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp | 92
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 78
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 182
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 125
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 82
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td | 1082
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td | 1457
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td | 211
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td | 952
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td | 141
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td | 284
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/Host.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp | 429
-rw-r--r--  contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp | 120
-rw-r--r--  contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp | 40
-rw-r--r--  contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 49
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 134
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 106
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 75
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 123
223 files changed, 6394 insertions, 3825 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2..5beac5547d65 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6079,7 +6079,7 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
- if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
+ if (!OffsetConstInt || OffsetConstInt->getBitWidth() > 64)
return nullptr;
APInt OffsetInt = OffsetConstInt->getValue().sextOrTrunc(
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index 910f6b72afef..89cc7ea15ec1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1566,7 +1566,6 @@ void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
Info.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
if (auto *Impl = Info.getImpl())
Impl->clear();
@@ -1627,9 +1626,8 @@ void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); }
LazyValueInfo LazyValueAnalysis::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &AC = FAM.getResult<AssumptionAnalysis>(F);
- auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI);
+ return LazyValueInfo(&AC, &F.getParent()->getDataLayout());
}
/// Returns true if we can statically tell that this value will never be a
@@ -1714,11 +1712,11 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
static LazyValueInfo::Tristate
getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val,
- const DataLayout &DL, TargetLibraryInfo *TLI) {
+ const DataLayout &DL) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
if (Val.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI);
+ Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL);
if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
return LazyValueInfo::Unknown;
@@ -1759,15 +1757,13 @@ getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val,
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Val.getNotConstant(), C, DL,
- TLI);
+ Val.getNotConstant(), C, DL);
if (Res && Res->isNullValue())
return LazyValueInfo::False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Val.getNotConstant(), C, DL,
- TLI);
+ Val.getNotConstant(), C, DL);
if (Res && Res->isNullValue())
return LazyValueInfo::True;
}
@@ -1787,7 +1783,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
ValueLatticeElement Result =
getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI);
- return getPredicateResult(Pred, C, Result, M->getDataLayout(), TLI);
+ return getPredicateResult(Pred, C, Result, M->getDataLayout());
}
LazyValueInfo::Tristate
@@ -1811,7 +1807,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
ValueLatticeElement Result =
UseBlockValue ? Impl.getValueInBlock(V, CxtI->getParent(), CxtI)
: Impl.getValueAt(V, CxtI);
- Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
+ Tristate Ret = getPredicateResult(Pred, C, Result, DL);
if (Ret != Unknown)
return Ret;
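
The TLI argument can be dropped from all of these call sites because ConstantFoldCompareInstOperands declares it with a null default. A hedged, approximate view of the declaration in llvm/Analysis/ConstantFolding.h (later optional parameters elided):

  Constant *ConstantFoldCompareInstOperands(
      unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL,
      const TargetLibraryInfo *TLI = nullptr);

So omitting the trailing argument is behavior-preserving whenever no TLI is available.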
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 0894560fd078..89666018d925 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -143,7 +143,7 @@ static cl::opt<bool, true> HoistRuntimeChecks(
"hoist-runtime-checks", cl::Hidden,
cl::desc(
"Hoist inner loop runtime memory checks to outer loop if possible"),
- cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(false));
+ cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
bool VectorizerParams::HoistRuntimeChecks;
bool VectorizerParams::isInterleaveForced() {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 49eccde45f31..951e00e34142 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1292,16 +1292,16 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
if (InsertRes.first->second != Pointer.getAddr()) {
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
- for (unsigned i = 0; i < NewBlocks.size(); i++)
- Visited.erase(NewBlocks[i]);
+ for (auto *NewBlock : NewBlocks)
+ Visited.erase(NewBlock);
goto PredTranslationFailure;
}
}
if (NewBlocks.size() > WorklistEntries) {
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
- for (unsigned i = 0; i < NewBlocks.size(); i++)
- Visited.erase(NewBlocks[i]);
+ for (auto *NewBlock : NewBlocks)
+ Visited.erase(NewBlock);
GotWorklistLimit = true;
goto PredTranslationFailure;
}
@@ -1359,8 +1359,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
- for (unsigned i = 0, n = PredList.size(); i < n; ++i)
- Visited.erase(PredList[i].first);
+ for (const auto &Pred : PredList)
+ Visited.erase(Pred.first);
goto PredTranslationFailure;
}
@@ -1371,9 +1371,9 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// any results for. (getNonLocalPointerDepFromBB will modify our
// datastructures in ways the code after the PredTranslationFailure label
// doesn't expect.)
- for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
- BasicBlock *Pred = PredList[i].first;
- PHITransAddr &PredPointer = PredList[i].second;
+ for (auto &I : PredList) {
+ BasicBlock *Pred = I.first;
+ PHITransAddr &PredPointer = I.second;
Value *PredPtrVal = PredPointer.getAddr();
bool CanTranslate = true;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 580fe112fcd7..623814c038a7 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7914,9 +7914,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// expression. We already checked that ShlAmt < BitWidth, so
// the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
// ShlAmt - AShrAmt < Amt.
- uint64_t ShlAmt = ShlAmtCI->getZExtValue();
- if (ShlAmtCI->getValue().ult(BitWidth) && ShlAmt >= AShrAmt) {
- APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt);
+ const APInt &ShlAmt = ShlAmtCI->getValue();
+ if (ShlAmt.ult(BitWidth) && ShlAmt.uge(AShrAmt)) {
+ APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
+ ShlAmtCI->getZExtValue() - AShrAmt);
const SCEV *CompositeExpr =
getMulExpr(AddTruncateExpr, getConstant(Mul));
if (L->getOpcode() != Instruction::Shl)
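
The reordering matters because getZExtValue() asserts when the value needs more than 64 active bits; once ShlAmt.ult(BitWidth) holds, the value is provably smaller than BitWidth and therefore safe to extract. A minimal illustration of the hazard (hypothetical values, not from the patch):

  // Illustrative: a 128-bit all-ones shift-amount constant.
  APInt Huge = APInt::getMaxValue(128); // needs 128 active bits
  // Huge.getZExtValue() would assert; Huge.ult(BitWidth) is always safe,
  // and when it is true the value also fits in a uint64_t.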
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 20959cf6948f..bbb7c86d2185 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -1149,6 +1149,16 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
return isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M);
}
+bool TargetLibraryInfoImpl::getLibFunc(unsigned int Opcode, Type *Ty,
+ LibFunc &F) const {
+ // Must be a frem instruction with float or double arguments.
+ if (Opcode != Instruction::FRem || (!Ty->isDoubleTy() && !Ty->isFloatTy()))
+ return false;
+
+ F = Ty->isDoubleTy() ? LibFunc_fmod : LibFunc_fmodf;
+ return true;
+}
+
void TargetLibraryInfoImpl::disableAllFunctions() {
memset(AvailableArray, 0, sizeof(AvailableArray));
}
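
The new overload only answers for frem on float or double, mapping it to the fmod family. A minimal usage sketch, assuming a TargetLibraryInfoImpl instance TLII and an LLVMContext Ctx (both hypothetical names):

  LibFunc LF;
  if (TLII.getLibFunc(Instruction::FRem, Type::getDoubleTy(Ctx), LF)) {
    // LF == LibFunc_fmod here; for a float type it would be LibFunc_fmodf.
  }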
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index ad918ef7245b..426f98c0c628 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -126,7 +126,7 @@ static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
return ParseRet::None;
}
-/// The function looks for the following stringt at the beginning of
+/// The function looks for the following string at the beginning of
/// the input string `ParseString`:
///
/// <token> <number>
@@ -376,7 +376,7 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
// _ZGV<isa><mask><vlen><parameters>_<scalarname>.
StringRef VectorName = MangledName;
- // Parse the fixed size part of the manled name
+ // Parse the fixed size part of the mangled name
if (!MangledName.consume_front("_ZGV"))
return std::nullopt;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index 9ae05a4b5ccc..cac2602d455f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -1826,6 +1826,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known.Zero &= ~Elt;
Known.One &= Elt;
}
+ if (Known.hasConflict())
+ Known.resetAll();
return;
}
@@ -1849,6 +1851,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known.Zero &= ~Elt;
Known.One &= Elt;
}
+ if (Known.hasConflict())
+ Known.resetAll();
return;
}
@@ -2368,19 +2372,12 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
const SimplifyQuery &Q, unsigned BitWidth, Value *X,
Value *Y) {
+ // TODO: Move this case into isKnownNonEqual().
if (auto *C = dyn_cast<Constant>(X))
if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q))
return true;
- KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
- if (XKnown.isUnknown())
- return false;
- KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
- // If X != Y then X - Y is non zero.
- std::optional<bool> ne = KnownBits::ne(XKnown, YKnown);
- // If we are unable to compute if X != Y, we won't be able to do anything
- // computing the knownbits of the sub expression so just return here.
- return ne && *ne;
+ return ::isKnownNonEqual(X, Y, Depth, Q);
}
static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
@@ -3191,11 +3188,12 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
// Are any known bits in V1 contradictory to known bits in V2? If V1
// has a known zero where V2 has a known one, they must not be equal.
KnownBits Known1 = computeKnownBits(V1, Depth, Q);
- KnownBits Known2 = computeKnownBits(V2, Depth, Q);
-
- if (Known1.Zero.intersects(Known2.One) ||
- Known2.Zero.intersects(Known1.One))
- return true;
+ if (!Known1.isUnknown()) {
+ KnownBits Known2 = computeKnownBits(V2, Depth, Q);
+ if (Known1.Zero.intersects(Known2.One) ||
+ Known2.Zero.intersects(Known1.One))
+ return true;
+ }
}
if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q))
@@ -3205,6 +3203,13 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
return true;
+ Value *A, *B;
+ // PtrToInts are NonEqual if their Ptrs are NonEqual.
+ // Check PtrToInt type matches the pointer size.
+ if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
+ match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
+ return isKnownNonEqual(A, B, Depth + 1, Q);
+
return false;
}
@@ -6284,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
- bool ForSigned,
- const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned,
+ const SimplifyQuery &SQ) {
ConstantRange CR1 =
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
@@ -6555,10 +6560,25 @@ static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
return Safe;
}
-static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
+enum class UndefPoisonKind {
+ PoisonOnly = (1 << 0),
+ UndefOnly = (1 << 1),
+ UndefOrPoison = PoisonOnly | UndefOnly,
+};
+
+static bool includesPoison(UndefPoisonKind Kind) {
+ return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
+}
+
+static bool includesUndef(UndefPoisonKind Kind) {
+ return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
+}
+
+static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
bool ConsiderFlagsAndMetadata) {
- if (ConsiderFlagsAndMetadata && Op->hasPoisonGeneratingFlagsOrMetadata())
+ if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
+ Op->hasPoisonGeneratingFlagsOrMetadata())
return true;
unsigned Opcode = Op->getOpcode();
@@ -6568,7 +6588,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
case Instruction::Shl:
case Instruction::AShr:
case Instruction::LShr:
- return !shiftAmountKnownInRange(Op->getOperand(1));
+ return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1));
case Instruction::FPToSI:
case Instruction::FPToUI:
// fptosi/ui yields poison if the resulting value does not fit in the
@@ -6609,7 +6629,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
return false;
case Intrinsic::sshl_sat:
case Intrinsic::ushl_sat:
- return !shiftAmountKnownInRange(II->getArgOperand(1));
+ return includesPoison(Kind) &&
+ !shiftAmountKnownInRange(II->getArgOperand(1));
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::sqrt:
@@ -6664,18 +6685,16 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
- if (!Idx || Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()))
- return true;
+ if (includesPoison(Kind))
+ return !Idx ||
+ Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
return false;
}
case Instruction::ShuffleVector: {
- // shufflevector may return undef.
- if (PoisonOnly)
- return false;
ArrayRef<int> Mask = isa<ConstantExpr>(Op)
? cast<ConstantExpr>(Op)->getShuffleMask()
: cast<ShuffleVectorInst>(Op)->getShuffleMask();
- return is_contained(Mask, PoisonMaskElem);
+ return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem);
}
case Instruction::FNeg:
case Instruction::PHI:
@@ -6711,17 +6730,17 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
bool llvm::canCreateUndefOrPoison(const Operator *Op,
bool ConsiderFlagsAndMetadata) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false,
+ return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison,
ConsiderFlagsAndMetadata);
}
bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true,
+ return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly,
ConsiderFlagsAndMetadata);
}
-static bool directlyImpliesPoison(const Value *ValAssumedPoison,
- const Value *V, unsigned Depth) {
+static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
+ unsigned Depth) {
if (ValAssumedPoison == V)
return true;
@@ -6773,14 +6792,11 @@ bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
}
-static bool programUndefinedIfUndefOrPoison(const Value *V,
- bool PoisonOnly);
+static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
-static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
- AssumptionCache *AC,
- const Instruction *CtxI,
- const DominatorTree *DT,
- unsigned Depth, bool PoisonOnly) {
+static bool isGuaranteedNotToBeUndefOrPoison(
+ const Value *V, AssumptionCache *AC, const Instruction *CtxI,
+ const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
if (Depth >= MaxAnalysisRecursionDepth)
return false;
@@ -6795,16 +6811,19 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
}
if (auto *C = dyn_cast<Constant>(V)) {
+ if (isa<PoisonValue>(C))
+ return !includesPoison(Kind);
+
if (isa<UndefValue>(C))
- return PoisonOnly && !isa<PoisonValue>(C);
+ return !includesUndef(Kind);
if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
isa<ConstantPointerNull>(C) || isa<Function>(C))
return true;
if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C))
- return (PoisonOnly ? !C->containsPoisonElement()
- : !C->containsUndefOrPoisonElement()) &&
+ return (!includesUndef(Kind) ? !C->containsPoisonElement()
+ : !C->containsUndefOrPoisonElement()) &&
!C->containsConstantExpression();
}
@@ -6822,8 +6841,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
return true;
auto OpCheck = [&](const Value *V) {
- return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1,
- PoisonOnly);
+ return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
};
if (auto *Opr = dyn_cast<Operator>(V)) {
@@ -6845,14 +6863,16 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
for (unsigned i = 0; i < Num; ++i) {
auto *TI = PN->getIncomingBlock(i)->getTerminator();
if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
- DT, Depth + 1, PoisonOnly)) {
+ DT, Depth + 1, Kind)) {
IsWellDefined = false;
break;
}
}
if (IsWellDefined)
return true;
- } else if (!canCreateUndefOrPoison(Opr) && all_of(Opr->operands(), OpCheck))
+ } else if (!::canCreateUndefOrPoison(Opr, Kind,
+ /*ConsiderFlagsAndMetadata*/ true) &&
+ all_of(Opr->operands(), OpCheck))
return true;
}
@@ -6862,7 +6882,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
return true;
- if (programUndefinedIfUndefOrPoison(V, PoisonOnly))
+ if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
return true;
// CxtI may be null or a cloned instruction.
@@ -6894,7 +6914,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
if (Cond) {
if (Cond == V)
return true;
- else if (PoisonOnly && isa<Operator>(Cond)) {
+ else if (!includesUndef(Kind) && isa<Operator>(Cond)) {
// For poison, we can analyze further
auto *Opr = cast<Operator>(Cond);
if (any_of(Opr->operands(),
@@ -6916,20 +6936,22 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
const Instruction *CtxI,
const DominatorTree *DT,
unsigned Depth) {
- return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false);
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
+ UndefPoisonKind::UndefOrPoison);
}
bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth) {
- return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true);
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
+ UndefPoisonKind::PoisonOnly);
}
bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth) {
- // TODO: This is currently equivalent to isGuaranteedNotToBeUndefOrPoison().
- return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false);
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
+ UndefPoisonKind::UndefOnly);
}
/// Return true if undefined behavior would provably be executed on the path to
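
After this change the three public wrappers answer genuinely different questions: poison is acceptable to isGuaranteedNotToBeUndef, and undef is acceptable to isGuaranteedNotToBePoison. A small illustrative sketch (hypothetical values; Ctx is an LLVMContext):

  Value *PV = PoisonValue::get(Type::getInt32Ty(Ctx));
  Value *UV = UndefValue::get(Type::getInt32Ty(Ctx));
  bool A = isGuaranteedNotToBeUndef(PV);          // true: poison is not undef
  bool B = isGuaranteedNotToBePoison(PV);         // false
  bool C = isGuaranteedNotToBePoison(UV);         // true: undef is not poison
  bool D = isGuaranteedNotToBeUndefOrPoison(UV);  // false

Note that PoisonValue is a subclass of UndefValue, which is why the isa<PoisonValue> test has to come before the isa<UndefValue> test in the constant handling above.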
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index 91d8c31fa062..f90fca9d937f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -12,6 +12,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -20,6 +21,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
@@ -1477,6 +1479,32 @@ void VFABI::getVectorVariantNames(
}
}
+FunctionType *VFABI::createFunctionType(const VFInfo &Info,
+ const FunctionType *ScalarFTy) {
+ // Create vector parameter types
+ SmallVector<Type *, 8> VecTypes;
+ ElementCount VF = Info.Shape.VF;
+ int ScalarParamIndex = 0;
+ for (auto VFParam : Info.Shape.Parameters) {
+ if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
+ VectorType *MaskTy =
+ VectorType::get(Type::getInt1Ty(ScalarFTy->getContext()), VF);
+ VecTypes.push_back(MaskTy);
+ continue;
+ }
+
+ Type *OperandTy = ScalarFTy->getParamType(ScalarParamIndex++);
+ if (VFParam.ParamKind == VFParamKind::Vector)
+ OperandTy = VectorType::get(OperandTy, VF);
+ VecTypes.push_back(OperandTy);
+ }
+
+ auto *RetTy = ScalarFTy->getReturnType();
+ if (!RetTy->isVoidTy())
+ RetTy = VectorType::get(RetTy, VF);
+ return FunctionType::get(RetTy, VecTypes, false);
+}
+
bool VFShape::hasValidParameterList() const {
for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
++Pos) {
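
createFunctionType widens the scalar signature parameter-by-parameter according to the VFABI shape, inserting a <VF x i1> mask for GlobalPredicate parameters. A hedged sketch of its use, assuming Info came from VFABI::tryDemangleForVFABI and Ctx is an LLVMContext:

  FunctionType *ScalarFTy = FunctionType::get(
      Type::getDoubleTy(Ctx), {Type::getDoubleTy(Ctx)}, /*isVarArg=*/false);
  FunctionType *VecFTy = VFABI::createFunctionType(Info, ScalarFTy);
  // For VF = 4 and a single Vector parameter: <4 x double> (<4 x double>).
  // With an added GlobalPredicate parameter the mask type <4 x i1> appears
  // at the corresponding position in the parameter list.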
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index d6f487c18b03..30ea7eef3a12 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -412,7 +412,7 @@ static uint32_t constructAbbreviationTag(
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) {
uint32_t AbbrvTag = 0;
if (EntryRet)
- AbbrvTag |= 1 << EntryRet->Endoding.Index;
+ AbbrvTag |= 1 << EntryRet->Encoding.Index;
AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
AbbrvTag |= Tag << LowerBitSize;
return AbbrvTag;
@@ -429,7 +429,7 @@ void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
if (Abbreviations.count(AbbrvTag) == 0) {
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
if (EntryRet)
- UA.push_back(EntryRet->Endoding);
+ UA.push_back(EntryRet->Encoding);
UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
Abbreviations.try_emplace(AbbrvTag, UA);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 61309c51336e..4dd27702786e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -583,6 +583,7 @@ bool AsmPrinter::doInitialization(Module &M) {
[[fallthrough]];
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ZOS:
ES = new DwarfCFIException(this);
break;
case ExceptionHandling::ARM:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 87a0ba58b14c..9037f752dc4f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -1375,16 +1375,9 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
if (!MI.isDebugValue())
return false;
- const DILocalVariable *Var = MI.getDebugVariable();
- const DIExpression *Expr = MI.getDebugExpression();
- const DILocation *DebugLoc = MI.getDebugLoc();
- const DILocation *InlinedAt = DebugLoc->getInlinedAt();
- assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
"Expected inlined-at fields to agree");
- DebugVariable V(Var, Expr, InlinedAt);
- DbgValueProperties Properties(MI);
-
// If there are no instructions in this lexical scope, do no location tracking
// at all, this variable shouldn't get a legitimate location range.
auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
@@ -1417,7 +1410,7 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
}
}
}
- VTracker->defVar(MI, Properties, DebugOps);
+ VTracker->defVar(MI, DbgValueProperties(MI), DebugOps);
}
// If performing final tracking of transfers, report this variable definition
@@ -2420,7 +2413,7 @@ bool InstrRefBasedLDV::mlocJoin(
// Pick out the first predecessors live-out value for this location. It's
// guaranteed to not be a backedge, as we order by RPO.
- ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+ ValueIDNum FirstVal = OutLocs[*BlockOrders[0]][Idx.asU64()];
// If we've already eliminated a PHI here, do no further checking, just
// propagate the first live-in value into this block.
@@ -2437,8 +2430,7 @@ bool InstrRefBasedLDV::mlocJoin(
bool Disagree = false;
for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
const MachineBasicBlock *PredMBB = BlockOrders[I];
- const ValueIDNum &PredLiveOut =
- OutLocs[PredMBB->getNumber()][Idx.asU64()];
+ const ValueIDNum &PredLiveOut = OutLocs[*PredMBB][Idx.asU64()];
// Incoming values agree, continue trying to eliminate this PHI.
if (FirstVal == PredLiveOut)
@@ -2563,7 +2555,7 @@ void InstrRefBasedLDV::placeMLocPHIs(
auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) {
for (const MachineBasicBlock *MBB : PHIBlocks)
- MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
+ MInLocs[*MBB][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
};
// For locations with no reg units, just place PHIs.
@@ -2642,7 +2634,8 @@ void InstrRefBasedLDV::buildMLocValueMap(
// Initialize entry block to PHIs. These represent arguments.
for (auto Location : MTracker->locations())
- MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+ MInLocs.tableForEntryMBB()[Location.Idx.asU64()] =
+ ValueIDNum(0, 0, Location.Idx);
MTracker->reset();
@@ -2671,7 +2664,7 @@ void InstrRefBasedLDV::buildMLocValueMap(
// Join the values in all predecessor blocks.
bool InLocsChanged;
- InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[*MBB]);
InLocsChanged |= Visited.insert(MBB).second;
// Don't examine transfer function if we've visited this loc at least
@@ -2680,7 +2673,7 @@ void InstrRefBasedLDV::buildMLocValueMap(
continue;
// Load the current set of live-ins into MLocTracker.
- MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+ MTracker->loadFromArray(MInLocs[*MBB], CurBB);
// Each element of the transfer function can be a new def, or a read of
// a live-in value. Evaluate each element, and store to "ToRemap".
@@ -2707,8 +2700,8 @@ void InstrRefBasedLDV::buildMLocValueMap(
// the transfer function, and mlocJoin.
bool OLChanged = false;
for (auto Location : MTracker->locations()) {
- OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value;
- MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value;
+ OLChanged |= MOutLocs[*MBB][Location.Idx.asU64()] != Location.Value;
+ MOutLocs[*MBB][Location.Idx.asU64()] = Location.Value;
}
MTracker->reset();
@@ -2851,7 +2844,6 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc(
unsigned NumLocs = MTracker->getNumLocs();
for (const auto p : BlockOrders) {
- unsigned ThisBBNum = p->getNumber();
auto OutValIt = LiveOuts.find(p);
assert(OutValIt != LiveOuts.end());
const DbgValue &OutVal = *OutValIt->second;
@@ -2870,7 +2862,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc(
ValueIDNum ValToLookFor = OutValOp.ID;
// Search the live-outs of the predecessor for the specified value.
for (unsigned int I = 0; I < NumLocs; ++I) {
- if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ if (MOutLocs[*p][I] == ValToLookFor)
Locs.back().push_back(LocIdx(I));
}
} else {
@@ -2883,7 +2875,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc(
// machine-value PHI locations.
for (unsigned int I = 0; I < NumLocs; ++I) {
ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I));
- if (MOutLocs[ThisBBNum][I] == MPHI)
+ if (MOutLocs[*p][I] == MPHI)
Locs.back().push_back(LocIdx(I));
}
}
@@ -3505,19 +3497,15 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
// Helper lambda for ejecting a block -- if nothing is going to use the block,
// we can translate the variable location information into DBG_VALUEs and then
// free all of InstrRefBasedLDV's data structures.
- SmallPtrSet<const MachineBasicBlock *, 8> EjectedBBs;
auto EjectBlock = [&](MachineBasicBlock &MBB) -> void {
- if (EjectedBBs.insert(&MBB).second == false)
- return;
unsigned BBNum = MBB.getNumber();
AllTheVLocs[BBNum].clear();
// Prime the transfer-tracker, and then step through all the block
// instructions, installing transfers.
MTracker->reset();
- MTracker->loadFromArray(MInLocs[BBNum], BBNum);
- TTracker->loadInlocs(MBB, MInLocs[BBNum], DbgOpStore, Output[BBNum],
- NumLocs);
+ MTracker->loadFromArray(MInLocs[MBB], BBNum);
+ TTracker->loadInlocs(MBB, MInLocs[MBB], DbgOpStore, Output[BBNum], NumLocs);
CurBB = BBNum;
CurInst = 1;
@@ -3528,8 +3516,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
}
// Free machine-location tables for this block.
- MInLocs[BBNum] = ValueTable();
- MOutLocs[BBNum] = ValueTable();
+ MInLocs.ejectTableForBlock(MBB);
+ MOutLocs.ejectTableForBlock(MBB);
// We don't need live-in variable values for this block either.
Output[BBNum].clear();
AllTheVLocs[BBNum].clear();
@@ -3594,7 +3582,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
// anything for such out-of-scope blocks, but for the sake of being similar
// to VarLocBasedLDV, eject these too.
for (auto *MBB : ArtificialBlocks)
- EjectBlock(*MBB);
+ if (MInLocs.hasTableFor(*MBB))
+ EjectBlock(*MBB);
return emitTransfers(AllVarsNumbering);
}
@@ -3693,8 +3682,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// machine values. The outer dimension is the block number; while the inner
// dimension is a LocIdx from MLocTracker.
unsigned NumLocs = MTracker->getNumLocs();
- FuncValueTable MOutLocs(MaxNumBlocks, ValueTable(NumLocs));
- FuncValueTable MInLocs(MaxNumBlocks, ValueTable(NumLocs));
+ FuncValueTable MOutLocs(MaxNumBlocks, NumLocs);
+ FuncValueTable MInLocs(MaxNumBlocks, NumLocs);
// Solve the machine value dataflow problem using the MLocTransfer function,
// storing the computed live-ins / live-outs into the array-of-arrays. We use
@@ -3732,7 +3721,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
CurBB = MBB.getNumber();
VTracker = &vlocs[CurBB];
VTracker->MBB = &MBB;
- MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+ MTracker->loadFromArray(MInLocs[MBB], CurBB);
CurInst = 1;
for (auto &MI : MBB) {
process(MI, &MOutLocs, &MInLocs);
@@ -3946,7 +3935,7 @@ public:
/// Find the live-in value number for the given block. Looks up the value at
/// the PHI location on entry.
BlockValueNum getValue(LDVSSABlock *LDVBB) {
- return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64();
+ return MLiveIns[LDVBB->BB][Loc.asU64()].asU64();
}
};
@@ -4186,8 +4175,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
});
for (auto &PHI : SortedPHIs) {
- ValueIDNum ThisBlockValueNum =
- MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()];
+ ValueIDNum ThisBlockValueNum = MLiveIns[PHI->ParentBlock->BB][Loc.asU64()];
// Are all these things actually defined?
for (auto &PHIIt : PHI->IncomingValues) {
@@ -4196,7 +4184,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
return std::nullopt;
ValueIDNum ValueToCheck;
- const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
+ const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB];
auto VVal = ValidatedValues.find(PHIIt.first);
if (VVal == ValidatedValues.end()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index d6dbb1feda3e..ccc284b62331 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -207,9 +207,48 @@ using namespace llvm;
/// Type for a table of values in a block.
using ValueTable = SmallVector<ValueIDNum, 0>;
-/// Type for a table-of-table-of-values, i.e., the collection of either
-/// live-in or live-out values for each block in the function.
-using FuncValueTable = SmallVector<ValueTable, 0>;
+/// A collection of ValueTables, one per BB in a function, with convenient
+/// accessor methods.
+struct FuncValueTable {
+ FuncValueTable(int NumBBs, int NumLocs) {
+ Storage.reserve(NumBBs);
+ for (int i = 0; i != NumBBs; ++i)
+ Storage.push_back(
+ std::make_unique<ValueTable>(NumLocs, ValueIDNum::EmptyValue));
+ }
+
+ /// Returns the ValueTable associated with MBB.
+ ValueTable &operator[](const MachineBasicBlock &MBB) const {
+ return (*this)[MBB.getNumber()];
+ }
+
+ /// Returns the ValueTable associated with the MachineBasicBlock whose number
+ /// is MBBNum.
+ ValueTable &operator[](int MBBNum) const {
+ auto &TablePtr = Storage[MBBNum];
+ assert(TablePtr && "Trying to access a deleted table");
+ return *TablePtr;
+ }
+
+ /// Returns the ValueTable associated with the entry MachineBasicBlock.
+ ValueTable &tableForEntryMBB() const { return (*this)[0]; }
+
+ /// Returns true if the ValueTable associated with MBB has not been freed.
+ bool hasTableFor(MachineBasicBlock &MBB) const {
+ return Storage[MBB.getNumber()] != nullptr;
+ }
+
+ /// Frees the memory of the ValueTable associated with MBB.
+ void ejectTableForBlock(const MachineBasicBlock &MBB) {
+ Storage[MBB.getNumber()].reset();
+ }
+
+private:
+ /// ValueTables are stored as unique_ptrs to allow for deallocation during
+ /// LDV; this was measured to have a significant impact on compiler memory
+ /// usage.
+ SmallVector<std::unique_ptr<ValueTable>, 0> Storage;
+};
/// Thin wrapper around an integer -- designed to give more type safety to
/// spill location numbers.
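
Storing each per-block table behind a unique_ptr is what lets ejectTableForBlock release memory mid-pass, per the comment in the struct above. A hedged sketch of the intended access pattern (MF and NumLocs are assumed from the surrounding pass, names illustrative):

  FuncValueTable MInLocs(MF.getNumBlockIDs(), NumLocs);
  for (MachineBasicBlock &MBB : MF) {
    ValueTable &LiveIns = MInLocs[MBB]; // indexed by MBB number internally
    // ... consume LiveIns[Loc.asU64()] ...
    MInLocs.ejectTableForBlock(MBB);    // frees this block's table early
    assert(!MInLocs.hasTableFor(MBB));
  }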
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 0203034b5a01..643370f0573d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -426,8 +426,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Erase any virtregs that are now empty and unused. There may be <undef>
// uses around. Keep the empty live range in that case.
- for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
- Register Reg = RegsToErase[i];
+ for (Register Reg : RegsToErase) {
if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
ToShrink.remove(&LIS.getInterval(Reg));
eraseVirtReg(Reg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index f3b5069d351b..af0b0a20c856 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -13,7 +13,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
@@ -24,7 +28,7 @@
using namespace llvm;
-#define DEBUG_TYPE "loweremutls"
+#define DEBUG_TYPE "lower-emutls"
namespace {
@@ -36,22 +40,41 @@ public:
}
bool runOnModule(Module &M) override;
-private:
- bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
- static void copyLinkageVisibility(Module &M,
- const GlobalVariable *from,
- GlobalVariable *to) {
- to->setLinkage(from->getLinkage());
- to->setVisibility(from->getVisibility());
- to->setDSOLocal(from->isDSOLocal());
- if (from->hasComdat()) {
- to->setComdat(M.getOrInsertComdat(to->getName()));
- to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
- }
- }
};
}
+static bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
+
+static void copyLinkageVisibility(Module &M, const GlobalVariable *from,
+ GlobalVariable *to) {
+ to->setLinkage(from->getLinkage());
+ to->setVisibility(from->getVisibility());
+ to->setDSOLocal(from->isDSOLocal());
+ if (from->hasComdat()) {
+ to->setComdat(M.getOrInsertComdat(to->getName()));
+ to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
+ }
+}
+
+PreservedAnalyses LowerEmuTLSPass::run(Module &M, ModuleAnalysisManager &MAM) {
+ bool Changed = false;
+ SmallVector<const GlobalVariable *, 8> TlsVars;
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal())
+ TlsVars.push_back(&G);
+ }
+ for (const auto *G : TlsVars)
+ Changed |= addEmuTlsVar(M, G);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ PA.abandon<GlobalsAA>();
+ PA.abandon<ModuleSummaryIndexAnalysis>();
+ PA.abandon<StackSafetyGlobalAnalysis>();
+ return PA;
+}
+
char LowerEmuTLS::ID = 0;
INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE,
@@ -83,7 +106,7 @@ bool LowerEmuTLS::runOnModule(Module &M) {
return Changed;
}
-bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+bool addEmuTlsVar(Module &M, const GlobalVariable *GV) {
LLVMContext &C = M.getContext();
PointerType *VoidPtrType = PointerType::getUnqual(C);
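
With the new-PM entry point in place (and, per the diffstat, a matching PassRegistry.def change), the pass can be scheduled directly. A minimal sketch, assuming MAM is a ModuleAnalysisManager with the standard analyses registered and that the registered pass name matches the renamed DEBUG_TYPE:

  ModulePassManager MPM;
  MPM.addPass(LowerEmuTLSPass());
  MPM.run(M, MAM); // presumably reachable as `opt -passes=lower-emutls`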
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index b9db34f7be95..6eeed8b5c3f7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -208,8 +208,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
SmallSet<Register, 32> Added;
- for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
- Register Reg = LocalDefs[i];
+ for (Register Reg : LocalDefs) {
if (Added.insert(Reg).second) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
@@ -218,8 +217,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
}
- for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
- Register Reg = ExternUses[i];
+ for (Register Reg : ExternUses) {
bool isKill = KilledUseSet.count(Reg);
bool isUndef = UndefUseSet.count(Reg);
MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
index aff4d95781f4..5bd6ca0978a4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -212,15 +212,9 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
}
std::unique_ptr<ScheduleDAGMutation>
-llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates) {
+llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates,
+ bool BranchOnly) {
if (EnableMacroFusion)
- return std::make_unique<MacroFusion>(Predicates, true);
- return nullptr;
-}
-
-std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation(
- ArrayRef<MacroFusionPredTy> Predicates) {
- if (EnableMacroFusion)
- return std::make_unique<MacroFusion>(Predicates, false);
+ return std::make_unique<MacroFusion>(Predicates, !BranchOnly);
return nullptr;
}
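// A hedged usage sketch for the merged factory above: callers that previously
// used createBranchMacroFusionDAGMutation() now pass BranchOnly = true. The
// predicate below is a placeholder, not a real target hook.
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include <memory>

static bool placeholderShouldFuse(const llvm::TargetInstrInfo &,
                                  const llvm::TargetSubtargetInfo &,
                                  const llvm::MachineInstr *,
                                  const llvm::MachineInstr &) {
  return false; // a real target would pattern-match fusible pairs here
}

std::unique_ptr<llvm::ScheduleDAGMutation> makeFusionMutation(bool BranchOnly) {
  return llvm::createMacroFusionDAGMutation({placeholderShouldFuse},
                                            BranchOnly);
}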
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 40c42cabf776..e81d47930136 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -62,6 +62,118 @@ static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator",
namespace {
+/// Assigns an ascending index to each instruction in a machine basic block.
+/// The index can be used to determine dominance between instructions in the
+/// same MBB.
+class InstrPosIndexes {
+public:
+ void unsetInitialized() { IsInitialized = false; }
+
+ void init(const MachineBasicBlock &MBB) {
+ CurMBB = &MBB;
+ Instr2PosIndex.clear();
+ uint64_t LastIndex = 0;
+ for (const MachineInstr &MI : MBB) {
+ LastIndex += InstrDist;
+ Instr2PosIndex[&MI] = LastIndex;
+ }
+ }
+
+  /// Set \p Index to the index of \p MI. If \p MI is newly inserted, try to
+  /// assign it an index without affecting the indexes of existing
+  /// instructions. Return true if all instruction indexes had to be
+  /// reassigned.
+ bool getIndex(const MachineInstr &MI, uint64_t &Index) {
+ if (!IsInitialized) {
+ init(*MI.getParent());
+ IsInitialized = true;
+ Index = Instr2PosIndex.at(&MI);
+ return true;
+ }
+
+ assert(MI.getParent() == CurMBB && "MI is not in CurMBB");
+ auto It = Instr2PosIndex.find(&MI);
+ if (It != Instr2PosIndex.end()) {
+ Index = It->second;
+ return false;
+ }
+
+    // Distance is the number of consecutive unassigned instructions,
+    // including MI. Start is the first of those instructions; End is the
+    // instruction after the last of them.
+ // e.g.
+    // |Instruction| A    | B | C | MI | D | E    |
+    // |   Index   | 1024 |   |   |    |   | 2048 |
+ //
+ // In this case, B, C, MI, D are unassigned. Distance is 4, Start is B, End
+ // is E.
+ unsigned Distance = 1;
+ MachineBasicBlock::const_iterator Start = MI.getIterator(),
+ End = std::next(Start);
+ while (Start != CurMBB->begin() &&
+ !Instr2PosIndex.count(&*std::prev(Start))) {
+ --Start;
+ ++Distance;
+ }
+ while (End != CurMBB->end() && !Instr2PosIndex.count(&*(End))) {
+ ++End;
+ ++Distance;
+ }
+
+    // LastIndex is initialized to the last used index prior to MI, or zero.
+    // In the previous example, LastIndex is 1024 and EndIndex is 2048.
+ uint64_t LastIndex =
+ Start == CurMBB->begin() ? 0 : Instr2PosIndex.at(&*std::prev(Start));
+ uint64_t Step;
+ if (End == CurMBB->end())
+ Step = static_cast<uint64_t>(InstrDist);
+ else {
+ // No instruction uses index zero.
+ uint64_t EndIndex = Instr2PosIndex.at(&*End);
+ assert(EndIndex > LastIndex && "Index must be ascending order");
+ unsigned NumAvailableIndexes = EndIndex - LastIndex - 1;
+      // We want the index gap between two adjacent MIs to be as even as
+      // possible. Let A be the total number of available indexes, D the
+      // number of consecutive unassigned instructions, and S the step.
+      //   |<- S-1 -> MI <- S-1 -> MI <- A-S*D ->|
+      // There are S-1 available indexes between each unassigned instruction
+      // and its predecessor, and A-S*D available indexes between the last
+      // unassigned instruction and its successor.
+      // Ideally, we want
+      //   S-1 = A-S*D
+      // hence
+      //   S = (A+1)/(D+1)
+      // A valid S must be an integer greater than zero, so
+      //   S <= (A+1)/(D+1)
+      // =>
+      //   A-S*D >= 0
+      // That means we can safely use (A+1)/(D+1) as the step.
+      // In the previous example, Step is 204 and the indexes of B, C, MI, D
+      // are 1228, 1432, 1636, 1840.
+ Step = (NumAvailableIndexes + 1) / (Distance + 1);
+ }
+
+    // Reassign indexes for all instructions if the number of newly inserted
+    // instructions exceeds the available slots, or if all instructions are
+    // new.
+ if (LLVM_UNLIKELY(!Step || (!LastIndex && Step == InstrDist))) {
+ init(*CurMBB);
+ Index = Instr2PosIndex.at(&MI);
+ return true;
+ }
+
+ for (auto I = Start; I != End; ++I) {
+ LastIndex += Step;
+ Instr2PosIndex[&*I] = LastIndex;
+ }
+ Index = Instr2PosIndex.at(&MI);
+ return false;
+ }
+
+private:
+ bool IsInitialized = false;
+ enum { InstrDist = 1024 };
+ const MachineBasicBlock *CurMBB = nullptr;
+ DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex;
+};
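// A standalone sketch replaying the index arithmetic above on the worked
// example from the comments (LastIndex = 1024, EndIndex = 2048, four
// unassigned instructions). This is plain arithmetic, not LLVM code.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t LastIndex = 1024, EndIndex = 2048;
  unsigned Distance = 4;                 // B, C, MI, D
  uint64_t A = EndIndex - LastIndex - 1; // 1023 available indexes
  uint64_t S = (A + 1) / (Distance + 1); // 1024 / 5 = 204
  for (unsigned I = 0; I != Distance; ++I) {
    LastIndex += S;
    std::printf("%llu\n", (unsigned long long)LastIndex);
  }
  // Prints 1228, 1432, 1636, 1840 -- all strictly between 1024 and 2048,
  // so existing indexes are untouched and the ascending order is preserved.
  return 0;
}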
+
class RegAllocFast : public MachineFunctionPass {
public:
static char ID;
@@ -153,6 +265,9 @@ private:
// Register masks attached to the current instruction.
SmallVector<const uint32_t *> RegMasks;
+ // Assign index for each instruction to quickly determine dominance.
+ InstrPosIndexes PosIndexes;
+
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
bool isPhysRegFree(MCPhysReg PhysReg) const;
@@ -339,18 +454,13 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
return FrameIdx;
}
-static bool dominates(MachineBasicBlock &MBB,
- MachineBasicBlock::const_iterator A,
- MachineBasicBlock::const_iterator B) {
- auto MBBEnd = MBB.end();
- if (B == MBBEnd)
- return true;
-
- MachineBasicBlock::const_iterator I = MBB.begin();
- for (; &*I != A && &*I != B; ++I)
- ;
-
- return &*I == A;
+static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A,
+ const MachineInstr &B) {
+ uint64_t IndexA, IndexB;
+ PosIndexes.getIndex(A, IndexA);
+ if (LLVM_UNLIKELY(PosIndexes.getIndex(B, IndexB)))
+ PosIndexes.getIndex(A, IndexA);
+ return IndexA < IndexB;
}
/// Returns false if \p VirtReg is known to not live out of the current block.
@@ -371,7 +481,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
} else {
- if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+ if (!SelfLoopDef || dominates(PosIndexes, DefInst, *SelfLoopDef))
SelfLoopDef = &DefInst;
}
}
@@ -396,7 +506,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// Try to handle some simple cases to avoid spilling and reloading every
// value inside a self looping block.
if (SelfLoopDef == &UseInst ||
- !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
+ !dominates(PosIndexes, *SelfLoopDef, UseInst)) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
@@ -1565,6 +1675,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
+ PosIndexes.unsetInitialized();
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index fba8c35ecec2..17a9f55cccc0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -165,8 +165,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
- for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
- unsigned PhysReg = CSRAlias[i];
+ for (unsigned PhysReg : CSRAlias) {
uint8_t Cost = RegCosts[PhysReg];
if (Cost != LastCost)
LastCostChange = N;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c1af37c8510f..3fbb93795075 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -305,7 +305,11 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
+ /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ bool IsSubregToReg);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1343,8 +1347,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
- unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
- DefMI->getOperand(0).getSubReg());
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
if (NewDstIdx)
NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
@@ -1493,7 +1496,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);
// Update machine operands and add flags.
- updateRegDefsUses(DstReg, DstReg, DstIdx);
+ updateRegDefsUses(DstReg, DstReg, DstIdx, false);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1618,8 +1621,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewMI.addOperand(MO);
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
- for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
- MCRegister Reg = NewMIImplDefs[i];
+ for (MCRegister Reg : NewMIImplDefs) {
for (MCRegUnit Unit : TRI->regunits(Reg))
if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
@@ -1814,7 +1816,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+ unsigned SubIdx, bool IsSubregToReg) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1854,6 +1856,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool FullDef = true;
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
@@ -1861,9 +1865,13 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
- if (SubIdx && MO.isDef())
+ if (SubIdx && MO.isDef()) {
MO.setIsUndef(!Reads);
+ if (!Reads)
+ FullDef = false;
+ }
+
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
if (MO.isUse() && !DstIsPhys) {
@@ -1895,6 +1903,25 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
+ if (IsSubregToReg && !FullDef) {
+    // If the coalesced instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+
+  // FIXME: Need to add a new subrange if tracking subranges. We could also
+  // skip adding the implicit-def if we knew the other lanes are dead, or add
+  // it only for the other lanes.
+
+ assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
+ "this should update subranges");
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ }
+
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
@@ -2094,6 +2121,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
}
+ const bool IsSubregToReg = CopyMI->isSubregToReg();
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2161,9 +2190,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
- updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ if (CP.getDstIdx()) {
+ assert(!IsSubregToReg && "can this happen?");
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ IsSubregToReg);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
@@ -4236,8 +4268,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
InflateRegs.end());
LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
<< " regs.\n");
- for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
- Register Reg = InflateRegs[i];
+ for (Register Reg : InflateRegs) {
if (MRI->reg_nodbg_empty(Reg))
continue;
if (MRI->recomputeRegClass(Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 36c91b7fa97e..893aa4a91828 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -15,14 +15,17 @@
#include "llvm/CodeGen/ReplaceWithVeclib.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -38,138 +41,137 @@ STATISTIC(NumTLIFuncDeclAdded,
STATISTIC(NumFuncUsedAdded,
"Number of functions added to `llvm.compiler.used`");
-static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
- Module *M = CI.getModule();
-
- Function *OldFunc = CI.getCalledFunction();
-
- // Check if the vector library function is already declared in this module,
- // otherwise insert it.
+/// Returns a vector Function, inserting it into the Module \p M if it is not
+/// already declared there. When \p ScalarFunc is not null, its attributes are
+/// copied to the newly created Function.
+Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
+ const StringRef TLIName,
+ Function *ScalarFunc = nullptr) {
Function *TLIFunc = M->getFunction(TLIName);
if (!TLIFunc) {
- TLIFunc = Function::Create(OldFunc->getFunctionType(),
- Function::ExternalLinkage, TLIName, *M);
- TLIFunc->copyAttributesFrom(OldFunc);
+ TLIFunc =
+ Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
+ if (ScalarFunc)
+ TLIFunc->copyAttributesFrom(ScalarFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
<< TLIName << "` of type `" << *(TLIFunc->getType())
<< "` to module.\n");
++NumTLIFuncDeclAdded;
-
- // Add the freshly created function to llvm.compiler.used,
- // similar to as it is done in InjectTLIMappings
+ // Add the freshly created function to llvm.compiler.used, similar to as it
+ // is done in InjectTLIMappings.
appendToCompilerUsed(*M, {TLIFunc});
-
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
<< "` to `@llvm.compiler.used`.\n");
++NumFuncUsedAdded;
}
+ return TLIFunc;
+}
- // Replace the call to the vector intrinsic with a call
- // to the corresponding function from the vector library.
- IRBuilder<> IRBuilder(&CI);
- SmallVector<Value *> Args(CI.args());
- // Preserve the operand bundles.
- SmallVector<OperandBundleDef, 1> OpBundles;
- CI.getOperandBundlesAsDefs(OpBundles);
- CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
- assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
- "Expecting function types to be identical");
- CI.replaceAllUsesWith(Replacement);
- if (isa<FPMathOperator>(Replacement)) {
- // Preserve fast math flags for FP math.
- Replacement->copyFastMathFlags(&CI);
+/// Replace the call to the vector intrinsic (\p CalltoReplace) with a call to
+/// the corresponding function from the vector library (\p TLIVecFunc).
+static void replaceWithTLIFunction(CallInst &CalltoReplace, VFInfo &Info,
+ Function *TLIVecFunc) {
+ IRBuilder<> IRBuilder(&CalltoReplace);
+ SmallVector<Value *> Args(CalltoReplace.args());
+ if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
+ auto *MaskTy = VectorType::get(Type::getInt1Ty(CalltoReplace.getContext()),
+ Info.Shape.VF);
+ Args.insert(Args.begin() + OptMaskpos.value(),
+ Constant::getAllOnesValue(MaskTy));
}
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
- << OldFunc->getName() << "` with call to `" << TLIName
- << "`.\n");
- ++NumCallsReplaced;
- return true;
+ // Preserve the operand bundles.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CalltoReplace.getOperandBundlesAsDefs(OpBundles);
+ CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
+ CalltoReplace.replaceAllUsesWith(Replacement);
+ // Preserve fast math flags for FP math.
+ if (isa<FPMathOperator>(Replacement))
+ Replacement->copyFastMathFlags(&CalltoReplace);
}
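// A small sketch of the mask materialization above: when only a masked TLI
// variant exists for an unmasked intrinsic call, an all-true <VF x i1> mask is
// spliced into the argument list. The free function below is illustrative, not
// part of this diff.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static llvm::Constant *makeAllTrueMask(llvm::LLVMContext &Ctx,
                                       llvm::ElementCount VF) {
  auto *MaskTy = llvm::VectorType::get(llvm::Type::getInt1Ty(Ctx), VF);
  return llvm::Constant::getAllOnesValue(MaskTy); // all-ones i1 == all-true
}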
+/// Returns true when \p CallToReplace was successfully replaced with a
+/// suitable function taking vector arguments, based on the available mappings
+/// in \p TLI. Currently this only works when \p CallToReplace is a call to a
+/// vectorizable intrinsic.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
- CallInst &CI) {
- if (!CI.getCalledFunction()) {
+ CallInst &CallToReplace) {
+ if (!CallToReplace.getCalledFunction())
return false;
- }
- auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
- if (IntrinsicID == Intrinsic::not_intrinsic) {
- // Replacement is only performed for intrinsic functions
+ auto IntrinsicID = CallToReplace.getCalledFunction()->getIntrinsicID();
+ // Replacement is only performed for intrinsic functions.
+ if (IntrinsicID == Intrinsic::not_intrinsic)
return false;
- }
- // Convert vector arguments to scalar type and check that
- // all vector operands have identical vector width.
+  // Compute the argument types of the corresponding scalar call. Additionally,
+  // check that in the vector call all vector operands have the same EC.
ElementCount VF = ElementCount::getFixed(0);
- SmallVector<Type *> ScalarTypes;
- for (auto Arg : enumerate(CI.args())) {
- auto *ArgType = Arg.value()->getType();
- // Vector calls to intrinsics can still have
- // scalar operands for specific arguments.
+ SmallVector<Type *> ScalarArgTypes;
+ for (auto Arg : enumerate(CallToReplace.args())) {
+ auto *ArgTy = Arg.value()->getType();
if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
- ScalarTypes.push_back(ArgType);
- } else {
- // The argument in this place should be a vector if
- // this is a call to a vector intrinsic.
- auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
- if (!VectorArgTy) {
- // The argument is not a vector, do not perform
- // the replacement.
- return false;
- }
- ElementCount NumElements = VectorArgTy->getElementCount();
- if (NumElements.isScalable()) {
- // The current implementation does not support
- // scalable vectors.
+ ScalarArgTypes.push_back(ArgTy);
+ } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+ ScalarArgTypes.push_back(ArgTy->getScalarType());
+      // Disallow vector arguments with different VFs. When processing the
+      // first vector argument, store its VF; for the rest, ensure that they
+      // match it.
+ if (VF.isZero())
+ VF = VectorArgTy->getElementCount();
+ else if (VF != VectorArgTy->getElementCount())
return false;
- }
- if (VF.isNonZero() && VF != NumElements) {
- // The different arguments differ in vector size.
- return false;
- } else {
- VF = NumElements;
- }
- ScalarTypes.push_back(VectorArgTy->getElementType());
- }
+ } else
+ // Exit when it is supposed to be a vector argument but it isn't.
+ return false;
}
- // Try to reconstruct the name for the scalar version of this
- // intrinsic using the intrinsic ID and the argument types
- // converted to scalar above.
- std::string ScalarName;
- if (Intrinsic::isOverloaded(IntrinsicID)) {
- ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
- } else {
- ScalarName = Intrinsic::getName(IntrinsicID).str();
- }
+ // Try to reconstruct the name for the scalar version of this intrinsic using
+ // the intrinsic ID and the argument types converted to scalar above.
+ std::string ScalarName =
+ (Intrinsic::isOverloaded(IntrinsicID)
+ ? Intrinsic::getName(IntrinsicID, ScalarArgTypes,
+ CallToReplace.getModule())
+ : Intrinsic::getName(IntrinsicID).str());
+
+ // Try to find the mapping for the scalar version of this intrinsic and the
+ // exact vector width of the call operands in the TargetLibraryInfo. First,
+ // check with a non-masked variant, and if that fails try with a masked one.
+ const VecDesc *VD =
+ TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ false);
+ if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ true)))
+ return false;
- if (!TLI.isFunctionVectorizable(ScalarName)) {
- // The TargetLibraryInfo does not contain a vectorized version of
- // the scalar function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName
+ << "` and vector width " << VF << " to: `"
+ << VD->getVectorFnName() << "`.\n");
+
+ // Replace the call to the intrinsic with a call to the vector library
+ // function.
+ Type *ScalarRetTy = CallToReplace.getType()->getScalarType();
+ FunctionType *ScalarFTy =
+ FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
+ const std::string MangledName = VD->getVectorFunctionABIVariantString();
+ auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
+ if (!OptInfo)
return false;
- }
- // Try to find the mapping for the scalar version of this intrinsic
- // and the exact vector width of the call operands in the
- // TargetLibraryInfo.
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
-
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
- << ScalarName << "` and vector width " << VF << ".\n");
-
- if (!TLIName.empty()) {
- // Found the correct mapping in the TargetLibraryInfo,
- // replace the call to the intrinsic with a call to
- // the vector library function.
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
- << "`.\n");
- return replaceWithTLIFunction(CI, TLIName);
- }
+ FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
+ if (!VectorFTy)
+ return false;
+
+ Function *FuncToReplace = CallToReplace.getCalledFunction();
+ Function *TLIFunc = getTLIFunction(CallToReplace.getModule(), VectorFTy,
+ VD->getVectorFnName(), FuncToReplace);
+ replaceWithTLIFunction(CallToReplace, *OptInfo, TLIFunc);
- return false;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << FuncToReplace->getName() << "` with call to `"
+ << TLIFunc->getName() << "`.\n");
+ ++NumCallsReplaced;
+ return true;
}
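// A hedged sketch of the two-step TLI query above: prefer the unmasked
// mapping and fall back to the masked one, mirroring the lookup order in
// replaceWithCallToVeclib. `lookupVecDesc` is a hypothetical helper.
#include "llvm/Analysis/TargetLibraryInfo.h"

static const llvm::VecDesc *lookupVecDesc(const llvm::TargetLibraryInfo &TLI,
                                          llvm::StringRef ScalarName,
                                          llvm::ElementCount VF) {
  if (const llvm::VecDesc *VD =
          TLI.getVectorMappingInfo(ScalarName, VF, /*Masked=*/false))
    return VD;
  return TLI.getVectorMappingInfo(ScalarName, VF, /*Masked=*/true);
}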
static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
@@ -185,9 +187,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
}
// Erase the calls to the intrinsics that have been replaced
// with calls to the vector library.
- for (auto *CI : ReplacedCalls) {
+ for (auto *CI : ReplacedCalls)
CI->eraseFromParent();
- }
return Changed;
}
@@ -207,10 +208,10 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F,
PA.preserve<DemandedBitsAnalysis>();
PA.preserve<OptimizationRemarkEmitterAnalysis>();
return PA;
- } else {
- // The pass did not replace any calls, hence it preserves all analyses.
- return PreservedAnalyses::all();
}
+
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c782ad117ce6..0d46c7868d87 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13703,8 +13703,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
- N0.getValueType()) ||
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
@@ -13935,8 +13934,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
- N0.getValueType())) {
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
SDLoc DL(N);
SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
@@ -14759,6 +14757,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ N0.hasOneUse() &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
// Avoid creating illegal types if running after type legalizer.
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
@@ -14818,11 +14817,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue Reduced = reduceLoadWidth(N))
return Reduced;
- // Handle the case where the load remains an extending load even
- // after truncation.
+ // Handle the case where the truncated result is at least as wide as the
+ // loaded type.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
auto *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
SDValue NewLoad = DAG.getExtLoad(
LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
@@ -15165,8 +15164,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->getAlign(),
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ LN0->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
@@ -18855,8 +18853,7 @@ struct LoadedSlice {
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
- if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
- LS.Inst->getValueType(0)))
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index a83129586339..f3d8edb8926b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1000,8 +1000,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (!CanLowerReturn)
return false;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
+ for (EVT VT : RetTys) {
MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 03cba892a167..5926a6058111 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -377,8 +377,7 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
Register FirstReg;
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
+ for (EVT ValueVT : ValueVTs) {
MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5e1f9fbcdde0..4e317062cec4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -913,14 +913,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// normal undefined upper bits behavior to allow using an in-reg extend
// with the illegal FP type, so load as an integer and do the
// from-integer conversion.
- if (SrcVT.getScalarType() == MVT::f16) {
+ EVT SVT = SrcVT.getScalarType();
+ if (SVT == MVT::f16 || SVT == MVT::bf16) {
EVT ISrcVT = SrcVT.changeTypeToInteger();
EVT IDestVT = DestVT.changeTypeToInteger();
EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
Ptr, ISrcVT, LD->getMemOperand());
- Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Value =
+ DAG.getNode(SVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP,
+ dl, DestVT, Result);
Chain = Result.getValue(1);
break;
}
@@ -4905,7 +4908,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
static MVT getPromotedVectorElementType(const TargetLowering &TLI,
MVT EltVT, MVT NewEltVT) {
unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
- MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ MVT MidVT = OldEltsPerNewElt == 1
+ ? NewEltVT
+ : MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
assert(TLI.isTypeLegal(MidVT) && "unexpected");
return MidVT;
}
@@ -5349,6 +5354,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
+ case ISD::FCANONICALIZE:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(
@@ -5391,7 +5397,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
"Invalid promote type for build_vector");
- assert(NewEltVT.bitsLT(EltVT) && "not handled");
+ assert(NewEltVT.bitsLE(EltVT) && "not handled");
MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
@@ -5402,7 +5408,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
SDLoc SL(Node);
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue Concat =
+ DAG.getNode(MidVT == NewEltVT ? ISD::BUILD_VECTOR : ISD::CONCAT_VECTORS,
+ SL, NVT, NewOps);
SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
Results.push_back(CvtVec);
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index c4605a6b9598..65919a64b806 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT_SAT:
R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::STRICT_FP_EXTEND:
+ R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo);
+ break;
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
@@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
}
+SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1);
+
+ SDValue Op = GetPromotedFloat(N->getOperand(1));
+ EVT VT = N->getValueType(0);
+
+  // The desired VT is the same as the promoted type; use the promoted float
+  // directly.
+ if (VT == Op->getValueType(0)) {
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ return Op;
+ }
+
+ // Else, extend the promoted float value to the desired VT.
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(),
+ N->getOperand(0), Op);
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
// Promote the float operands used for comparison. The true- and false-
// operands have the same type as the result and are promoted, if needed, by
// PromoteFloatRes_SELECT_CC
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 362fa92dd44b..3d21bd22e6ef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1871,6 +1871,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = PromoteIntOp_VP_STRIDED(N, OpNo);
break;
+ case ISD::EXPERIMENTAL_VP_SPLICE:
+ Res = PromoteIntOp_VP_SPLICE(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2549,6 +2552,20 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo) {
+ SmallVector<SDValue, 6> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Offset operand
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert((OpNo == 4 || OpNo == 5) && "Unexpected operand for promotion");
+
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
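// A plain-integer sketch of the extension choice above: the vp.splice offset
// (operand 2) is a signed value, so promotion must sign-extend it, while the
// EVL operands (4 and 5) are unsigned element counts and are zero-extended.
#include <cstdint>

static int64_t promoteOffset(int32_t Off) { return Off; } // sign-extends
static uint64_t promoteEVL(uint32_t Evl) { return Evl; }  // zero-extends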
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9d5931b44ac6..84b1b2c71fd0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -410,6 +410,7 @@ private:
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -712,6 +713,7 @@ private:
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index ab4c33c9e976..f73ddfee2b90 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -296,28 +296,24 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (isNewLoad)
AddPred(LoadSU, ChainPred);
}
- for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
- const SDep &Pred = LoadPreds[i];
+ for (const SDep &Pred : LoadPreds) {
RemovePred(SU, Pred);
if (isNewLoad) {
AddPred(LoadSU, Pred);
}
}
- for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
- const SDep &Pred = NodePreds[i];
+ for (const SDep &Pred : NodePreds) {
RemovePred(SU, Pred);
AddPred(NewSU, Pred);
}
- for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
- SDep D = NodeSuccs[i];
+ for (SDep D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
}
- for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
- SDep D = ChainSuccs[i];
+ for (SDep D : ChainSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5be1892a44f6..81facf92e55a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6858,8 +6858,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// expanding copies of large vectors from registers. This only works for
// fixed length vectors, since we need to know the exact number of
// elements.
- if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() &&
- N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) {
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getOperand(0).getValueType().isFixedLengthVector()) {
unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
@@ -6976,7 +6976,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
// the concat have the same type as the extract.
- if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 &&
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
VT == N1.getOperand(0).getValueType()) {
unsigned Factor = VT.getVectorMinNumElements();
return N1.getOperand(N2C->getZExtValue() / Factor);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 12ed4a82ee91..3c4b285cb067 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -10627,8 +10627,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
unsigned CurReg = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
+ for (EVT VT : RetTys) {
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index a1cf4cbbee1b..3dc6e4bbcf46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2786,7 +2786,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
Val = decodeSignRotatedValue(Val);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- return C && C->getSExtValue() == Val;
+ return C && C->getAPIntValue().trySExtValue() == Val;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -3612,12 +3612,24 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr));
continue;
}
- case OPC_EmitRegister: {
- MVT::SimpleValueType VT =
- static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ case OPC_EmitRegister:
+ case OPC_EmitRegisterI32:
+ case OPC_EmitRegisterI64: {
+ MVT::SimpleValueType VT;
+ switch (Opcode) {
+ case OPC_EmitRegisterI32:
+ VT = MVT::i32;
+ break;
+ case OPC_EmitRegisterI64:
+ VT = MVT::i64;
+ break;
+ default:
+ VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ break;
+ }
unsigned RegNo = MatcherTable[MatcherIndex++];
- RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getRegister(RegNo, VT), nullptr));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode *>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
continue;
}
case OPC_EmitRegister2: {
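// A standalone sketch of the table-compression idea above: common value types
// get dedicated opcodes so the matcher table can omit the VT byte. The enums
// and decoder here are hypothetical stand-ins for the matcher encoding.
#include <cstdint>
#include <vector>

enum Op : uint8_t { EmitReg, EmitRegI32, EmitRegI64 };

struct EmittedReg { uint8_t VT, RegNo; };

static EmittedReg decode(const std::vector<uint8_t> &Table, size_t &Idx) {
  Op Opc = static_cast<Op>(Table[Idx++]);
  uint8_t VT;
  switch (Opc) {
  case EmitRegI32: VT = 32; break;   // VT implied by the opcode
  case EmitRegI64: VT = 64; break;   // VT implied by the opcode
  default: VT = Table[Idx++]; break; // VT stored in the table
  }
  return {VT, Table[Idx++]};         // the register number always follows
}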
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 38f658084294..d4840d117110 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -115,7 +115,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
if (!C->isNullValue())
NumMeta = I + 1;
- Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ Metadata.push_back(C);
}
Metadata.resize(NumMeta);
@@ -173,7 +173,7 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
bool ShadowStackGCLowering::doInitialization(Module &M) {
bool Active = false;
for (Function &F : M) {
- if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
+ if (F.hasGC() && F.getGC() == "shadow-stack") {
Active = true;
break;
}
@@ -292,8 +292,7 @@ void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const {
/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Quick exit for functions that do not use the shadow stack GC.
- if (!F.hasGC() ||
- F.getGC() != std::string("shadow-stack"))
+ if (!F.hasGC() || F.getGC() != "shadow-stack")
return false;
LLVMContext &Context = F.getContext();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9a0dd92bb58e..6e69dc66429d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2681,6 +2681,13 @@ MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
return SelectSectionForGlobal(GO, Kind, TM);
}
+MCSection *TargetLoweringObjectFileGOFF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
+ std::string Name = ".gcc_exception_table." + F.getName().str();
+ return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr,
+ nullptr);
+}
+
MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
auto *Symbol = TM.getSymbol(GO);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index faa5466b69e8..4003a08a5422 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -947,6 +947,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
case ExceptionHandling::AIX:
+ case ExceptionHandling::ZOS:
addPass(createDwarfEHPass(getOptLevel()));
break;
case ExceptionHandling::WinEH:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index bf689dbd308f..526cb847e8a0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1124,8 +1124,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
}
}
- for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
- Register MOReg = OtherDefs[i];
+ for (Register MOReg : OtherDefs) {
if (regOverlapsSet(Uses, MOReg))
return false;
if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg))
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 409bec7a874b..809b2d51f059 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -188,21 +188,41 @@ private:
Edge::Kind DeltaKind;
Symbol *TargetSymbol;
uint64_t Addend;
+
+ bool FixingFromSymbol = true;
if (&BlockToFix == &FromSymbol->getAddressable()) {
+ if (LLVM_UNLIKELY(&BlockToFix == &ToSymbol->getAddressable())) {
+ // From and To are symbols in the same block. Decide direction by offset
+ // instead.
+ if (ToSymbol->getAddress() > FixupAddress)
+ FixingFromSymbol = true;
+ else if (FromSymbol->getAddress() > FixupAddress)
+ FixingFromSymbol = false;
+ else
+ FixingFromSymbol = FromSymbol->getAddress() >= ToSymbol->getAddress();
+ } else
+ FixingFromSymbol = true;
+ } else {
+ if (&BlockToFix == &ToSymbol->getAddressable())
+ FixingFromSymbol = false;
+ else {
+ // BlockToFix was neither FromSymbol nor ToSymbol.
+ return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+ "either 'A' or 'B' (or a symbol in one "
+ "of their alt-entry groups)");
+ }
+ }
+
+ if (FixingFromSymbol) {
TargetSymbol = ToSymbol;
DeltaKind = (SubRI.r_length == 3) ? aarch64::Delta64 : aarch64::Delta32;
Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
// FIXME: handle extern 'from'.
- } else if (&BlockToFix == &ToSymbol->getAddressable()) {
+ } else {
TargetSymbol = &*FromSymbol;
DeltaKind =
(SubRI.r_length == 3) ? aarch64::NegDelta64 : aarch64::NegDelta32;
Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
- } else {
- // BlockToFix was neither FromSymbol nor ToSymbol.
- return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
- "either 'A' or 'B' (or a symbol in one "
- "of their alt-entry groups)");
}
return PairRelocInfo(DeltaKind, TargetSymbol, Addend);
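// A pure-function sketch of the tie-break above for when 'A' and 'B' live in
// the same block: pick the anchor symbol by comparing symbol addresses against
// the fixup address, mirroring the FixingFromSymbol logic.
#include <cstdint>

static bool fixingFromSymbol(uint64_t FixupAddr, uint64_t FromAddr,
                             uint64_t ToAddr) {
  if (ToAddr > FixupAddr)
    return true;  // delta anchored on 'To'
  if (FromAddr > FixupAddr)
    return false; // negative delta anchored on 'From'
  return FromAddr >= ToAddr;
}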
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 49f619357f08..eeca27771ad6 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -179,21 +179,41 @@ private:
Edge::Kind DeltaKind;
Symbol *TargetSymbol;
uint64_t Addend;
+
+ bool FixingFromSymbol = true;
if (&BlockToFix == &FromSymbol->getAddressable()) {
+ if (LLVM_UNLIKELY(&BlockToFix == &ToSymbol->getAddressable())) {
+ // From and To are symbols in the same block. Decide direction by offset
+ // instead.
+ if (ToSymbol->getAddress() > FixupAddress)
+ FixingFromSymbol = true;
+ else if (FromSymbol->getAddress() > FixupAddress)
+ FixingFromSymbol = false;
+ else
+ FixingFromSymbol = FromSymbol->getAddress() >= ToSymbol->getAddress();
+ } else
+ FixingFromSymbol = true;
+ } else {
+ if (&BlockToFix == &ToSymbol->getAddressable())
+ FixingFromSymbol = false;
+ else {
+ // BlockToFix was neither FromSymbol nor ToSymbol.
+ return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+ "either 'A' or 'B' (or a symbol in one "
+ "of their alt-entry groups)");
+ }
+ }
+
+ if (FixingFromSymbol) {
TargetSymbol = ToSymbol;
DeltaKind = (SubRI.r_length == 3) ? x86_64::Delta64 : x86_64::Delta32;
Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
// FIXME: handle extern 'from'.
- } else if (&BlockToFix == &ToSymbol->getAddressable()) {
+ } else {
TargetSymbol = FromSymbol;
DeltaKind =
(SubRI.r_length == 3) ? x86_64::NegDelta64 : x86_64::NegDelta32;
Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
- } else {
- // BlockToFix was neither FromSymbol nor ToSymbol.
- return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
- "either 'A' or 'B' (or a symbol in one "
- "of their alt-entry chains)");
}
return PairRelocInfo(DeltaKind, TargetSymbol, Addend);
diff --git a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
index 709fe3212623..bcdbe5eadc69 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
@@ -23,13 +23,15 @@ GlobalVariable *FrontendResource::getGlobalVariable() {
cast<ConstantAsMetadata>(Entry->getOperand(0))->getValue());
}
-StringRef FrontendResource::getSourceType() {
- return cast<MDString>(Entry->getOperand(1))->getString();
-}
-
ResourceKind FrontendResource::getResourceKind() {
return static_cast<ResourceKind>(
cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Entry->getOperand(1))->getValue())
+ ->getLimitedValue());
+}
+ElementType FrontendResource::getElementType() {
+ return static_cast<ElementType>(
+ cast<ConstantInt>(
cast<ConstantAsMetadata>(Entry->getOperand(2))->getValue())
->getLimitedValue());
}
@@ -49,14 +51,15 @@ uint32_t FrontendResource::getSpace() {
->getLimitedValue();
}
-FrontendResource::FrontendResource(GlobalVariable *GV, StringRef TypeStr,
- ResourceKind RK, bool IsROV,
+FrontendResource::FrontendResource(GlobalVariable *GV, ResourceKind RK,
+ ElementType ElTy, bool IsROV,
uint32_t ResIndex, uint32_t Space) {
auto &Ctx = GV->getContext();
IRBuilder<> B(Ctx);
Entry = MDNode::get(
- Ctx, {ValueAsMetadata::get(GV), MDString::get(Ctx, TypeStr),
+ Ctx, {ValueAsMetadata::get(GV),
ConstantAsMetadata::get(B.getInt32(static_cast<int>(RK))),
+ ConstantAsMetadata::get(B.getInt32(static_cast<int>(ElTy))),
ConstantAsMetadata::get(B.getInt1(IsROV)),
ConstantAsMetadata::get(B.getInt32(ResIndex)),
ConstantAsMetadata::get(B.getInt32(Space))});
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
index d499d74f7ba0..7fdc35e7fca0 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
@@ -868,7 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
}
if (GVAlign > 1) {
- unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned DstWidth = CI2->getBitWidth();
unsigned SrcWidth = std::min(DstWidth, Log2(GVAlign));
APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
diff --git a/contrib/llvm-project/llvm/lib/IR/Globals.cpp b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
index 51bdbeb0abf2..239acd2181e8 100644
--- a/contrib/llvm-project/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
@@ -144,25 +144,27 @@ void GlobalObject::copyAttributesFrom(const GlobalObject *Src) {
std::string GlobalValue::getGlobalIdentifier(StringRef Name,
GlobalValue::LinkageTypes Linkage,
StringRef FileName) {
-
// Value names may be prefixed with a binary '1' to indicate
// that the backend should not modify the symbols due to any platform
// naming convention. Do not include that '1' in the PGO profile name.
if (Name[0] == '\1')
Name = Name.substr(1);
- std::string NewName = std::string(Name);
+ std::string GlobalName;
if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
// For local symbols, prepend the main file name to distinguish them.
// Do not include the full path in the file name since there's no guarantee
// that it will stay the same, e.g., if the files are checked out from
// version control in different locations.
if (FileName.empty())
- NewName = NewName.insert(0, "<unknown>:");
+ GlobalName += "<unknown>";
else
- NewName = NewName.insert(0, FileName.str() + ":");
+ GlobalName += FileName;
+
+ GlobalName += kGlobalIdentifierDelimiter;
}
- return NewName;
+ GlobalName += Name;
+ return GlobalName;
}
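// A self-contained sketch of the identifier scheme above. The delimiter is
// hard-coded as ':' here as an assumption; the real value comes from the
// kGlobalIdentifierDelimiter constant.
#include <string>

static std::string globalIdentifier(std::string Name, bool IsLocalLinkage,
                                    const std::string &FileName) {
  if (!Name.empty() && Name[0] == '\1')
    Name.erase(0, 1); // drop the "do not mangle" marker
  std::string Out;
  if (IsLocalLinkage) {
    Out += FileName.empty() ? "<unknown>" : FileName;
    Out += ':'; // assumed value of kGlobalIdentifierDelimiter
  }
  return Out + Name;
}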
std::string GlobalValue::getGlobalIdentifier() const {
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
index 8ddf51537ec1..57077e786efc 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
@@ -256,10 +256,13 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) {
RS->emit(*OptDiagBase);
// If there is a report handler, use it.
- if (pImpl->DiagHandler &&
- (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) &&
- pImpl->DiagHandler->handleDiagnostics(DI))
- return;
+ if (pImpl->DiagHandler) {
+ if (DI.getSeverity() == DS_Error)
+ pImpl->DiagHandler->HasErrors = true;
+ if ((!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) &&
+ pImpl->DiagHandler->handleDiagnostics(DI))
+ return;
+ }
if (!isDiagnosticEnabled(DI))
return;
diff --git a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
index 7bc25e30b893..515893d079b8 100644
--- a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
@@ -1566,7 +1566,7 @@ void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {
"Expect existing attachment to be mapped");
auto &InstVec = InstrsIt->second;
- auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this);
+ auto *InstIt = llvm::find(InstVec, this);
assert(InstIt != InstVec.end() &&
"Expect instruction to be mapped to attachment");
// The vector contains a ptr to this. If this is the only element in the
diff --git a/contrib/llvm-project/llvm/lib/IR/Type.cpp b/contrib/llvm-project/llvm/lib/IR/Type.cpp
index 85d779c98a9b..c59bc3622fde 100644
--- a/contrib/llvm-project/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Type.cpp
@@ -834,6 +834,8 @@ struct TargetTypeInfo {
static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
LLVMContext &C = Ty->getContext();
StringRef Name = Ty->getName();
+ if (Name.equals("spirv.Image"))
+ return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal);
if (Name.starts_with("spirv."))
return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit,
TargetExtType::CanBeGlobal);
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index 8aba28026306..aeaca21a99cc 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -2296,10 +2296,9 @@ void Verifier::verifyFunctionMetadata(
Check(isa<ConstantAsMetadata>(MD->getOperand(0)),
"expected a constant operand for !kcfi_type", MD);
Constant *C = cast<ConstantAsMetadata>(MD->getOperand(0))->getValue();
- Check(isa<ConstantInt>(C),
+ Check(isa<ConstantInt>(C) && isa<IntegerType>(C->getType()),
"expected a constant integer operand for !kcfi_type", MD);
- IntegerType *Type = cast<ConstantInt>(C)->getType();
- Check(Type->getBitWidth() == 32,
+ Check(cast<ConstantInt>(C)->getBitWidth() == 32,
"expected a 32-bit integer constant operand for !kcfi_type", MD);
}
}
@@ -5690,8 +5689,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"vector of ints");
auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
- Check(Op3->getType()->getBitWidth() <= 32,
- "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
+ Check(Op3->getType()->isIntegerTy(),
+ "third operand of [us][mul|div]_fix[_sat] must be an int type");
+ Check(Op3->getBitWidth() <= 32,
+ "third operand of [us][mul|div]_fix[_sat] must fit within 32 bits");
if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
index 9e1d108ac14d..49668de27d67 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
@@ -154,7 +154,10 @@ public:
void emitGNUAttribute(unsigned Tag, unsigned Value) override;
StringRef getMnemonic(MCInst &MI) override {
- return InstPrinter->getMnemonic(&MI).first;
+ auto [Ptr, Bits] = InstPrinter->getMnemonic(&MI);
+ assert((Bits != 0 || Ptr == nullptr) &&
+ "Invalid char pointer for instruction with no mnemonic");
+ return Ptr;
}
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
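The rewrite above unpacks the (pointer, bit-count) pair returned by getMnemonic and asserts the invariant that a zero-bit (absent) mnemonic comes with a null pointer. The same structured-binding-plus-invariant idiom in isolation (pair contents are illustrative):

    #include <cassert>
    #include <cstdio>
    #include <utility>

    std::pair<const char *, unsigned> getMnemonic(bool Known) {
      if (Known)
        return {"add", 4u};
      return {nullptr, 0u};        // no mnemonic: bits == 0 implies null pointer
    }

    const char *mnemonicOrNull(bool Known) {
      auto [Ptr, Bits] = getMnemonic(Known);
      assert((Bits != 0 || Ptr == nullptr) &&
             "no-mnemonic result must carry a null pointer");
      return Ptr;
    }

    int main() { std::printf("%s\n", mnemonicOrNull(true)); }
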
diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
index 6e72b5062a1d..c1db7e3943c4 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
@@ -650,10 +650,16 @@ MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind,
MCSection *Parent,
const MCExpr *SubsectionId) {
// Do the lookup. If we don't have a hit, return a new section.
- auto &GOFFSection = GOFFUniquingMap[Section.str()];
- if (!GOFFSection)
- GOFFSection = new (GOFFAllocator.Allocate())
- MCSectionGOFF(Section, Kind, Parent, SubsectionId);
+ auto IterBool =
+ GOFFUniquingMap.insert(std::make_pair(Section.str(), nullptr));
+ auto Iter = IterBool.first;
+ if (!IterBool.second)
+ return Iter->second;
+
+ StringRef CachedName = Iter->first;
+ MCSectionGOFF *GOFFSection = new (GOFFAllocator.Allocate())
+ MCSectionGOFF(CachedName, Kind, Parent, SubsectionId);
+ Iter->second = GOFFSection;
return GOFFSection;
}
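The new lookup avoids building the section from the caller's (possibly temporary) Section string: the std::string key now lives in the uniquing map, and the MCSectionGOFF is constructed from a StringRef into that stable key. A standalone sketch of the insert-then-point-at-the-key pattern (types are illustrative; real code allocates from a pool rather than leaking):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <string_view>

    struct Section {
      std::string_view Name;   // refers to the map key, which outlives the Section
    };

    std::map<std::string, Section *> UniquingMap;

    Section *getSection(std::string_view Name) {
      auto [Iter, Inserted] = UniquingMap.insert({std::string(Name), nullptr});
      if (!Inserted)
        return Iter->second;                 // already uniqued
      // std::map keys have stable addresses, so the view below never dangles.
      Section *S = new Section{Iter->first}; // leaked here; pooled in real code
      Iter->second = S;
      return S;
    }

    int main() {
      Section *A = getSection("A"), *B = getSection("A");
      std::printf("%s %d\n", std::string(A->Name).c_str(), A == B);
    }
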
diff --git a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
index 73e6569f96e4..a85182aa06ad 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
@@ -632,7 +632,8 @@ static void AttemptToFoldSymbolOffsetDifference(
// instructions and InSet is false (not expressions in directive like
// .size/.fill), disable the fast path.
if (Layout && (InSet || !SecA.hasInstructions() ||
- !Asm->getContext().getTargetTriple().isRISCV())) {
+ !(Asm->getContext().getTargetTriple().isRISCV() ||
+ Asm->getContext().getTargetTriple().isLoongArch()))) {
// If both symbols are in the same fragment, return the difference of their
// offsets. canGetFragmentOffset(FA) may be false.
if (FA == FB && !SA.isVariable() && !SB.isVariable()) {
@@ -942,16 +943,17 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
Addrs, InSet)) {
// Check if both are Target Expressions, see if we can compare them.
if (const MCTargetExpr *L = dyn_cast<MCTargetExpr>(ABE->getLHS())) {
- const MCTargetExpr *R = cast<MCTargetExpr>(ABE->getRHS());
- switch (ABE->getOpcode()) {
- case MCBinaryExpr::EQ:
- Res = MCValue::get(L->isEqualTo(R) ? -1 : 0);
- return true;
- case MCBinaryExpr::NE:
- Res = MCValue::get(L->isEqualTo(R) ? 0 : -1);
- return true;
- default:
- break;
+ if (const MCTargetExpr *R = dyn_cast<MCTargetExpr>(ABE->getRHS())) {
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::EQ:
+ Res = MCValue::get(L->isEqualTo(R) ? -1 : 0);
+ return true;
+ case MCBinaryExpr::NE:
+ Res = MCValue::get(L->isEqualTo(R) ? 0 : -1);
+ return true;
+ default:
+ break;
+ }
}
}
return false;
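Replacing cast<> with dyn_cast<> on the right-hand side closes a crash path: cast<> asserts when only the LHS is a target expression, while dyn_cast<> lets the code fall back to returning false. A sketch of the distinction using plain RTTI in place of LLVM's casting machinery (class names illustrative):

    #include <cstdio>

    struct Expr { virtual ~Expr() = default; };
    struct TargetExpr : Expr { int Id = 7; };

    // dyn_cast<>-style: test the type and recover, instead of asserting.
    bool compareIfBothTarget(const Expr *L, const Expr *R, bool &Eq) {
      auto *TL = dynamic_cast<const TargetExpr *>(L);
      if (!TL)
        return false;
      if (auto *TR = dynamic_cast<const TargetExpr *>(R)) {
        Eq = TL->Id == TR->Id;
        return true;               // both sides comparable
      }
      return false;                // mixed kinds: no answer, but no crash
    }

    int main() {
      TargetExpr T; Expr P; bool Eq = false;
      std::printf("%d\n", compareIfBothTarget(&T, &P, Eq)); // 0
    }
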
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 5352736bdcb9..c8b66d6fcb5e 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2638,7 +2638,7 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
Error BinaryWriter::write() {
SmallVector<const SectionBase *, 30> SectionsToWrite;
for (const SectionBase &Sec : Obj.allocSections()) {
- if (Sec.Type != SHT_NOBITS)
+ if (Sec.Type != SHT_NOBITS && Sec.Size > 0)
SectionsToWrite.push_back(&Sec);
}
diff --git a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
index 3c86b0f25dda..95c4f9f8545d 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
@@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
switch (getEMachine()) {
case ELF::EM_AMDGPU:
return getAMDGPUCPUName();
+ case ELF::EM_CUDA:
+ return getNVPTXCPUName();
case ELF::EM_PPC:
case ELF::EM_PPC64:
return StringRef("future");
@@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
}
}
+StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+ assert(getEMachine() == ELF::EM_CUDA);
+ unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
+
+ switch (SM) {
+ // Fermi architecture.
+ case ELF::EF_CUDA_SM20:
+ return "sm_20";
+ case ELF::EF_CUDA_SM21:
+ return "sm_21";
+
+ // Kepler architecture.
+ case ELF::EF_CUDA_SM30:
+ return "sm_30";
+ case ELF::EF_CUDA_SM32:
+ return "sm_32";
+ case ELF::EF_CUDA_SM35:
+ return "sm_35";
+ case ELF::EF_CUDA_SM37:
+ return "sm_37";
+
+ // Maxwell architecture.
+ case ELF::EF_CUDA_SM50:
+ return "sm_50";
+ case ELF::EF_CUDA_SM52:
+ return "sm_52";
+ case ELF::EF_CUDA_SM53:
+ return "sm_53";
+
+ // Pascal architecture.
+ case ELF::EF_CUDA_SM60:
+ return "sm_60";
+ case ELF::EF_CUDA_SM61:
+ return "sm_61";
+ case ELF::EF_CUDA_SM62:
+ return "sm_62";
+
+ // Volta architecture.
+ case ELF::EF_CUDA_SM70:
+ return "sm_70";
+ case ELF::EF_CUDA_SM72:
+ return "sm_72";
+
+ // Turing architecture.
+ case ELF::EF_CUDA_SM75:
+ return "sm_75";
+
+ // Ampere architecture.
+ case ELF::EF_CUDA_SM80:
+ return "sm_80";
+ case ELF::EF_CUDA_SM86:
+ return "sm_86";
+ case ELF::EF_CUDA_SM87:
+ return "sm_87";
+
+ // Ada architecture.
+ case ELF::EF_CUDA_SM89:
+ return "sm_89";
+
+ // Hopper architecture.
+ case ELF::EF_CUDA_SM90:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+ default:
+ llvm_unreachable("Unknown EF_CUDA_SM value");
+ }
+}
+
// FIXME Encode from a tablegen description or target parser.
void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
if (TheTriple.getSubArch() != Triple::NoSubArch)
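getNVPTXCPUName decodes the SM architecture from the CUDA ELF header flags: mask with EF_CUDA_SM, switch on the value, and consult EF_CUDA_ACCELERATORS only for sm_90 to pick the "a" variant. A compressed sketch of the decode step (the flag values below are stand-ins, not the real ELF constants):

    #include <cstdio>
    #include <string>

    // Illustrative stand-ins for the ELF::EF_CUDA_* constants.
    constexpr unsigned EF_CUDA_SM = 0xff;
    constexpr unsigned EF_CUDA_SM90 = 0x5a;
    constexpr unsigned EF_CUDA_ACCELERATORS = 0x100;

    std::string cpuName(unsigned Flags) {
      unsigned SM = Flags & EF_CUDA_SM;
      if (SM == EF_CUDA_SM90)
        return (Flags & EF_CUDA_ACCELERATORS) ? "sm_90a" : "sm_90";
      return "sm_" + std::to_string(SM); // real code switches over known values
    }

    int main() {
      std::printf("%s\n", cpuName(EF_CUDA_SM90 | EF_CUDA_ACCELERATORS).c_str());
    }
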
diff --git a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
index ab073e18cb46..07f76688fa43 100644
--- a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -16,6 +16,7 @@
#include "RecordStreamer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
@@ -68,6 +69,11 @@ void ModuleSymbolTable::addModule(Module *M) {
static void
initializeRecordStreamer(const Module &M,
function_ref<void(RecordStreamer &)> Init) {
+ // This function may be called twice, once for ModuleSummaryIndexAnalysis and
+ // again when writing the IR symbol table. If parsing inline assembly caused
+ // errors in the first run, suppress the second run.
+ if (M.getContext().getDiagHandlerPtr()->HasErrors)
+ return;
StringRef InlineAsm = M.getModuleInlineAsm();
if (InlineAsm.empty())
return;
@@ -95,7 +101,8 @@ initializeRecordStreamer(const Module &M,
if (!MCII)
return;
- std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
+ std::unique_ptr<MemoryBuffer> Buffer(
+ MemoryBuffer::getMemBuffer(InlineAsm, "<inline asm>"));
SourceMgr SrcMgr;
SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
@@ -115,6 +122,13 @@ initializeRecordStreamer(const Module &M,
if (!TAP)
return;
+ MCCtx.setDiagnosticHandler([&](const SMDiagnostic &SMD, bool IsInlineAsm,
+ const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
+ M.getContext().diagnose(
+ DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0));
+ });
+
// Module-level inline asm is assumed to use AT&T syntax (see
// AsmPrinter::doInitialization()).
Parser->setAssemblerDialect(InlineAsm::AD_ATT);
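Two cooperating fixes here: the inline-asm buffer gets a real name so parse errors read "<inline asm>:1:2" instead of naming an anonymous buffer, and those errors are routed through LLVMContext::diagnose, which (via the LLVMContext change earlier in this diff) latches HasErrors so the second invocation of initializeRecordStreamer bails out instead of re-reporting. The run-once-on-error guard distilled (machinery is illustrative):

    #include <cstdio>
    #include <functional>

    struct Context {
      bool HasErrors = false;
      void diagnoseError(const char *Msg) {
        HasErrors = true;
        std::fprintf(stderr, "<inline asm>: %s\n", Msg);
      }
    };

    void initializeRecordStreamer(Context &Ctx,
                                  std::function<void(Context &)> Parse) {
      if (Ctx.HasErrors)   // a previous run already failed; stay quiet
        return;
      Parse(Ctx);
    }

    int main() {
      Context Ctx;
      auto BadParse = [](Context &C) { C.diagnoseError("unknown directive"); };
      initializeRecordStreamer(Ctx, BadParse); // reports once
      initializeRecordStreamer(Ctx, BadParse); // suppressed
    }
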
diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
index 168fb57935d6..dfe86a45df32 100644
--- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
@@ -265,7 +265,6 @@ static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) {
static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
WasmSectionOrderChecker &Checker) {
- Section.Offset = Ctx.Ptr - Ctx.Start;
Section.Type = readUint8(Ctx);
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
// When reading the section's size, store the size of the LEB used to encode
@@ -273,6 +272,7 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
const uint8_t *PreSizePtr = Ctx.Ptr;
uint32_t Size = readVaruint32(Ctx);
Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
+ Section.Offset = Ctx.Ptr - Ctx.Start;
if (Size == 0)
return make_error<StringError>("zero length section",
object_error::parse_failed);
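Moving the Section.Offset assignment below the size varuint means the recorded offset now points at the section's payload, past the one-byte type and the LEB128-encoded size, rather than at the section header. The layout arithmetic with a minimal LEB128 reader (buffer contents are illustrative):

    #include <cstdint>
    #include <cstdio>

    static uint32_t readVaruint32(const uint8_t *&P) {
      uint32_t V = 0; unsigned Shift = 0; uint8_t B;
      do { B = *P++; V |= uint32_t(B & 0x7f) << Shift; Shift += 7; } while (B & 0x80);
      return V;
    }

    int main() {
      // type = 1, size = 0x90 0x01 (= 144, two LEB bytes), then payload...
      const uint8_t Buf[] = {0x01, 0x90, 0x01, 0xaa};
      const uint8_t *Ptr = Buf, *Start = Buf;
      uint8_t Type = *Ptr++;
      uint32_t Size = readVaruint32(Ptr);
      size_t PayloadOffset = Ptr - Start;   // 3: past type byte and size LEB
      std::printf("type=%u size=%u offset=%zu\n", Type, Size, PayloadOffset);
    }
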
@@ -599,6 +599,10 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
+ // Clear out any symbol information that was derived from the exports
+ // section.
+ LinkingData.SymbolTable.clear();
+ Symbols.clear();
LinkingData.SymbolTable.reserve(Count);
Symbols.reserve(Count);
StringSet<> SymbolNames;
@@ -1290,37 +1294,75 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Exports.reserve(Count);
+ LinkingData.SymbolTable.reserve(Count);
+ Symbols.reserve(Count);
for (uint32_t I = 0; I < Count; I++) {
wasm::WasmExport Ex;
Ex.Name = readString(Ctx);
Ex.Kind = readUint8(Ctx);
Ex.Index = readVaruint32(Ctx);
+ const wasm::WasmSignature *Signature = nullptr;
+ const wasm::WasmGlobalType *GlobalType = nullptr;
+ const wasm::WasmTableType *TableType = nullptr;
+ wasm::WasmSymbolInfo Info;
+ Info.Name = Ex.Name;
+ Info.Flags = 0;
switch (Ex.Kind) {
- case wasm::WASM_EXTERNAL_FUNCTION:
-
+ case wasm::WASM_EXTERNAL_FUNCTION: {
if (!isDefinedFunctionIndex(Ex.Index))
return make_error<GenericBinaryError>("invalid function export",
object_error::parse_failed);
getDefinedFunction(Ex.Index).ExportName = Ex.Name;
+ Info.Kind = wasm::WASM_SYMBOL_TYPE_FUNCTION;
+ Info.ElementIndex = Ex.Index;
+ unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
+ wasm::WasmFunction &Function = Functions[FuncIndex];
+ Signature = &Signatures[Function.SigIndex];
break;
- case wasm::WASM_EXTERNAL_GLOBAL:
+ }
+ case wasm::WASM_EXTERNAL_GLOBAL: {
if (!isValidGlobalIndex(Ex.Index))
return make_error<GenericBinaryError>("invalid global export",
object_error::parse_failed);
+ Info.Kind = wasm::WASM_SYMBOL_TYPE_DATA;
+ uint64_t Offset = 0;
+ if (isDefinedGlobalIndex(Ex.Index)) {
+ auto Global = getDefinedGlobal(Ex.Index);
+ if (!Global.InitExpr.Extended) {
+ auto Inst = Global.InitExpr.Inst;
+ if (Inst.Opcode == wasm::WASM_OPCODE_I32_CONST) {
+ Offset = Inst.Value.Int32;
+ } else if (Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) {
+ Offset = Inst.Value.Int64;
+ }
+ }
+ }
+ Info.DataRef = wasm::WasmDataReference{0, Offset, 0};
break;
+ }
case wasm::WASM_EXTERNAL_TAG:
if (!isValidTagIndex(Ex.Index))
return make_error<GenericBinaryError>("invalid tag export",
object_error::parse_failed);
+ Info.Kind = wasm::WASM_SYMBOL_TYPE_TAG;
+ Info.ElementIndex = Ex.Index;
break;
case wasm::WASM_EXTERNAL_MEMORY:
+ break;
case wasm::WASM_EXTERNAL_TABLE:
+ Info.Kind = wasm::WASM_SYMBOL_TYPE_TABLE;
break;
default:
return make_error<GenericBinaryError>("unexpected export kind",
object_error::parse_failed);
}
Exports.push_back(Ex);
+ if (Ex.Kind != wasm::WASM_EXTERNAL_MEMORY) {
+ LinkingData.SymbolTable.emplace_back(Info);
+ Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType,
+ TableType, Signature);
+ LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
+ }
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("export section ended prematurely",
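parseExportSection now synthesizes a symbol-table entry for every non-memory export, so non-relocatable binaries without a "linking" custom section still present usable symbols; if a real symbol table shows up later, parseLinkingSectionSymtab (earlier hunk) clears the synthesized entries first. The provisional-then-authoritative precedence in miniature (structures illustrative):

    #include <cstdio>
    #include <string>
    #include <vector>

    std::vector<std::string> Symbols;

    void parseExports(const std::vector<std::string> &Exports) {
      for (const auto &E : Exports)
        Symbols.push_back(E);        // provisional, derived from exports
    }

    void parseSymtab(const std::vector<std::string> &Table) {
      Symbols.clear();               // authoritative data replaces derived data
      Symbols = Table;
    }

    int main() {
      parseExports({"foo", "bar"});
      parseSymtab({"foo", "bar", "local_helper"});
      std::printf("%zu symbols\n", Symbols.size()); // 3
    }
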
@@ -1644,6 +1686,8 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
return Segment.Offset.Inst.Value.Int32 + Sym.Info.DataRef.Offset;
} else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) {
return Segment.Offset.Inst.Value.Int64 + Sym.Info.DataRef.Offset;
+ } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) {
+ return Sym.Info.DataRef.Offset;
} else {
llvm_unreachable("unknown init expr opcode");
}
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index 95b9fb7ad735..f94bd422c6b5 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -83,6 +83,7 @@
#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/InterleavedLoadCombine.h"
#include "llvm/CodeGen/JMCInstrumenter.h"
+#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/SafeStack.h"
#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/CodeGen/SjLjEHPrepare.h"
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
index d8fc7cd8a231..82ce040c6496 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
@@ -83,6 +83,7 @@ MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
+MODULE_PASS("lower-emutls", LowerEmuTLSPass())
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
MODULE_PASS("lower-ifunc", LowerIFuncPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
index 649d814cfd9d..134a400e639c 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
@@ -246,11 +246,27 @@ std::string InstrProfError::message() const {
char InstrProfError::ID = 0;
-std::string getPGOFuncName(StringRef RawFuncName,
- GlobalValue::LinkageTypes Linkage,
+std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage,
StringRef FileName,
uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
- return GlobalValue::getGlobalIdentifier(RawFuncName, Linkage, FileName);
+ // Value names may be prefixed with a binary '1' to indicate
+ // that the backend should not modify the symbols due to any platform
+ // naming convention. Do not include that '1' in the PGO profile name.
+ if (Name[0] == '\1')
+ Name = Name.substr(1);
+
+ std::string NewName = std::string(Name);
+ if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+ // For local symbols, prepend the main file name to distinguish them.
+ // Do not include the full path in the file name since there's no guarantee
+ // that it will stay the same, e.g., if the files are checked out from
+ // version control in different locations.
+ if (FileName.empty())
+ NewName = NewName.insert(0, "<unknown>:");
+ else
+ NewName = NewName.insert(0, FileName.str() + ":");
+ }
+ return NewName;
}
// Strip NumPrefix level of directory name from PathNameStr. If the number of
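The open-coded getPGOFuncName above preserves the legacy front-end naming: strip a leading '\1' no-mangle marker, then prefix local-linkage symbols with the source file name and ':' so same-named statics in different files stay distinct. The resulting shape (linkage test is illustrative):

    #include <cstdio>
    #include <string>

    std::string getPGOFuncName(std::string Name, bool IsLocalLinkage,
                               const std::string &FileName) {
      if (!Name.empty() && Name[0] == '\1')
        Name.erase(0, 1);            // drop the no-mangle marker
      if (IsLocalLinkage)
        Name.insert(0, (FileName.empty() ? std::string("<unknown>") : FileName)
                           + ":");
      return Name;
    }

    int main() {
      std::printf("%s\n", getPGOFuncName("\1helper", true, "a.c").c_str()); // a.c:helper
      std::printf("%s\n", getPGOFuncName("main", false, "a.c").c_str());    // main
    }
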
@@ -300,12 +316,10 @@ getIRPGONameForGlobalObject(const GlobalObject &GO,
GlobalValue::LinkageTypes Linkage,
StringRef FileName) {
SmallString<64> Name;
- if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
- Name.append(FileName.empty() ? "<unknown>" : FileName);
- Name.append(";");
- }
+ // FIXME: Mangler's handling is kept outside of `getGlobalIdentifier` for now.
+ // For more details please check issue #74565.
Mangler().getNameWithPrefix(Name, &GO, /*CannotUsePrivateLabel=*/true);
- return Name.str().str();
+ return GlobalValue::getGlobalIdentifier(Name, Linkage, FileName);
}
static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) {
@@ -352,6 +366,9 @@ std::string getIRPGOFuncName(const Function &F, bool InLTO) {
return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F));
}
+// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
+// for front-end (Clang, etc) instrumentation.
+// The implementation is kept for profile matching from older profiles.
// This is similar to `getIRPGOFuncName` except that this function calls
// 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls
// 'getIRPGONameForGlobalObject'. See the difference between two callees in the
@@ -384,7 +401,8 @@ getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
if (FileName.empty())
return PGOFuncName;
- // Drop the file name including ':'. See also getPGOFuncName.
+ // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as
+ // well.
if (PGOFuncName.starts_with(FileName))
PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1);
return PGOFuncName;
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index 068922d421f8..8f62df79d5b7 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1008,12 +1008,13 @@ public:
/// Extract the original function name from a PGO function name.
static StringRef extractName(StringRef Name) {
- // We can have multiple :-separated pieces; there can be pieces both
- // before and after the mangled name. Find the first part that starts
- // with '_Z'; we'll assume that's the mangled name we want.
+ // We can have multiple pieces separated by kGlobalIdentifierDelimiter (
+ // semicolon now and colon in older profiles); there can be pieces both
+ // before and after the mangled name. Find the first part that starts with
+ // '_Z'; we'll assume that's the mangled name we want.
std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
while (true) {
- Parts = Parts.second.split(':');
+ Parts = Parts.second.split(kGlobalIdentifierDelimiter);
if (Parts.first.starts_with("_Z"))
return Parts.first;
if (Parts.second.empty())
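extractName now splits on kGlobalIdentifierDelimiter (';' in current profiles, where older profiles used ':') and scans for the first piece that looks Itanium-mangled. The scan in isolation (delimiter assumed to be ';'):

    #include <cstdio>
    #include <string_view>

    std::string_view extractName(std::string_view Name, char Delim = ';') {
      std::string_view Rest = Name;
      while (true) {
        size_t Pos = Rest.find(Delim);
        std::string_view Piece = Rest.substr(0, Pos);
        if (Piece.rfind("_Z", 0) == 0)
          return Piece;                        // first mangled-looking piece wins
        if (Pos == std::string_view::npos)
          return Name;                         // no mangled piece: keep whole name
        Rest.remove_prefix(Pos + 1);
      }
    }

    int main() {
      std::string_view N = extractName("a.cpp;_Z3foov");
      std::printf("%.*s\n", int(N.size()), N.data()); // _Z3foov
    }
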
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
index af2db8d61179..0e2b8668bab7 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -336,6 +336,13 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
inconvertibleErrorCode()),
FileName);
+ // Process the raw profile.
+ if (Error E = readRawProfile(std::move(DataBuffer)))
+ return E;
+
+ if (Error E = setupForSymbolization())
+ return E;
+
auto *Object = cast<object::ObjectFile>(Binary.getBinary());
std::unique_ptr<DIContext> Context = DWARFContext::create(
*Object, DWARFContext::ProcessDebugRelocations::Process);
@@ -344,16 +351,13 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
Object, std::move(Context), /*UntagAddresses=*/false);
if (!SOFOr)
return report(SOFOr.takeError(), FileName);
- Symbolizer = std::move(SOFOr.get());
-
- // Process the raw profile.
- if (Error E = readRawProfile(std::move(DataBuffer)))
- return E;
-
- if (Error E = setupForSymbolization())
- return E;
+ auto Symbolizer = std::move(SOFOr.get());
- if (Error E = symbolizeAndFilterStackFrames())
+ // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
+ // that it is freed automatically at the end, when it is no longer used. This
+ // reduces peak memory since it won't be live while also mapping the raw
+ // profile into records afterwards.
+ if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
return E;
return mapRawProfileToRecords();
@@ -469,7 +473,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
return Error::success();
}
-Error RawMemProfReader::symbolizeAndFilterStackFrames() {
+Error RawMemProfReader::symbolizeAndFilterStackFrames(
+ std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
// The specifier to use when symbolization is requested.
const DILineInfoSpecifier Specifier(
DILineInfoSpecifier::FileLineInfoKind::RawValue,
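Reordering initialize() lets the raw profile be read before symbolization is set up, and handing the symbolizer to symbolizeAndFilterStackFrames by unique_ptr means it is destroyed when that call returns, so the DWARF context is not resident while mapRawProfileToRecords runs. The sink-by-value idiom in isolation:

    #include <cstdio>
    #include <memory>

    struct Symbolizer {
      ~Symbolizer() { std::puts("symbolizer freed"); }
      void symbolize() { std::puts("symbolizing"); }
    };

    // Taking unique_ptr by value transfers ownership into the callee;
    // the Symbolizer dies when this function returns, not when the caller does.
    void symbolizeAndFilter(std::unique_ptr<Symbolizer> S) { S->symbolize(); }

    int main() {
      auto S = std::make_unique<Symbolizer>();
      symbolizeAndFilter(std::move(S));
      std::puts("mapping records");   // runs after "symbolizer freed"
    }
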
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index 088b4e4d755c..368dead44914 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -175,6 +175,23 @@ public:
StringRef Overview, raw_ostream *Errs = nullptr,
bool LongOptionsUseDoubleDash = false);
+ void forEachSubCommand(Option &Opt, function_ref<void(SubCommand &)> Action) {
+ if (Opt.Subs.empty()) {
+ Action(SubCommand::getTopLevel());
+ return;
+ }
+ if (Opt.Subs.size() == 1 && *Opt.Subs.begin() == &SubCommand::getAll()) {
+ for (auto *SC : RegisteredSubCommands)
+ Action(*SC);
+ return;
+ }
+ for (auto *SC : Opt.Subs) {
+ assert(SC != &SubCommand::getAll() &&
+ "SubCommand::getAll() should not be used with other subcommands");
+ Action(*SC);
+ }
+ }
+
void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
if (Opt.hasArgStr())
return;
@@ -183,25 +200,11 @@ public:
<< "' registered more than once!\n";
report_fatal_error("inconsistency in registered CommandLine options");
}
-
- // If we're adding this to all sub-commands, add it to the ones that have
- // already been registered.
- if (SC == &SubCommand::getAll()) {
- for (auto *Sub : RegisteredSubCommands) {
- if (SC == Sub)
- continue;
- addLiteralOption(Opt, Sub, Name);
- }
- }
}
void addLiteralOption(Option &Opt, StringRef Name) {
- if (Opt.Subs.empty())
- addLiteralOption(Opt, &SubCommand::getTopLevel(), Name);
- else {
- for (auto *SC : Opt.Subs)
- addLiteralOption(Opt, SC, Name);
- }
+ forEachSubCommand(
+ Opt, [&](SubCommand &SC) { addLiteralOption(Opt, &SC, Name); });
}
void addOption(Option *O, SubCommand *SC) {
@@ -238,16 +241,6 @@ public:
// linked LLVM distribution.
if (HadErrors)
report_fatal_error("inconsistency in registered CommandLine options");
-
- // If we're adding this to all sub-commands, add it to the ones that have
- // already been registered.
- if (SC == &SubCommand::getAll()) {
- for (auto *Sub : RegisteredSubCommands) {
- if (SC == Sub)
- continue;
- addOption(O, Sub);
- }
- }
}
void addOption(Option *O, bool ProcessDefaultOption = false) {
@@ -255,13 +248,7 @@ public:
DefaultOptions.push_back(O);
return;
}
-
- if (O->Subs.empty()) {
- addOption(O, &SubCommand::getTopLevel());
- } else {
- for (auto *SC : O->Subs)
- addOption(O, SC);
- }
+ forEachSubCommand(*O, [&](SubCommand &SC) { addOption(O, &SC); });
}
void removeOption(Option *O, SubCommand *SC) {
@@ -298,17 +285,7 @@ public:
}
void removeOption(Option *O) {
- if (O->Subs.empty())
- removeOption(O, &SubCommand::getTopLevel());
- else {
- if (O->isInAllSubCommands()) {
- for (auto *SC : RegisteredSubCommands)
- removeOption(O, SC);
- } else {
- for (auto *SC : O->Subs)
- removeOption(O, SC);
- }
- }
+ forEachSubCommand(*O, [&](SubCommand &SC) { removeOption(O, &SC); });
}
bool hasOptions(const SubCommand &Sub) const {
@@ -344,17 +321,8 @@ public:
}
void updateArgStr(Option *O, StringRef NewName) {
- if (O->Subs.empty())
- updateArgStr(O, NewName, &SubCommand::getTopLevel());
- else {
- if (O->isInAllSubCommands()) {
- for (auto *SC : RegisteredSubCommands)
- updateArgStr(O, NewName, SC);
- } else {
- for (auto *SC : O->Subs)
- updateArgStr(O, NewName, SC);
- }
- }
+ forEachSubCommand(*O,
+ [&](SubCommand &SC) { updateArgStr(O, NewName, &SC); });
}
void printOptionValues();
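forEachSubCommand centralizes a dispatch that was previously open-coded four times: no explicit subcommands means the top level, a sole SubCommand::getAll() means every registered subcommand, and anything else is an explicit list (with getAll() disallowed in the mix). A distilled version of the dispatch (container types are illustrative):

    #include <cassert>
    #include <cstdio>
    #include <functional>
    #include <set>
    #include <vector>

    struct SubCommand { const char *Name; };
    SubCommand TopLevel{"<top>"}, All{"<all>"}, A{"a"}, B{"b"};
    std::vector<SubCommand *> Registered{&A, &B};

    void forEachSubCommand(const std::set<SubCommand *> &Subs,
                           std::function<void(SubCommand &)> Action) {
      if (Subs.empty()) { Action(TopLevel); return; }
      if (Subs.size() == 1 && *Subs.begin() == &All) {
        for (auto *SC : Registered)
          Action(*SC);
        return;
      }
      for (auto *SC : Subs) {
        assert(SC != &All && "getAll() must not be mixed with other subcommands");
        Action(*SC);
      }
    }

    int main() {
      forEachSubCommand({&All}, [](SubCommand &SC) { std::puts(SC.Name); }); // a, b
    }
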
diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
index bbbaf26a7bd4..7256e9a29329 100644
--- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
@@ -145,6 +145,9 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zmmul", RISCVExtensionVersion{1, 0}},
+ {"zvbb", RISCVExtensionVersion{1, 0}},
+ {"zvbc", RISCVExtensionVersion{1, 0}},
+
{"zve32f", RISCVExtensionVersion{1, 0}},
{"zve32x", RISCVExtensionVersion{1, 0}},
{"zve64d", RISCVExtensionVersion{1, 0}},
@@ -154,6 +157,22 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zvfh", RISCVExtensionVersion{1, 0}},
{"zvfhmin", RISCVExtensionVersion{1, 0}},
+ // vector crypto
+ {"zvkb", RISCVExtensionVersion{1, 0}},
+ {"zvkg", RISCVExtensionVersion{1, 0}},
+ {"zvkn", RISCVExtensionVersion{1, 0}},
+ {"zvknc", RISCVExtensionVersion{1, 0}},
+ {"zvkned", RISCVExtensionVersion{1, 0}},
+ {"zvkng", RISCVExtensionVersion{1, 0}},
+ {"zvknha", RISCVExtensionVersion{1, 0}},
+ {"zvknhb", RISCVExtensionVersion{1, 0}},
+ {"zvks", RISCVExtensionVersion{1, 0}},
+ {"zvksc", RISCVExtensionVersion{1, 0}},
+ {"zvksed", RISCVExtensionVersion{1, 0}},
+ {"zvksg", RISCVExtensionVersion{1, 0}},
+ {"zvksh", RISCVExtensionVersion{1, 0}},
+ {"zvkt", RISCVExtensionVersion{1, 0}},
+
{"zvl1024b", RISCVExtensionVersion{1, 0}},
{"zvl128b", RISCVExtensionVersion{1, 0}},
{"zvl16384b", RISCVExtensionVersion{1, 0}},
@@ -179,27 +198,8 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
{"ztso", RISCVExtensionVersion{0, 1}},
- {"zvbb", RISCVExtensionVersion{1, 0}},
- {"zvbc", RISCVExtensionVersion{1, 0}},
-
{"zvfbfmin", RISCVExtensionVersion{0, 8}},
{"zvfbfwma", RISCVExtensionVersion{0, 8}},
-
- // vector crypto
- {"zvkb", RISCVExtensionVersion{1, 0}},
- {"zvkg", RISCVExtensionVersion{1, 0}},
- {"zvkn", RISCVExtensionVersion{1, 0}},
- {"zvknc", RISCVExtensionVersion{1, 0}},
- {"zvkned", RISCVExtensionVersion{1, 0}},
- {"zvkng", RISCVExtensionVersion{1, 0}},
- {"zvknha", RISCVExtensionVersion{1, 0}},
- {"zvknhb", RISCVExtensionVersion{1, 0}},
- {"zvks", RISCVExtensionVersion{1, 0}},
- {"zvksc", RISCVExtensionVersion{1, 0}},
- {"zvksed", RISCVExtensionVersion{1, 0}},
- {"zvksg", RISCVExtensionVersion{1, 0}},
- {"zvksh", RISCVExtensionVersion{1, 0}},
- {"zvkt", RISCVExtensionVersion{1, 0}},
};
static void verifyTables() {
@@ -215,11 +215,12 @@ static void verifyTables() {
#endif
}
-static void PrintExtension(const std::string Name, const std::string Version,
- const std::string Description) {
- outs() << " "
- << format(Description.empty() ? "%-20s%s\n" : "%-20s%-10s%s\n",
- Name.c_str(), Version.c_str(), Description.c_str());
+static void PrintExtension(StringRef Name, StringRef Version,
+ StringRef Description) {
+ outs().indent(4);
+ unsigned VersionWidth = Description.empty() ? 0 : 10;
+ outs() << left_justify(Name, 20) << left_justify(Version, VersionWidth)
+ << Description << "\n";
}
void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
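The printf-style format() call is replaced with left_justify, which works directly on StringRef (no c_str() round-trips through std::string) and expresses the column widths declaratively; a zero width simply emits the string unpadded, which is how the version column collapses when there is no description. An equivalent standard-library sketch:

    #include <iomanip>
    #include <iostream>
    #include <string>

    void printExtension(const std::string &Name, const std::string &Version,
                        const std::string &Description) {
      int VersionWidth = Description.empty() ? 0 : 10;
      std::cout << std::string(4, ' ') << std::left << std::setw(20) << Name
                << std::setw(VersionWidth) << Version << Description << "\n";
    }

    int main() {
      printExtension("zvbb", "1.0", "Vector Bit-manipulation");
      printExtension("zvbc", "1.0", "");
    }
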
@@ -233,7 +234,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
for (const auto &E : ExtMap) {
std::string Version = std::to_string(E.second.MajorVersion) + "." +
std::to_string(E.second.MinorVersion);
- PrintExtension(E.first, Version, DescMap[E.first].str());
+ PrintExtension(E.first, Version, DescMap[E.first]);
}
outs() << "\nExperimental extensions\n";
@@ -243,7 +244,7 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
for (const auto &E : ExtMap) {
std::string Version = std::to_string(E.second.MajorVersion) + "." +
std::to_string(E.second.MinorVersion);
- PrintExtension(E.first, Version, DescMap["experimental-" + E.first].str());
+ PrintExtension(E.first, Version, DescMap["experimental-" + E.first]);
}
outs() << "\nUse -march to specify the target's extension.\n"
@@ -995,25 +996,25 @@ static const char *ImpliedExtsD[] = {"f"};
static const char *ImpliedExtsF[] = {"zicsr"};
static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
static const char *ImpliedExtsXTHeadVdot[] = {"v"};
-static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
-static const char *ImpliedExtsXsfvfnrclipxfqf[] = {"zve32f"};
-static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f", "zvfbfmin"};
-static const char *ImpliedExtsXsfvqmaccdod[] = {"zve32x"};
-static const char *ImpliedExtsXsfvqmaccqoq[] = {"zve32x"};
+static const char *ImpliedExtsXSfvcp[] = {"zve32x"};
+static const char *ImpliedExtsXSfvfnrclipxfqf[] = {"zve32f"};
+static const char *ImpliedExtsXSfvfwmaccqqq[] = {"zvfbfmin"};
+static const char *ImpliedExtsXSfvqmaccdod[] = {"zve32x"};
+static const char *ImpliedExtsXSfvqmaccqoq[] = {"zve32x"};
static const char *ImpliedExtsZacas[] = {"a"};
static const char *ImpliedExtsZcb[] = {"zca"};
static const char *ImpliedExtsZcd[] = {"d", "zca"};
static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"};
static const char *ImpliedExtsZcf[] = {"f", "zca"};
static const char *ImpliedExtsZcmp[] = {"zca"};
-static const char *ImpliedExtsZcmt[] = {"zca"};
+static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"};
static const char *ImpliedExtsZdinx[] = {"zfinx"};
static const char *ImpliedExtsZfa[] = {"f"};
static const char *ImpliedExtsZfbfmin[] = {"f"};
static const char *ImpliedExtsZfh[] = {"zfhmin"};
static const char *ImpliedExtsZfhmin[] = {"f"};
static const char *ImpliedExtsZfinx[] = {"zicsr"};
-static const char *ImpliedExtsZhinx[] = {"zfinx"};
+static const char *ImpliedExtsZhinx[] = {"zhinxmin"};
static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
static const char *ImpliedExtsZicntr[] = {"zicsr"};
static const char *ImpliedExtsZihpm[] = {"zicsr"};
@@ -1027,9 +1028,9 @@ static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"};
static const char *ImpliedExtsZve64d[] = {"zve64f", "d"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
-static const char *ImpliedExtsZvfbfmin[] = {"zve32f", "zfbfmin"};
-static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin"};
-static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"};
+static const char *ImpliedExtsZvfbfmin[] = {"zve32f"};
+static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin", "zfbfmin"};
+static const char *ImpliedExtsZvfh[] = {"zvfhmin", "zfhmin"};
static const char *ImpliedExtsZvfhmin[] = {"zve32f"};
static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"};
static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"};
@@ -1066,11 +1067,11 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"d"}, {ImpliedExtsD}},
{{"f"}, {ImpliedExtsF}},
{{"v"}, {ImpliedExtsV}},
- {{"xsfvcp"}, {ImpliedExtsXsfvcp}},
- {{"xsfvfnrclipxfqf"}, {ImpliedExtsXsfvfnrclipxfqf}},
- {{"xsfvfwmaccqqq"}, {ImpliedExtsXsfvfwmaccqqq}},
- {{"xsfvqmaccdod"}, {ImpliedExtsXsfvqmaccdod}},
- {{"xsfvqmaccqoq"}, {ImpliedExtsXsfvqmaccqoq}},
+ {{"xsfvcp"}, {ImpliedExtsXSfvcp}},
+ {{"xsfvfnrclipxfqf"}, {ImpliedExtsXSfvfnrclipxfqf}},
+ {{"xsfvfwmaccqqq"}, {ImpliedExtsXSfvfwmaccqqq}},
+ {{"xsfvqmaccdod"}, {ImpliedExtsXSfvqmaccdod}},
+ {{"xsfvqmaccqoq"}, {ImpliedExtsXSfvqmaccqoq}},
{{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}},
{{"zacas"}, {ImpliedExtsZacas}},
{{"zcb"}, {ImpliedExtsZcb}},
diff --git a/contrib/llvm-project/llvm/lib/Support/Signals.cpp b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
index 669a9e2a8396..9f9030e79d10 100644
--- a/contrib/llvm-project/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
@@ -145,7 +145,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
return false;
// Don't recursively invoke the llvm-symbolizer binary.
- if (Argv0.find("llvm-symbolizer") != std::string::npos)
+ if (Argv0.contains("llvm-symbolizer"))
return false;
// FIXME: Subtract necessary number from StackTrace entries to turn return addresses
diff --git a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
index 28ab85d4344c..3d3a564af51d 100644
--- a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Duration.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
@@ -25,17 +24,11 @@
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <sys/stat.h>
-#ifndef _WIN32
-#include <sys/socket.h>
-#include <sys/un.h>
-#endif // _WIN32
-
// <fcntl.h> may provide O_BINARY.
#if defined(HAVE_FCNTL_H)
# include <fcntl.h>
@@ -66,13 +59,6 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Windows/WindowsSupport.h"
-// winsock2.h must be included before afunix.h. Briefly turn off clang-format to
-// avoid error.
-// clang-format off
-#include <winsock2.h>
-#include <afunix.h>
-// clang-format on
-#include <io.h>
#endif
using namespace llvm;
@@ -659,7 +645,7 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered,
// Check if this is a console device. This is not equivalent to isatty.
IsWindowsConsole =
::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
-#endif // _WIN32
+#endif
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
@@ -969,153 +955,6 @@ bool raw_fd_stream::classof(const raw_ostream *OS) {
}
//===----------------------------------------------------------------------===//
-// raw_socket_stream
-//===----------------------------------------------------------------------===//
-
-#ifdef _WIN32
-WSABalancer::WSABalancer() {
- WSADATA WsaData;
- ::memset(&WsaData, 0, sizeof(WsaData));
- if (WSAStartup(MAKEWORD(2, 2), &WsaData) != 0) {
- llvm::report_fatal_error("WSAStartup failed");
- }
-}
-
-WSABalancer::~WSABalancer() { WSACleanup(); }
-
-#endif // _WIN32
-
-static std::error_code getLastSocketErrorCode() {
-#ifdef _WIN32
- return std::error_code(::WSAGetLastError(), std::system_category());
-#else
- return std::error_code(errno, std::system_category());
-#endif
-}
-
-ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath)
- : FD(SocketFD), SocketPath(SocketPath) {}
-
-ListeningSocket::ListeningSocket(ListeningSocket &&LS)
- : FD(LS.FD), SocketPath(LS.SocketPath) {
- LS.FD = -1;
-}
-
-Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath,
- int MaxBacklog) {
-
-#ifdef _WIN32
- WSABalancer _;
- SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
- if (MaybeWinsocket == INVALID_SOCKET) {
-#else
- int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
- if (MaybeWinsocket == -1) {
-#endif
- return llvm::make_error<StringError>(getLastSocketErrorCode(),
- "socket create failed");
- }
-
- struct sockaddr_un Addr;
- memset(&Addr, 0, sizeof(Addr));
- Addr.sun_family = AF_UNIX;
- strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
-
- if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) {
- std::error_code Err = getLastSocketErrorCode();
- if (Err == std::errc::address_in_use)
- ::close(MaybeWinsocket);
- return llvm::make_error<StringError>(Err, "Bind error");
- }
- if (listen(MaybeWinsocket, MaxBacklog) == -1) {
- return llvm::make_error<StringError>(getLastSocketErrorCode(),
- "Listen error");
- }
- int UnixSocket;
-#ifdef _WIN32
- UnixSocket = _open_osfhandle(MaybeWinsocket, 0);
-#else
- UnixSocket = MaybeWinsocket;
-#endif // _WIN32
- return ListeningSocket{UnixSocket, SocketPath};
-}
-
-Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() {
- int AcceptFD;
-#ifdef _WIN32
- SOCKET WinServerSock = _get_osfhandle(FD);
- SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL);
- AcceptFD = _open_osfhandle(WinAcceptSock, 0);
-#else
- AcceptFD = ::accept(FD, NULL, NULL);
-#endif //_WIN32
- if (AcceptFD == -1)
- return llvm::make_error<StringError>(getLastSocketErrorCode(),
- "Accept failed");
- return std::make_unique<raw_socket_stream>(AcceptFD);
-}
-
-ListeningSocket::~ListeningSocket() {
- if (FD == -1)
- return;
- ::close(FD);
- unlink(SocketPath.c_str());
-}
-
-static Expected<int> GetSocketFD(StringRef SocketPath) {
-#ifdef _WIN32
- SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
- if (MaybeWinsocket == INVALID_SOCKET) {
-#else
- int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
- if (MaybeWinsocket == -1) {
-#endif // _WIN32
- return llvm::make_error<StringError>(getLastSocketErrorCode(),
- "Create socket failed");
- }
-
- struct sockaddr_un Addr;
- memset(&Addr, 0, sizeof(Addr));
- Addr.sun_family = AF_UNIX;
- strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
-
- int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr));
- if (status == -1) {
- return llvm::make_error<StringError>(getLastSocketErrorCode(),
- "Connect socket failed");
- }
-#ifdef _WIN32
- return _open_osfhandle(MaybeWinsocket, 0);
-#else
- return MaybeWinsocket;
-#endif // _WIN32
-}
-
-raw_socket_stream::raw_socket_stream(int SocketFD)
- : raw_fd_stream(SocketFD, true) {}
-
-Expected<std::unique_ptr<raw_socket_stream>>
-raw_socket_stream::createConnectedUnix(StringRef SocketPath) {
-#ifdef _WIN32
- WSABalancer _;
-#endif // _WIN32
- Expected<int> FD = GetSocketFD(SocketPath);
- if (!FD)
- return FD.takeError();
- return std::make_unique<raw_socket_stream>(*FD);
-}
-
-raw_socket_stream::~raw_socket_stream() {}
-
-//===----------------------------------------------------------------------===//
-// raw_string_ostream
-//===----------------------------------------------------------------------===//
-
-void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
- OS.append(Ptr, Size);
-}
-
-//===----------------------------------------------------------------------===//
// raw_svector_ostream
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp
new file mode 100644
index 000000000000..4659880cfe19
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/raw_socket_stream.cpp
@@ -0,0 +1,179 @@
+//===-- llvm/Support/raw_socket_stream.cpp - Socket streams --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains raw_ostream implementations for streams to communicate
+// via UNIX sockets
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_socket_stream.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Error.h"
+
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <sys/un.h>
+#else
+#include "llvm/Support/Windows/WindowsSupport.h"
+// winsock2.h must be included before afunix.h. Briefly turn off clang-format to
+// avoid error.
+// clang-format off
+#include <winsock2.h>
+#include <afunix.h>
+// clang-format on
+#include <io.h>
+#endif // _WIN32
+
+#if defined(HAVE_UNISTD_H)
+#include <unistd.h>
+#endif
+
+using namespace llvm;
+
+#ifdef _WIN32
+WSABalancer::WSABalancer() {
+ WSADATA WsaData;
+ ::memset(&WsaData, 0, sizeof(WsaData));
+ if (WSAStartup(MAKEWORD(2, 2), &WsaData) != 0) {
+ llvm::report_fatal_error("WSAStartup failed");
+ }
+}
+
+WSABalancer::~WSABalancer() { WSACleanup(); }
+
+#endif // _WIN32
+
+static std::error_code getLastSocketErrorCode() {
+#ifdef _WIN32
+ return std::error_code(::WSAGetLastError(), std::system_category());
+#else
+ return std::error_code(errno, std::system_category());
+#endif
+}
+
+ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath)
+ : FD(SocketFD), SocketPath(SocketPath) {}
+
+ListeningSocket::ListeningSocket(ListeningSocket &&LS)
+ : FD(LS.FD), SocketPath(LS.SocketPath) {
+ LS.FD = -1;
+}
+
+Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath,
+ int MaxBacklog) {
+
+#ifdef _WIN32
+ WSABalancer _;
+ SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == INVALID_SOCKET) {
+#else
+ int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == -1) {
+#endif
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "socket create failed");
+ }
+
+ struct sockaddr_un Addr;
+ memset(&Addr, 0, sizeof(Addr));
+ Addr.sun_family = AF_UNIX;
+ strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
+
+ if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) {
+ std::error_code Err = getLastSocketErrorCode();
+ if (Err == std::errc::address_in_use)
+ ::close(MaybeWinsocket);
+ return llvm::make_error<StringError>(Err, "Bind error");
+ }
+ if (listen(MaybeWinsocket, MaxBacklog) == -1) {
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Listen error");
+ }
+ int UnixSocket;
+#ifdef _WIN32
+ UnixSocket = _open_osfhandle(MaybeWinsocket, 0);
+#else
+ UnixSocket = MaybeWinsocket;
+#endif // _WIN32
+ return ListeningSocket{UnixSocket, SocketPath};
+}
+
+Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() {
+ int AcceptFD;
+#ifdef _WIN32
+ SOCKET WinServerSock = _get_osfhandle(FD);
+ SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL);
+ AcceptFD = _open_osfhandle(WinAcceptSock, 0);
+#else
+ AcceptFD = ::accept(FD, NULL, NULL);
+#endif //_WIN32
+ if (AcceptFD == -1)
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Accept failed");
+ return std::make_unique<raw_socket_stream>(AcceptFD);
+}
+
+ListeningSocket::~ListeningSocket() {
+ if (FD == -1)
+ return;
+ ::close(FD);
+ unlink(SocketPath.c_str());
+}
+
+static Expected<int> GetSocketFD(StringRef SocketPath) {
+#ifdef _WIN32
+ SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == INVALID_SOCKET) {
+#else
+ int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == -1) {
+#endif // _WIN32
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Create socket failed");
+ }
+
+ struct sockaddr_un Addr;
+ memset(&Addr, 0, sizeof(Addr));
+ Addr.sun_family = AF_UNIX;
+ strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
+
+ int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr));
+ if (status == -1) {
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Connect socket failed");
+ }
+#ifdef _WIN32
+ return _open_osfhandle(MaybeWinsocket, 0);
+#else
+ return MaybeWinsocket;
+#endif // _WIN32
+}
+
+raw_socket_stream::raw_socket_stream(int SocketFD)
+ : raw_fd_stream(SocketFD, true) {}
+
+Expected<std::unique_ptr<raw_socket_stream>>
+raw_socket_stream::createConnectedUnix(StringRef SocketPath) {
+#ifdef _WIN32
+ WSABalancer _;
+#endif // _WIN32
+ Expected<int> FD = GetSocketFD(SocketPath);
+ if (!FD)
+ return FD.takeError();
+ return std::make_unique<raw_socket_stream>(*FD);
+}
+
+raw_socket_stream::~raw_socket_stream() {}
+
+//===----------------------------------------------------------------------===//
+// raw_string_ostream
+//===----------------------------------------------------------------------===//
+
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.append(Ptr, Size);
+}
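With the socket streams split into their own translation unit, the public surface stays as shown above: createUnix to listen, accept to obtain a per-client stream, createConnectedUnix to dial out. A usage sketch against the interfaces added in this commit (error handling abbreviated; the backlog is passed explicitly since the in-tree default is not shown here, and in practice client and server run in separate processes):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_socket_stream.h"

    using namespace llvm;

    Error echoOnce(StringRef SocketPath) {
      // Server side: bind and listen on a UNIX domain socket.
      Expected<ListeningSocket> Server =
          ListeningSocket::createUnix(SocketPath, /*MaxBacklog=*/1);
      if (!Server)
        return Server.takeError();

      // Client side: connect to the same path and write into the stream.
      Expected<std::unique_ptr<raw_socket_stream>> Client =
          raw_socket_stream::createConnectedUnix(SocketPath);
      if (!Client)
        return Client.takeError();
      **Client << "hello\n";

      // Back on the server: accept yields a raw_socket_stream for this client.
      Expected<std::unique_ptr<raw_socket_stream>> Conn = Server->accept();
      if (!Conn)
        return Conn.takeError();
      return Error::success();
    }
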
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index c600bcaab2b3..68f452039c9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -622,6 +622,17 @@ def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedO
def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
"true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;
+// AArch64 2023 Architecture Extensions (v9.5-A)
+
+def FeatureCPA : SubtargetFeature<"cpa", "HasCPA", "true",
+ "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">;
+
+def FeaturePAuthLR : SubtargetFeature<"pauth-lr", "HasPAuthLR",
+ "true", "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">;
+
+def FeatureTLBIW : SubtargetFeature<"tlbiw", "HasTLBIW", "true",
+ "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -692,7 +703,7 @@ def HasV9_4aOps : SubtargetFeature<
def HasV9_5aOps : SubtargetFeature<
"v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions",
- [HasV9_4aOps]>;
+ [HasV9_4aOps, FeatureCPA]>;
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
@@ -783,7 +794,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
def SVE2p1Unsupported : AArch64Unsupported;
def SVE2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
+ let F = !listconcat([HasSVE2, HasSVE2orSME, HasSVE2orSME2, HasSSVE_FP8FMA, HasSMEF8F16,
HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
SVE2p1Unsupported.F);
}
@@ -797,7 +808,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
def SME2p1Unsupported : AArch64Unsupported;
def SME2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
+ let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
HasSMEF8F16, HasSMEF8F32],
SME2p1Unsupported.F);
}
@@ -807,7 +818,7 @@ def SMEUnsupported : AArch64Unsupported {
SME2Unsupported.F);
}
-let F = [HasPAuth] in
+let F = [HasPAuth, HasPAuthLR] in
def PAUnsupported : AArch64Unsupported;
include "AArch64SchedA53.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
index d6c00be80bd9..99f256b88782 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -134,6 +134,14 @@ def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
form_duplane,
shuf_to_ins]>;
+// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
+def vector_unmerge_lowering : GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_UNMERGE_VALUES):$root,
+ [{ return matchScalarizeVectorUnmerge(*${root}, MRI); }]),
+ (apply [{ applyScalarizeVectorUnmerge(*${root}, MRI, B); }])
+>;
+
def adjust_icmp_imm_matchdata :
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
def adjust_icmp_imm : GICombineRule <
@@ -251,7 +259,8 @@ def AArch64PostLegalizerLowering
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
- unmerge_ext_to_unmerge, lower_mull]> {
+ unmerge_ext_to_unmerge, lower_mull,
+ vector_unmerge_lowering]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 757471d6a905..bb7f4d907ffd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -747,6 +747,15 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Opc, unsigned N) {
+ assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
+ Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
+ "Unexpected opcode");
+ unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
+ ? RegState::Define
+ : 0;
+ unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
+ ? AArch64::zsub0
+ : AArch64::psub0;
const TargetRegisterInfo *TRI =
MBB.getParent()->getSubtarget().getRegisterInfo();
MachineInstr &MI = *MBBI;
@@ -756,9 +765,8 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
assert(ImmOffset >= -256 && ImmOffset < 256 &&
"Immediate spill offset out of range");
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .addReg(
- TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
- Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+ .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
+ RState)
.addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
.addImm(ImmOffset);
}
@@ -1492,12 +1500,16 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
case AArch64::STR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+ case AArch64::STR_PPXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
case AArch64::LDR_ZZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
case AArch64::LDR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
+ case AArch64::LDR_PPXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
case AArch64::BLR_RVMARKER:
return expandCALL_RVMARKER(MBB, MBBI);
case AArch64::BLR_BTI:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3882e843fb69..dffe69bdb900 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13708,15 +13708,18 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
APInt SplatValue;
APInt SplatUndef;
- unsigned SplatBitSize;
+ unsigned SplatBitSize = 0;
bool HasAnyUndefs;
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
SplatBitSize, HasAnyUndefs);
- bool IsZero = IsCnst && SplatValue == 0;
- bool IsOne = IsCnst && SplatValue == 1;
- bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
+
+ bool IsSplatUniform =
+ SrcVT.getVectorElementType().getSizeInBits() >= SplatBitSize;
+ bool IsZero = IsCnst && SplatValue == 0 && IsSplatUniform;
+ bool IsOne = IsCnst && SplatValue == 1 && IsSplatUniform;
+ bool IsMinusOne = IsCnst && SplatValue.isAllOnes() && IsSplatUniform;
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
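The crash being fixed above: isConstantSplat can report a splat whose SplatBitSize is wider than the vector's element type (for example a <8 x i8> whose bytes repeat the i16 pattern 0x0001), and treating such a splat as the scalar constants 0/1/-1 miscompares. Initializing SplatBitSize and gating the IsZero/IsOne/IsMinusOne flags on element-size >= splat-size sidesteps that. The check in miniature:

    #include <cstdint>
    #include <cstdio>

    // A splat discovered in a build_vector: Value repeats every SplatBits bits.
    struct Splat { uint64_t Value; unsigned SplatBits; };

    bool isScalarOneSplat(const Splat &S, unsigned EltBits) {
      // Trust the scalar view only when one splat unit covers one element.
      bool SplatUniform = EltBits >= S.SplatBits;
      return SplatUniform && S.Value == 1;
    }

    int main() {
      Splat PerByte{1, 8};   // every i8 element is 1
      Splat PerPair{1, 16};  // i8 elements alternate 0x01/0x00: not every one is 1
      std::printf("%d %d\n", isScalarOneSplat(PerByte, 8),   // 1
                  isScalarOneSplat(PerPair, 8));             // 0
    }
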
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 68e87f491a09..cb63d8726744 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2368,6 +2368,80 @@ class ClearAuth<bits<1> data, string asm>
let Inst{4-0} = Rd;
}
+// v9.5-A FEAT_PAuth_LR
+
+class SignAuthFixedRegs<bits<5> opcode2, bits<6> opcode, string asm>
+ : I<(outs), (ins), asm, "", "", []>,
+ Sched<[WriteI, ReadI]> {
+ let Inst{31} = 0b1; // sf
+ let Inst{30} = 0b1;
+ let Inst{29} = 0b0; // S
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+ let Inst{9-5} = 0b11111; // Rn
+ let Inst{4-0} = 0b11110; // Rd
+}
+
+def PAuthPCRelLabel16Operand : PCRelLabel<16> {
+ let Name = "PAuthPCRelLabel16";
+ let PredicateMethod = "isPAuthPCRelLabel16Operand";
+}
+def am_pauth_pcrel : Operand<OtherVT> {
+ let EncoderMethod = "getPAuthPCRelOpValue";
+ let DecoderMethod = "DecodePCRelLabel16";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = PAuthPCRelLabel16Operand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+class SignAuthPCRel<bits<2> opc, string asm>
+ : I<(outs), (ins am_pauth_pcrel:$label), asm, "\t$label", "", []>,
+ Sched<[]> {
+ bits<16> label;
+ let Inst{31} = 0b1; // sf
+ let Inst{30-23} = 0b11100111;
+ let Inst{22-21} = opc;
+ let Inst{20-5} = label; // imm
+ let Inst{4-0} = 0b11111; // Rd
+}
+
+class SignAuthOneReg<bits<5> opcode2, bits<6> opcode, string asm>
+ : I<(outs), (ins GPR64:$Rn), asm, "\t$Rn", "", []>,
+ Sched<[]> {
+ bits<5> Rn;
+ let Inst{31} = 0b1; // sf
+ let Inst{30} = 0b1;
+ let Inst{29} = 0b0; // S
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = 0b11110; // Rd
+}
+
+class SignAuthReturnPCRel<bits<3> opc, bits<5> op2, string asm>
+ : I<(outs), (ins am_pauth_pcrel:$label), asm, "\t$label", "", []>,
+ Sched<[WriteAtomic]> {
+ bits<16> label;
+ let Inst{31-24} = 0b01010101;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = label; // imm16
+ let Inst{4-0} = op2;
+}
+
+class SignAuthReturnReg<bits<6> op3, string asm>
+ : I<(outs), (ins GPR64common:$Rm), asm, "\t$Rm", "", []>,
+ Sched<[WriteAtomic]> {
+ bits<5> Rm;
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = 0b0010; // opc
+ let Inst{20-16} = 0b11111; // op2
+ let Inst{15-10} = op3;
+ let Inst{9-5} = 0b11111; // Rn
+ let Inst{4-0} = Rm; // op4 (Rm)
+}
+
// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions
class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops>
: I<(outs), iops, asm, ops, "", []>,
@@ -12446,6 +12520,58 @@ class SystemPXtI<bit L, string asm> :
BaseSYSPEncoding<L, asm, "\t$op1, $Cn, $Cm, $op2, $Rt", (outs),
(ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XSeqPairClassOperand:$Rt)>;
+//----------------------------------------------------------------------------
+// 2023 Armv9.5 Extensions
+//----------------------------------------------------------------------------
+
+//---
+// Checked Pointer Arithmetic (FEAT_CPA)
+//---
+
+def LSLImm3ShiftOperand : AsmOperandClass {
+ let SuperClasses = [ExtendOperandLSL64];
+ let Name = "LSLImm3Shift";
+ let RenderMethod = "addLSLImm3ShifterOperands";
+ let DiagnosticType = "AddSubLSLImm3ShiftLarge";
+}
+
+def lsl_imm3_shift_operand : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = LSLImm3ShiftOperand;
+}
+
+// Base CPA scalar add/subtract with lsl #imm3 shift
+class BaseAddSubCPA<bit isSub, string asm> : I<(outs GPR64sp:$Rd),
+ (ins GPR64sp:$Rn, GPR64:$Rm, lsl_imm3_shift_operand:$shift_imm),
+ asm, "\t$Rd, $Rn, $Rm$shift_imm", "", []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<3> shift_imm;
+ let Inst{31} = 0b1;
+ let Inst{30} = isSub;
+ let Inst{29-21} = 0b011010000;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = shift_imm;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Alias for CPA scalar add/subtract with no shift
+class AddSubCPAAlias<string asm, Instruction inst>
+ : InstAlias<asm#"\t$Rd, $Rn, $Rm",
+ (inst GPR64sp:$Rd, GPR64sp:$Rn, GPR64:$Rm, 0)>;
+
+multiclass AddSubCPA<bit isSub, string asm> {
+ def _shift : BaseAddSubCPA<isSub, asm>;
+ def _noshift : AddSubCPAAlias<asm, !cast<Instruction>(NAME#"_shift")>;
+}
+
+class MulAccumCPA<bit isSub, string asm>
+ : BaseMulAccum<isSub, 0b011, GPR64, GPR64, asm, []>, Sched<[]> {
+ let Inst{31} = 0b1;
+}
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7d71c316bcb0..1cfbf4737a6f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3771,6 +3771,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::LDR_PPXI:
+ case AArch64::STR_PPXI:
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2 * 2);
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
@@ -4089,6 +4096,16 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
return MI.getOperand(Idx);
}
+const MachineOperand &
+AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case AArch64::LDRBBroX:
+ return MI.getOperand(4);
+ }
+}
+
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
Register Reg) {
if (MI.getParent() == nullptr)
@@ -4804,6 +4821,10 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(SrcReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::STRSui;
+ else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::STR_PPXI;
+ StackID = TargetStackID::ScalableVector;
+ }
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
@@ -4980,6 +5001,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(DestReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRSui;
+ else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::LDR_PPXI;
+ StackID = TargetStackID::ScalableVector;
+ }
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
@@ -8779,12 +8804,23 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
// Don't outline anything used for return address signing. The outlined
// function will get signed later if needed
switch (MI.getOpcode()) {
+ case AArch64::PACM:
case AArch64::PACIASP:
case AArch64::PACIBSP:
+ case AArch64::PACIASPPC:
+ case AArch64::PACIBSPPC:
case AArch64::AUTIASP:
case AArch64::AUTIBSP:
+ case AArch64::AUTIASPPCi:
+ case AArch64::AUTIASPPCr:
+ case AArch64::AUTIBSPPCi:
+ case AArch64::AUTIBSPPCr:
case AArch64::RETAA:
case AArch64::RETAB:
+ case AArch64::RETAASPPCi:
+ case AArch64::RETAASPPCr:
+ case AArch64::RETABSPPCi:
+ case AArch64::RETABSPPCr:
case AArch64::EMITBKEY:
case AArch64::PAUTH_PROLOGUE:
case AArch64::PAUTH_EPILOGUE:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 6526f6740747..db24a19fe5f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -111,6 +111,9 @@ public:
/// Returns the immediate offset operator of a load/store.
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
+ /// Returns the shift amount operator of a load/store.
+ static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);
+
/// Returns whether the instruction is FP or NEON.
static bool isFpOrNEON(const MachineInstr &MI);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44b0337fe787..62b2bf490f37 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -61,6 +61,9 @@ def HasLOR : Predicate<"Subtarget->hasLOR()">,
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
+def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
+ AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
+
def HasJS : Predicate<"Subtarget->hasJS()">,
AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
@@ -289,6 +292,8 @@ def HasCHK : Predicate<"Subtarget->hasCHK()">,
AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
+def HasCPA : Predicate<"Subtarget->hasCPA()">,
+ AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -1644,6 +1649,47 @@ let Predicates = [HasPAuth] in {
}
+// v9.5-A pointer authentication extensions
+
+// Always accept "pacm" as an alias for "hint #39", but don't emit it when
+// disassembling if we don't have the pauth-lr feature.
+let CRm = 0b0100 in {
+ def PACM : SystemNoOperands<0b111, "hint\t#39">;
+}
+def : InstAlias<"pacm", (PACM), 0>;
+
+let Predicates = [HasPAuthLR] in {
+ let Defs = [LR], Uses = [LR, SP] in {
+ // opcode2, opcode, asm
+ def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
+ def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
+ def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
+ def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
+ // opc, asm
+ def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
+ def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
+ // opcode2, opcode, asm
+ def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">;
+ def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">;
+ // opcode2, opcode, asm
+ def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
+ def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
+ def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
+ def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
+ }
+
+ let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // opc, op2, asm
+ def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
+ def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
+ // op3, asm
+ def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">;
+ def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">;
+ }
+ def : InstAlias<"pacm", (PACM), 1>;
+}
+
+
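The "#39" is not arbitrary: a hint immediate is CRm:op2, and PACM sets CRm = 0b0100 with op2 = 0b111 from SystemNoOperands. A one-line check (illustrative, not from the patch):

static_assert(((0b0100 << 3) | 0b111) == 39, "PACM encodes as HINT #39");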
// v8.3a floating point conversion for javascript
let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
@@ -6480,23 +6526,23 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
-def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
-def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, dsub)>;
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
-def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
+def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
(bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
@@ -9402,6 +9448,10 @@ let Predicates = [HasD128] in {
}
}
+//===----------------------------===//
+// 2023 Architecture Extensions:
+//===----------------------------===//
+
let Predicates = [HasFP8] in {
defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
@@ -9443,6 +9493,19 @@ let Predicates = [HasFP8DOT4] in {
defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
} // End let Predicates = [HasFP8DOT4]
+//===----------------------------------------------------------------------===//
+// Checked Pointer Arithmetic (FEAT_CPA)
+//===----------------------------------------------------------------------===//
+let Predicates = [HasCPA] in {
+ // Scalar add/subtract
+ defm ADDPT : AddSubCPA<0, "addpt">;
+ defm SUBPT : AddSubCPA<1, "subpt">;
+
+ // Scalar multiply-add/subtract
+ def MADDPT : MulAccumCPA<0, "maddpt">;
+ def MSUBPT : MulAccumCPA<1, "msubpt">;
+}
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dc6d5b8950c3..b435b3ce03e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -62,6 +62,8 @@ STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+STATISTIC(NumConstOffsetFolded,
+          "Number of const offsets of index addresses folded");
DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
"Controls which pairs are considered for renaming");
@@ -75,6 +77,11 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
+// LdStConstLimit limits how far we search for a const offset instruction
+// when forming index address load/store instructions.
+static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
+ cl::init(10), cl::Hidden);
+
// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
cl::init(true), cl::Hidden);
@@ -171,6 +178,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
int UnscaledOffset, unsigned Limit);
+  // Scan the instruction list to find a register assigned with a const
+  // value that can be combined with the current instruction (a load or store)
+  // using base + unsigned-immediate addressing. Scan backwards.
+ MachineBasicBlock::iterator
+ findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
+ unsigned &Offset);
+
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
@@ -182,11 +196,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
unsigned BaseReg, int Offset);
+ bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
+ unsigned IndexReg, unsigned &Offset);
+
// Merge a pre- or post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);
+ MachineBasicBlock::iterator
+ mergeConstOffsetInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update, unsigned Offset,
+ int Scale);
+
// Find and merge zero store instructions.
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
@@ -199,6 +221,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge base register updates before or after a ld/st instruction.
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+  // Find and merge an index ld/st instruction into a base ld/st instruction.
+ bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
+
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -481,6 +506,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
}
}
+static unsigned getBaseAddressOpcode(unsigned Opc) {
+ // TODO: Add more index address loads/stores.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Opcode has no base address equivalent!");
+ case AArch64::LDRBBroX:
+ return AArch64::LDRBBui;
+ }
+}
+
static unsigned getPostIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -722,6 +757,20 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
}
}
+// Make sure this is a reg+reg Ld/St
+static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ // Scaled instructions.
+ // TODO: Add more index address loads/stores.
+ case AArch64::LDRBBroX:
+ Scale = 1;
+ return true;
+ }
+}
+
static bool isRewritableImplicitDef(unsigned Opc) {
switch (Opc) {
default:
@@ -2018,6 +2067,63 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
return NextI;
}
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update,
+ unsigned Offset, int Scale) {
+ assert((Update->getOpcode() == AArch64::MOVKWi) &&
+ "Unexpected const mov instruction to merge!");
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+ MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
+ MachineInstr &MemMI = *I;
+ unsigned Mask = (1 << 12) * Scale - 1;
+ unsigned Low = Offset & Mask;
+ unsigned High = Offset - Low;
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+ Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
+ MachineInstrBuilder AddMIB, MemMIB;
+
+ // Add IndexReg, BaseReg, High (the BaseReg may be SP)
+ AddMIB =
+ BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(IndexReg)
+ .addUse(BaseReg)
+ .addImm(High >> 12) // shifted value
+ .addImm(12); // shift 12
+ (void)AddMIB;
+ // Ld/St DestReg, IndexReg, Imm12
+ unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
+ MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .add(getLdStRegOp(MemMI))
+ .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
+ .addImm(Low / Scale)
+ .setMemRefs(I->memoperands())
+ .setMIFlags(I->mergeFlagsWith(*Update));
+ (void)MemMIB;
+
+ ++NumConstOffsetFolded;
+ LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
+ LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
+ LLVM_DEBUG(PrevI->print(dbgs()));
+ LLVM_DEBUG(dbgs() << " ");
+ LLVM_DEBUG(Update->print(dbgs()));
+ LLVM_DEBUG(dbgs() << " ");
+ LLVM_DEBUG(I->print(dbgs()));
+ LLVM_DEBUG(dbgs() << " with instruction:\n ");
+ LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
+ LLVM_DEBUG(dbgs() << " ");
+ LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions for the block.
+ I->eraseFromParent();
+ PrevI->eraseFromParent();
+ Update->eraseFromParent();
+
+ return NextI;
+}
+
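To make the High/Low split concrete, a small sketch (assuming Scale = 1, as for LDRBBroX) with an offset of 0x12345:

#include <cassert>
int main() {
  int Scale = 1;
  unsigned Offset = 0x12345;
  unsigned Mask = (1u << 12) * Scale - 1; // 0xfff
  unsigned Low = Offset & Mask;           // 0x345 -> imm12 operand (Low / Scale)
  unsigned High = Offset - Low;           // 0x12000 -> ADDXri #0x12, lsl #12
  assert((High >> 12) <= 0xfff && (Low / Scale) <= 0xfff);
}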
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
MachineInstr &MI,
unsigned BaseReg, int Offset) {
@@ -2065,6 +2171,31 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
return false;
}
+bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
+ MachineInstr &MI,
+ unsigned IndexReg,
+ unsigned &Offset) {
+ // The update instruction source and destination register must be the
+ // same as the load/store index register.
+ if (MI.getOpcode() == AArch64::MOVKWi &&
+ TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
+
+ // movz + movk hold a large offset of a Ld/St instruction.
+ MachineBasicBlock::iterator B = MI.getParent()->begin();
+ MachineBasicBlock::iterator MBBI = &MI;
+ MBBI = prev_nodbg(MBBI, B);
+ MachineInstr &MovzMI = *MBBI;
+ if (MovzMI.getOpcode() == AArch64::MOVZWi) {
+ unsigned Low = MovzMI.getOperand(1).getImm();
+ unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
+ Offset = High + Low;
+ // 12-bit optionally shifted immediates are legal for adds.
+ return Offset >> 24 == 0;
+ }
+ }
+ return false;
+}
+
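For example (a sketch of the pair this function matches, not text from the patch), "movz w8, #0x2345" followed by "movk w8, #0x1, lsl #16" reconstructs as:

#include <cassert>
int main() {
  unsigned Low = 0x2345;        // MOVZWi immediate
  unsigned High = 0x1u << 16;   // MOVKWi immediate << shift
  unsigned Offset = High + Low; // 0x12345
  assert(Offset >> 24 == 0);    // high part later fits ADD's shifted imm12
}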
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
@@ -2220,6 +2351,60 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
+ MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
+ MachineBasicBlock::iterator B = I->getParent()->begin();
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineInstr &MemMI = *I;
+ MachineBasicBlock::iterator MBBI = I;
+
+  // If the load is the first instruction in the block, there's obviously
+  // no matching constant move.
+ if (MBBI == B)
+ return E;
+
+ // Make sure the IndexReg is killed and the shift amount is zero.
+  // TODO: Relax this restriction to allow extends; keep processing simple for now.
+ if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
+ !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
+ (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
+ return E;
+
+ Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
+
+ // Track which register units have been modified and used between the first
+ // insn (inclusive) and the second insn.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+ unsigned Count = 0;
+ do {
+ MBBI = prev_nodbg(MBBI, B);
+ MachineInstr &MI = *MBBI;
+
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
+
+ // If we found a match, return it.
+ if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
+ return MBBI;
+ }
+
+ // Update the status of what the instruction clobbered and used.
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+ // Otherwise, if the index register is used or modified, we have no match,
+ // so return early.
+ if (!ModifiedRegUnits.available(IndexReg) ||
+ !UsedRegUnits.available(IndexReg))
+ return E;
+
+ } while (MBBI != B && Count < Limit);
+ return E;
+}
+
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -2404,6 +2589,34 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
return false;
}
+bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
+ int Scale) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+ MachineBasicBlock::iterator Update;
+
+ // Don't know how to handle unscaled pre/post-index versions below, so bail.
+ if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
+ return false;
+
+  // Look back to try to find a const offset for an index Ld/St instruction.
+  // For example,
+ // mov x8, #LargeImm ; = a * (1<<12) + imm12
+ // ldr x1, [x0, x8]
+ // merged into:
+ // add x8, x0, a * (1<<12)
+ // ldr x1, [x8, imm12]
+ unsigned Offset;
+ Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
+ if (Update != E && (Offset & (Scale - 1)) == 0) {
+ // Merge the imm12 into the ld/st.
+ MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
+ return true;
+ }
+
+ return false;
+}
+
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
@@ -2482,6 +2695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
}
+  // 5) Find a register assigned with a const value that can be folded
+  // into the load or store. e.g.,
+ // mov x8, #LargeImm ; = a * (1<<12) + imm12
+ // ldr x1, [x0, x8]
+ // ; becomes
+ // add x8, x0, a * (1<<12)
+ // ldr x1, [x8, imm12]
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ int Scale;
+ if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
+ Modified = true;
+ else
+ ++MBBI;
+ }
+
return Modified;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 9da59ef2a806..1a8c71888a85 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -93,16 +93,24 @@ AArch64FunctionInfo::AArch64FunctionInfo(const Function &F,
// TODO: skip functions that have no instrumented allocas for optimization
IsMTETagged = F.hasFnAttribute(Attribute::SanitizeMemTag);
- if (!F.hasFnAttribute("branch-target-enforcement")) {
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- F.getParent()->getModuleFlag("branch-target-enforcement")))
- BranchTargetEnforcement = BTE->getZExtValue();
- } else {
- const StringRef BTIEnable =
- F.getFnAttribute("branch-target-enforcement").getValueAsString();
- assert(BTIEnable == "true" || BTIEnable == "false");
- BranchTargetEnforcement = BTIEnable == "true";
- }
+  // BTI/PAuthLR may be set either on the function or the module. Set Bool from
+  // either the function attribute or the module attribute, depending on which
+  // is set.
+  // Note: the module attribute is numeric (0 or 1) but the function attribute
+  // is a string ("true" or "false").
+ auto TryFnThenModule = [&](StringRef AttrName, bool &Bool) {
+ if (F.hasFnAttribute(AttrName)) {
+ const StringRef V = F.getFnAttribute(AttrName).getValueAsString();
+ assert(V.equals_insensitive("true") || V.equals_insensitive("false"));
+ Bool = V.equals_insensitive("true");
+ } else if (const auto *ModVal = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag(AttrName))) {
+ Bool = ModVal->getZExtValue();
+ }
+ };
+
+ TryFnThenModule("branch-target-enforcement", BranchTargetEnforcement);
+ TryFnThenModule("branch-protection-pauth-lr", BranchProtectionPAuthLR);
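Standalone, the precedence the lambda implements looks like this (a sketch against the LLVM C++ API; the helper name is illustrative, not part of the patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// A string-valued function attribute wins over the numeric module flag.
static bool getBoolFnOrModuleAttr(const llvm::Function &F,
                                  llvm::StringRef Name, bool Default) {
  if (F.hasFnAttribute(Name))
    return F.getFnAttribute(Name).getValueAsString().equals_insensitive("true");
  if (const auto *MV = llvm::mdconst::extract_or_null<llvm::ConstantInt>(
          F.getParent()->getModuleFlag(Name)))
    return MV->getZExtValue();
  return Default;
}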
// The default stack probe size is 4096 if the function has no
// stack-probe-size attribute. This is a safe default because it is the
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 219f83cfd32e..cd4a18bfbc23 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSymbol.h"
#include <cassert>
#include <optional>
@@ -164,10 +165,21 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SignWithBKey modifies the default PAC-RET mode to signing with the B key.
bool SignWithBKey = false;
+  /// SignInstrLabel marks the PAC-RET signing instruction in the prologue so
+  /// that it can be re-used for authentication in the epilogue when using PC
+  /// as a second salt (FEAT_PAuth_LR).
+ MCSymbol *SignInstrLabel = nullptr;
+
/// BranchTargetEnforcement enables placing BTI instructions at potential
/// indirect branch destinations.
bool BranchTargetEnforcement = false;
+ /// Indicates that SP signing should be diversified with PC as-per PAuthLR.
+ /// This is set by -mbranch-protection and will emit NOP instructions unless
+ /// the subtarget feature +pauthlr is also used (in which case non-NOP
+ /// instructions are emitted).
+ bool BranchProtectionPAuthLR = false;
+
/// Whether this function has an extended frame record [Ctx, FP, LR]. If so,
/// bit 60 of the in-memory FP will be 1 to enable other tools to detect the
/// extended record.
@@ -436,10 +448,16 @@ public:
bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) const;
bool shouldSignWithBKey() const { return SignWithBKey; }
+
+ MCSymbol *getSigningInstrLabel() const { return SignInstrLabel; }
+ void setSigningInstrLabel(MCSymbol *Label) { SignInstrLabel = Label; }
+
bool isMTETagged() const { return IsMTETagged; }
bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
+ bool branchProtectionPAuthLR() const { return BranchProtectionPAuthLR; }
+
void setHasSwiftAsyncContext(bool HasContext) {
HasSwiftAsyncContext = HasContext;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 7576d2a899d1..7509afaeb5fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -60,11 +60,35 @@ FunctionPass *llvm::createAArch64PointerAuthPass() {
char AArch64PointerAuth::ID = 0;
+// Where PAuthLR support is not known at compile time, it is requested using
+// PACM. PACM is in the hint space, so it has no effect when the hardware does
+// not support PAuthLR, but it alters the behaviour of PACI*SP, AUTI*SP and
+// RETAA/RETAB when the hardware does support PAuthLR.
+static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ MachineInstr::MIFlag Flags, MCSymbol *PACSym = nullptr) {
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &MFnI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
+
+ // ADR X16,<address_of_PACIASP>
+ if (PACSym) {
+ assert(Flags == MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADR))
+ .addReg(AArch64::X16, RegState::Define)
+ .addSym(PACSym);
+ }
+
+ // Only emit PACM if -mbranch-protection has +pc and the target does not
+ // have feature +pauth-lr.
+ if (MFnI.branchProtectionPAuthLR() && !Subtarget.hasPAuthLR())
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM)).setMIFlag(Flags);
+}
+
void AArch64PointerAuth::signLR(MachineFunction &MF,
MachineBasicBlock::iterator MBBI) const {
- const AArch64FunctionInfo *MFnI = MF.getInfo<AArch64FunctionInfo>();
- bool UseBKey = MFnI->shouldSignWithBKey();
- bool EmitCFI = MFnI->needsDwarfUnwindInfo(MF);
+ auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+ bool UseBKey = MFnI.shouldSignWithBKey();
+ bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF);
bool NeedsWinCFI = MF.hasWinCFI();
MachineBasicBlock &MBB = *MBBI->getParent();
@@ -77,11 +101,29 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
+ // PAuthLR authentication instructions need to know the value of PC at the
+ // point of signing (PACI*).
+ if (MFnI.branchProtectionPAuthLR()) {
+ MCSymbol *PACSym = MF.getMMI().getContext().createTempSymbol();
+ MFnI.setSigningInstrLabel(PACSym);
+ }
+
// No SEH opcode for this one; it doesn't materialize into an
// instruction on Windows.
- BuildMI(MBB, MBBI, DL,
- TII->get(UseBKey ? AArch64::PACIBSP : AArch64::PACIASP))
- .setMIFlag(MachineInstr::FrameSetup);
+ if (MFnI.branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSPPC
+ : AArch64::PACIASPPC))
+ .setMIFlag(MachineInstr::FrameSetup)
+ ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+ } else {
+ BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL,
+ TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP
+ : AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup)
+ ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+ }
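Read together, the prologue now signs in one of three shapes (an illustrative summary of the branches above, not text from the patch):

#include <cassert>
#include <cstring>
// Which signing sequence signLR emits, by configuration.
const char *prologueSignSeq(bool PAuthLR, bool HasPAuthLRFeature, bool BKey) {
  if (PAuthLR && HasPAuthLRFeature)
    return BKey ? "pacibsppc" : "paciasppc";         // PC-diversified signing
  if (PAuthLR)
    return BKey ? "pacm; pacibsp" : "pacm; paciasp"; // hint-space fallback
  return BKey ? "pacibsp" : "paciasp";               // classic PAC-RET
}
int main() {
  assert(!std::strcmp(prologueSignSeq(true, true, false), "paciasppc"));
}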
if (EmitCFI) {
unsigned CFIIndex =
@@ -118,15 +160,37 @@ void AArch64PointerAuth::authenticateLR(
// DW_CFA_AARCH64_negate_ra_state can't be emitted.
bool TerminatorIsCombinable =
TI != MBB.end() && TI->getOpcode() == AArch64::RET;
+ MCSymbol *PACSym = MFnI->getSigningInstrLabel();
+
if (Subtarget->hasPAuth() && TerminatorIsCombinable && !NeedsWinCFI &&
!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- unsigned CombinedRetOpcode = UseBKey ? AArch64::RETAB : AArch64::RETAA;
- BuildMI(MBB, TI, DL, TII->get(CombinedRetOpcode)).copyImplicitOps(*TI);
+ if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+ assert(PACSym && "No PAC instruction to refer to");
+ BuildMI(MBB, TI, DL,
+ TII->get(UseBKey ? AArch64::RETABSPPCi : AArch64::RETAASPPCi))
+ .addSym(PACSym)
+ .copyImplicitOps(*MBBI)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else {
+ BuildPACM(*Subtarget, MBB, TI, DL, MachineInstr::FrameDestroy, PACSym);
+ BuildMI(MBB, TI, DL, TII->get(UseBKey ? AArch64::RETAB : AArch64::RETAA))
+ .copyImplicitOps(*MBBI)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
MBB.erase(TI);
} else {
- unsigned AutOpcode = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
- BuildMI(MBB, MBBI, DL, TII->get(AutOpcode))
- .setMIFlag(MachineInstr::FrameDestroy);
+ if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+ assert(PACSym && "No PAC instruction to refer to");
+ BuildMI(MBB, MBBI, DL,
+ TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
+ .addSym(PACSym)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else {
+ BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym);
+ BuildMI(MBB, MBBI, DL,
+ TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
if (EmitAsyncCFI) {
unsigned CFIIndex =
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 50527e08a061..344a15389063 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -316,6 +316,26 @@ def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3),
(add node:$op1, (AArch64asr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+// Replace pattern min(max(v1,v2),v3) by clamp
+def AArch64sclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
+ [(int_aarch64_sve_sclamp node:$Zd, node:$Zn, node:$Zm),
+ (AArch64smin_p (SVEAllActive),
+ (AArch64smax_p (SVEAllActive), node:$Zd, node:$Zn),
+ node:$Zm)
+ ]>;
+def AArch64uclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
+ [(int_aarch64_sve_uclamp node:$Zd, node:$Zn, node:$Zm),
+ (AArch64umin_p (SVEAllActive),
+ (AArch64umax_p (SVEAllActive), node:$Zd, node:$Zn),
+ node:$Zm)
+ ]>;
+def AArch64fclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
+ [(int_aarch64_sve_fclamp node:$Zd, node:$Zn, node:$Zm),
+ (AArch64fminnm_p (SVEAllActive),
+ (AArch64fmaxnm_p (SVEAllActive), node:$Zd, node:$Zn),
+ node:$Zm)
+ ]>;
+
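In scalar terms (a sketch of the idiom these PatFrags recognize), a clamp is min(max(v, lo), hi); in the operand order of AArch64sclamp, the destructive Zd is the value, Zn the lower bound and Zm the upper bound:

#include <algorithm>
#include <cassert>
long long sclamp(long long v, long long lo, long long hi) {
  return std::min(std::max(v, lo), hi); // smax, then smin
}
int main() { assert(sclamp(9, 0, 7) == 7 && sclamp(-3, 0, 7) == 0); }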
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>
@@ -2378,11 +2398,13 @@ let Predicates = [HasSVEorSME] in {
def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let mayStore = 1, hasSideEffects = 0 in {
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let AddedComplexity = 1 in {
@@ -3802,8 +3824,8 @@ let Predicates = [HasSVE2BitPerm] in {
let Predicates = [HasSVE2p1_or_HasSME] in {
defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>;
-defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, int_aarch64_sve_sclamp>;
-defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>;
+defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, AArch64sclamp>;
+defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, AArch64uclamp>;
defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
} // End HasSVE2p1_or_HasSME
@@ -3813,7 +3835,7 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
//===----------------------------------------------------------------------===//
let Predicates = [HasSVE2p1_or_HasSME2] in {
-defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
+defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", AArch64fclamp>;
defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
@@ -4003,10 +4025,10 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
} // End HasSVE2p1_or_HasSME2
//===----------------------------------------------------------------------===//
-// SVE2.1 non-widening BFloat16 to BFloat16 instructions
+// Non-widening BFloat16 to BFloat16 instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasSVE2p1, HasB16B16, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos] in {
defm BFADD_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fadd>;
defm BFSUB_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fsub>;
defm BFMUL_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmul>;
@@ -4014,9 +4036,9 @@ defm BFMAXNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmaxnm>;
defm BFMINNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fminnm>;
defm BFMIN_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmin>;
defm BFMAX_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmax>;
-} //HasSVE2p1_or_HasSME2p1, HasB16B16, UseExperimentalZeroingPseudos
+} // HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos
-let Predicates = [HasSVE2p1, HasB16B16] in {
+let Predicates = [HasSVE2orSME2, HasB16B16] in {
defm BFMLA_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b00, "bfmla", "BFMLA_ZPZZZ", AArch64fmla_m1>;
defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b01, "bfmls", "BFMLS_ZPZZZ", AArch64fmls_m1>;
@@ -4055,8 +4077,8 @@ defm BFMINNM_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fminnm_p>;
defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul", int_aarch64_sve_fmul_lane>;
-defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", int_aarch64_sve_fclamp>;
-} // End HasSVE2p1_or_HasSME2p1, HasB16B16
+defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", AArch64fclamp>;
+} // End HasSVE2orSME2, HasB16B16
//===----------------------------------------------------------------------===//
@@ -4163,3 +4185,24 @@ let Predicates = [HasSVE2orSME2, HasLUT] in {
// LUTI4 (two contiguous registers)
defm LUTI4_Z2ZZI : sve2_luti4_vector_vg2_index<"luti4">;
} // End HasSVE2orSME2, HasLUT
+
+//===----------------------------------------------------------------------===//
+// Checked Pointer Arithmetic (FEAT_CPA)
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE, HasCPA] in {
+ // Add/subtract (vectors, unpredicated)
+ def ADD_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b010, "addpt", ZPR64>;
+ def SUB_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b011, "subpt", ZPR64>;
+
+ // Add/subtract (vectors, predicated)
+ let DestructiveInstType = DestructiveBinaryComm in {
+ def ADD_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b100, "addpt", ZPR64>;
+ def SUB_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b101, "subpt", ZPR64>;
+ }
+
+ // Multiply-add vectors, writing multiplicand
+ def MAD_CPA : sve_int_mad_cpa<"madpt">;
+
+ // Multiply-add vectors, writing addend
+ def MLA_CPA : sve_int_mla_cpa<"mlapt">;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index 65b97ff6956a..7edce4b61605 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -20,12 +20,9 @@ def A64FXModel : SchedMachineModel {
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures =
- [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
- HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
- HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32,
- HasSMEFA64];
-
+ list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
+ [HasMTE, HasMatMulInt8, HasBF16,
+ HasPAuth, HasPAuthLR, HasCPA]);
let FullInstRWOverlapCheck = 0;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 503de3bee2b8..a6fab5e6245f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
- [HasSVE2p1]);
+ [HasSVE2p1, HasPAuthLR, HasCPA]);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 726be1a547b9..75fbb85dce9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -28,7 +28,7 @@ def NeoverseV1Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
SMEUnsupported.F,
- [HasMTE]);
+ [HasMTE, HasCPA]);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 3367d5d0cd31..658d7cdd23a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -22,7 +22,7 @@ def NeoverseV2Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
- [HasSVE2p1]);
+ [HasSVE2p1, HasCPA]);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index e8b5f6059c9e..0b80f263e12e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -643,6 +643,14 @@ defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>;
defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>;
}
+// Armv9.5-A TLBI VMALL for Dirty State
+let Requires = ["AArch64::FeatureTLBIW"] in {
+// op1, CRn, CRm, op2, needsreg
+defm : TLBI<"VMALLWS2E1", 0b100, 0b1000, 0b0110, 0b010, 0>;
+defm : TLBI<"VMALLWS2E1IS", 0b100, 0b1000, 0b0010, 0b010, 0>;
+defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>;
+}
+
//===----------------------------------------------------------------------===//
// MRS/MSR (system register read/write) instruction options.
//===----------------------------------------------------------------------===//
@@ -1946,3 +1954,22 @@ def : RWSysReg<"MDSTEPOP_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b010>;
// v9.5a System PMU zero register (FEAT_SPMU2)
// Op0 Op1 CRn CRm Op2
def : WOSysReg<"SPMZR_EL0", 0b10, 0b011, 0b1001, 0b1100, 0b100>;
+
+// v9.5a Delegated SError exceptions for EL3 (FEAT_E3DSE)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"VDISR_EL3", 0b11, 0b110, 0b1100, 0b0001, 0b001>;
+def : RWSysReg<"VSESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b011>;
+
+// v9.5a Hardware Dirty State Tracking Structure (FEAT_HDBSS)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"HDBSSBR_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b010>;
+def : RWSysReg<"HDBSSPROD_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b011>;
+
+// v9.5a Hardware Accelerator for Cleaning Dirty State (FEAT_HACDBS)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"HACDBSBR_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b100>;
+def : RWSysReg<"HACDBSCONS_EL2", 0b11, 0b100, 0b0010, 0b0011, 0b101>;
+
+// v9.5a Fine Grained Write Trap EL3 (FEAT_FGWTE3)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"FGWTE3_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b101>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 238269cf27bd..be66790c4277 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1541,6 +1541,13 @@ public:
getShiftExtendAmount() <= 4;
}
+ bool isLSLImm3Shift() const {
+ if (!isShiftExtend())
+ return false;
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return ET == AArch64_AM::LSL && getShiftExtendAmount() <= 7;
+ }
+
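The accept/reject boundary for the CPA shifter, spot-checked (a sketch; the failure case surfaces as the Match_AddSubLSLImm3ShiftLarge diagnostic added further down):

#include <cassert>
static bool isLSLImm3(bool IsLSL, unsigned Amount) {
  return IsLSL && Amount <= 7;
}
int main() {
  assert(isLSLImm3(true, 7));  // addpt x0, x1, x2, lsl #7
  assert(!isLSLImm3(true, 8)); // "expected 'lsl' with optional integer in range [0, 7]"
}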
template<int Width> bool isMemXExtend() const {
if (!isExtend())
return false;
@@ -1689,6 +1696,21 @@ public:
return DiagnosticPredicateTy::Match;
}
+ bool isPAuthPCRelLabel16Operand() const {
+    // PAuth PCRel16 operands are similar to regular branch targets, but only
+    // negative values are allowed for concrete immediates, as the signing
+    // instruction must be at a lower address.
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return true;
+ int64_t Val = MCE->getValue();
+ if (Val & 0b11)
+ return false;
+ return (Val <= 0) && (Val > -(1 << 18));
+ }
+
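Spot-checking the boundary conditions above (a sketch, not from the patch):

#include <cassert>
static bool isPAuthPCRelImm16(long long Val) {
  if (Val & 0b11)
    return false;                      // must be 4-byte aligned
  return Val <= 0 && Val > -(1 << 18); // signing instr at a lower address
}
int main() {
  assert(isPAuthPCRelImm16(-4) && !isPAuthPCRelImm16(4));
  assert(!isPAuthPCRelImm16(-6) && !isPAuthPCRelImm16(-(1 << 18)));
}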
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
@@ -1990,6 +2012,19 @@ public:
Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
+ void addPAuthPCRelLabel16Operands(MCInst &Inst, unsigned N) const {
+ // PC-relative operands don't encode the low bits, so shift them off
+ // here. If it's a label, however, just put it on directly as there's
+ // not enough information now to do anything.
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE) {
+ addExpr(Inst, getImm());
+ return;
+ }
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
+ }
+
void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const {
// Branch operands don't encode the low bits, so shift them off
// here. If it's a label, however, just put it on directly as there's
@@ -2091,6 +2126,12 @@ public:
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addLSLImm3ShifterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Imm = getShiftExtendAmount();
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
void addSyspXzrPairOperand(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -3664,6 +3705,8 @@ static const struct Extension {
{"sme-f8f16", {AArch64::FeatureSMEF8F16}},
{"sme-f8f32", {AArch64::FeatureSMEF8F32}},
{"sme-fa64", {AArch64::FeatureSMEFA64}},
+ {"cpa", {AArch64::FeatureCPA}},
+ {"tlbiw", {AArch64::FeatureTLBIW}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -6064,6 +6107,9 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
"Invalid vector list, expected list with each SVE vector in the list "
"4 registers apart, and the first register in the range [z0, z3] or "
"[z16, z19] and with correct element type");
+ case Match_AddSubLSLImm3ShiftLarge:
+ return Error(Loc,
+ "expected 'lsl' with optional integer in range [0, 7]");
default:
llvm_unreachable("unexpected error code!");
}
@@ -6448,6 +6494,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed8:
case Match_InvalidMemoryIndexed16:
case Match_InvalidCondCode:
+ case Match_AddSubLSLImm3ShiftLarge:
case Match_AddSubRegExtendSmall:
case Match_AddSubRegExtendLarge:
case Match_AddSubSecondSource:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index cf2d3879292d..e3220d103ae0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -165,6 +165,9 @@ static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -887,6 +890,21 @@ static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
return Success;
}
+static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+  // Immediate is encoded as the top 16 bits of an unsigned 18-bit negative
+  // PC-relative offset.
+  uint64_t ImmVal = Imm;
+  if (ImmVal > (1 << 16)) // Imm is unsigned; only the upper bound needs checking.
+ return Fail;
+ ImmVal = -ImmVal;
+ if (!Decoder->tryAddingSymbolicOperand(Inst, (ImmVal << 2), Addr,
+ /*IsBranch=*/false, 0, 0, 4))
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ return Success;
+}
+
static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bdaae4dd724d..a4ace6cce463 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -6717,68 +6717,6 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
- case Intrinsic::ptrauth_sign: {
- Register DstReg = I.getOperand(0).getReg();
- Register ValReg = I.getOperand(2).getReg();
- uint64_t Key = I.getOperand(3).getImm();
- Register DiscReg = I.getOperand(4).getReg();
- auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
- bool IsDiscZero = DiscVal && DiscVal->isZero();
-
- if (Key > AArch64PACKey::LAST)
- return false;
-
- unsigned Opcodes[][4] = {
- {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
- {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
- unsigned Opcode = Opcodes[IsDiscZero][Key];
-
- auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
-
- if (!IsDiscZero) {
- PAC.addUse(DiscReg);
- RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
- }
-
- RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::ptrauth_strip: {
- Register DstReg = I.getOperand(0).getReg();
- Register ValReg = I.getOperand(2).getReg();
- uint64_t Key = I.getOperand(3).getImm();
-
- if (Key > AArch64PACKey::LAST)
- return false;
- unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key);
-
- MIB.buildInstr(Opcode, {DstReg}, {ValReg});
-
- RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
- RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI);
- I.eraseFromParent();
- return true;
- }
- case Intrinsic::ptrauth_blend: {
- MachineFunction &MF = *I.getParent()->getParent();
- auto RHS = getIConstantVRegVal(I.getOperand(3).getReg(), MRI);
- if (RHS && (RHS->getZExtValue() <= 0xffff)) {
- I.setDesc(TII.get(AArch64::MOVKXi));
- I.removeOperand(3);
- I.removeOperand(1);
- MachineInstrBuilder(MF, I)
- .addImm(RHS->getZExtValue() & 0xffff)
- .addImm(48)
- .constrainAllUses(TII, TRI, RBI);
- } else {
- I.setDesc(TII.get(AArch64::BFMXri));
- I.removeOperand(1);
- MachineInstrBuilder(MF, I).addImm(16).addImm(15).constrainAllUses(
- TII, TRI, RBI);
- }
- return true;
- }
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 687063873a16..830203b61c58 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -769,6 +769,27 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ return SrcTy.isVector() && !SrcTy.isScalable() &&
+ Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
+}
+
+void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
+ "Expected a fixed length vector");
+
+ for (int I = 0; I < SrcTy.getNumElements(); ++I)
+ B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
+ MI.eraseFromParent();
+}
+
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
auto Splat = getAArch64VectorSplat(MI, MRI);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a6900b8963bb..30ef3680ae79 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -67,6 +67,7 @@ public:
{"fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal},
{"fixup_aarch64_movw", 5, 16, 0},
{"fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal},
+ {"fixup_aarch64_pcrel_branch16", 5, 16, PCRelFlagVal},
{"fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal},
{"fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal},
{"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal}};
@@ -121,6 +122,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case AArch64::fixup_aarch64_movw:
case AArch64::fixup_aarch64_pcrel_branch14:
+ case AArch64::fixup_aarch64_pcrel_branch16:
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
case AArch64::fixup_aarch64_ldst_imm12_scale2:
@@ -314,6 +316,17 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (Value & 0x3)
Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3fff;
+ case AArch64::fixup_aarch64_pcrel_branch16:
+ // Unsigned PC-relative offset, so invert the negative immediate.
+ SignedValue = -SignedValue;
+ Value = static_cast<uint64_t>(SignedValue);
+ // Check valid 18-bit unsigned range.
+ if (SignedValue < 0 || SignedValue > ((1 << 18) - 1))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ // Low two bits are not encoded (4-byte alignment assumed).
+ if (Value & 0b11)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ return (Value >> 2) & 0xffff;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
if (TheTriple.isOSBinFormatCOFF() && !IsResolved && SignedValue != 0) {
@@ -380,6 +393,7 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
case AArch64::fixup_aarch64_movw:
case AArch64::fixup_aarch64_pcrel_branch14:
+ case AArch64::fixup_aarch64_pcrel_branch16:
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
case AArch64::fixup_aarch64_ldst_imm12_scale2:
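A worked encode for the new fixup (a sketch mirroring the branch16 case above): if the PAC label sits 0x40 bytes before the AUT instruction, SignedValue is -0x40, negation gives 0x40, the alignment check passes, and the stored 16-bit field is 0x40 >> 2 = 0x10.

#include <cassert>
#include <cstdint>
uint64_t encodeBranch16(int64_t SignedValue) {
  SignedValue = -SignedValue; // unsigned PC-relative offset
  uint64_t Value = static_cast<uint64_t>(SignedValue);
  assert(SignedValue >= 0 && SignedValue <= (1 << 18) - 1 && "out of range");
  assert((Value & 0b11) == 0 && "not sufficiently aligned");
  return (Value >> 2) & 0xffff;
}
int main() { assert(encodeBranch16(-0x40) == 0x10); }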
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 9de40661298c..496ab18e9b19 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -186,6 +186,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(LD_PREL_LO19);
case AArch64::fixup_aarch64_pcrel_branch14:
return R_CLS(TSTBR14);
+ case AArch64::fixup_aarch64_pcrel_branch16:
+ Ctx.reportError(Fixup.getLoc(),
+ "relocation of PAC/AUT instructions is not supported");
+ return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_pcrel_branch19:
return R_CLS(CONDBR19);
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 767dd8805520..fdee2d5ad2bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -43,6 +43,11 @@ enum Fixups {
// The high 14 bits of a 21-bit pc-relative immediate.
fixup_aarch64_pcrel_branch14,
+  // The high 16 bits of an 18-bit unsigned PC-relative immediate. Used by
+ // pointer authentication, only within a function, so no relocation can be
+ // generated.
+ fixup_aarch64_pcrel_branch16,
+
// The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
// fixup_aarch64_pcrel_adrhi, except this is used by b.cc and generates
// relocations directly when necessary.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index dbc4323a860f..c3e12b6d8024 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -88,6 +88,12 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ /// getPAuthPCRelOpValue - Return the encoded value for a pointer
+ /// authentication pc-relative operand.
+ uint32_t getPAuthPCRelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// getLoadLiteralOpValue - Return the encoded value for a load-literal
/// pc-relative address.
uint32_t getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx,
@@ -327,6 +333,29 @@ uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue(
return 0;
}
+/// getPAuthPCRelOpValue - Return the encoded value for a pointer
+/// authentication pc-relative operand.
+uint32_t
+AArch64MCCodeEmitter::getPAuthPCRelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+  // If the destination is an immediate, invert the sign, as it's a negative
+  // value that should be encoded as unsigned.
+ if (MO.isImm())
+ return -(MO.getImm());
+ assert(MO.isExpr() && "Unexpected target type!");
+
+ MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch16);
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
+
+ ++MCNumFixups;
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
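Tracing a concrete immediate through the new plumbing (illustrative): the parser stores the label offset pre-shifted, and this encoder only flips the sign, matching the fixup math above.

#include <cassert>
#include <cstdint>
int main() {
  int64_t Label = -0x40;        // signing instruction 0x40 bytes back
  int64_t Operand = Label >> 2; // addPAuthPCRelLabel16Operands: -0x10 (arithmetic shift)
  uint32_t Encoded = static_cast<uint32_t>(-Operand);
  assert(Encoded == 0x10);      // same field value as encodeBranch16(-0x40)
}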
/// getLoadLiteralOpValue - Return the encoded value for a load-literal
/// pc-relative address.
uint32_t
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 4f8917618ea4..70f3c2c99f0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1259,6 +1259,12 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
+
+ def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME)>;
+
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
@@ -3712,8 +3718,14 @@ multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator i
def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
- def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
- def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
@@ -3805,8 +3817,14 @@ multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator i
def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
- def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
- def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9edf26052247..b7552541e950 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10433,3 +10433,34 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
let Inst{23-22} = idx;
}
}
+
+//===----------------------------------------------------------------------===//
+// Checked Pointer Arithmetic (FEAT_CPA)
+//===----------------------------------------------------------------------===//
+class sve_int_mad_cpa<string asm>
+ : I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Za),
+ asm, "\t$Zdn, $Zm, $Za", "", []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bits<5> Za;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = 0b11; // sz
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b1;
+ let Inst{14-10} = 0b10110; // opc
+ let Inst{9-5} = Za;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR64.ElementSize;
+ let hasSideEffects = 0;
+}
+
+class sve_int_mla_cpa<string asm>
+ : sve2_int_mla<0b11, 0b10100, asm, ZPR64, ZPR64> {
+ let Inst{15} = 0b1;
+
+ let ElementSize = ZPR64.ElementSize;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
index 060fb66d38f7..d2a325d5ad89 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1100,8 +1100,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
- FeatureGWS, FeatureTrue16BitInsts
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
+ FeatureTrue16BitInsts
]
>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 0a17b1536040..4462cd8a31f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -75,8 +75,9 @@ enum class SchedGroupMask {
DS = 1u << 7,
DS_READ = 1u << 8,
DS_WRITE = 1u << 9,
+ TRANS = 1u << 10,
ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
- DS_READ | DS_WRITE,
+ DS_READ | DS_WRITE | TRANS,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
@@ -1435,11 +1436,12 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
Result = false;
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
- (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI)))
+ (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
+ TII->isTRANS(MI)))
Result = true;
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
- TII->isVALU(MI) && !TII->isMFMAorWMMA(MI))
+ TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
Result = true;
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
@@ -1476,6 +1478,10 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
MI.mayStore() && TII->isDS(MI))
Result = true;
+ else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) &&
+ TII->isTRANS(MI))
+ Result = true;
+
LLVM_DEBUG(
dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true)
<< (Result ? " could classify " : " unable to classify ") << MI);
@@ -1635,10 +1641,13 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
// Remove all existing edges from the SCHED_BARRIER that were added due to the
// instruction having side effects.
resetEdges(SchedBarrier, DAG);
+ LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
+ << MI.getOperand(0).getImm() << "\n");
auto InvertedMask =
invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm());
SchedGroup SG(InvertedMask, std::nullopt, DAG, TII);
SG.initSchedGroup();
+
// Preserve original instruction ordering relative to the SCHED_BARRIER.
SG.link(
SchedBarrier,
@@ -1652,14 +1661,15 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
// allowed past the SCHED_BARRIER.
SchedGroupMask InvertedMask = ~Mask;
- // ALU implies VALU, SALU, MFMA.
+ // ALU implies VALU, SALU, MFMA, TRANS.
if ((InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE)
- InvertedMask &=
- ~SchedGroupMask::VALU & ~SchedGroupMask::SALU & ~SchedGroupMask::MFMA;
- // VALU, SALU, MFMA implies ALU.
+ InvertedMask &= ~SchedGroupMask::VALU & ~SchedGroupMask::SALU &
+ ~SchedGroupMask::MFMA & ~SchedGroupMask::TRANS;
+ // VALU, SALU, MFMA, TRANS implies ALU.
else if ((InvertedMask & SchedGroupMask::VALU) == SchedGroupMask::NONE ||
(InvertedMask & SchedGroupMask::SALU) == SchedGroupMask::NONE ||
- (InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE)
+ (InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE ||
+ (InvertedMask & SchedGroupMask::TRANS) == SchedGroupMask::NONE)
InvertedMask &= ~SchedGroupMask::ALU;
// VMEM implies VMEM_READ, VMEM_WRITE.
@@ -1678,6 +1688,9 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
(InvertedMask & SchedGroupMask::DS_WRITE) == SchedGroupMask::NONE)
InvertedMask &= ~SchedGroupMask::DS;
+ LLVM_DEBUG(dbgs() << "After Inverting, SchedGroup Mask: " << (int)InvertedMask
+ << "\n");
+
return InvertedMask;
}
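The implication rules keep the inverted barrier mask self-consistent: excluding ALU excludes all of its sub-kinds, and excluding any sub-kind excludes ALU. A standalone sketch of the same rule over a simplified five-bit mask; the names mirror the enum above but the values are illustrative:

#include <cstdint>
#include <cstdio>

enum Mask : uint32_t {
  ALU = 1u << 0, VALU = 1u << 1, SALU = 1u << 2,
  MFMA = 1u << 3, TRANS = 1u << 4, ALL = 0x1F
};

static uint32_t invertBarrierMask(uint32_t M) {
  uint32_t Inv = ~M & ALL;
  // ALU implies VALU, SALU, MFMA, TRANS: dropping ALU drops them all.
  if ((Inv & ALU) == 0)
    Inv &= ~(VALU | SALU | MFMA | TRANS);
  // Conversely, dropping any sub-kind drops ALU.
  else if ((Inv & VALU) == 0 || (Inv & SALU) == 0 ||
           (Inv & MFMA) == 0 || (Inv & TRANS) == 0)
    Inv &= ~ALU;
  return Inv;
}

int main() {
  // A barrier mask of TRANS blocks TRANS; the inverted (allowed) group
  // then contains everything except TRANS and, by implication, ALU.
  std::printf("%#x\n", invertBarrierMask(TRANS)); // prints 0xe
}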
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9d7443012e3d..541a5b62450d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -169,11 +169,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@@ -185,10 +191,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand);
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
@@ -506,9 +517,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
- // There are no libcalls of any kind.
- for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
- setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+ // Disable most libcalls.
+ for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
+ if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
+ setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+ }
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
@@ -556,6 +569,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
ISD::FSUB, ISD::FNEG,
ISD::FABS, ISD::AssertZext,
ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
+
+ setMaxAtomicSizeInBitsSupported(64);
}
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
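Keeping the __atomic_* libcall range alive pairs with the new setMaxAtomicSizeInBitsSupported(64): atomics wider than 64 bits can then still be expanded to libcalls. A standalone illustration of the contiguous-range filter, with a stub enum standing in for RTLIB (the real code assumes the atomic libcalls occupy one contiguous range):

#include <cstdio>

enum Libcall { SIN_F32, COS_F32, ATOMIC_LOAD, ATOMIC_STORE,
               ATOMIC_FETCH_NAND_16, MEMCPY, UNKNOWN_LIBCALL };

int main() {
  for (int I = 0; I < UNKNOWN_LIBCALL; ++I) {
    // Keep only the atomic range; everything else is disabled.
    bool KeepAtomic = I >= ATOMIC_LOAD && I <= ATOMIC_FETCH_NAND_16;
    std::printf("libcall %d %s\n", I, KeepAtomic ? "kept" : "disabled");
  }
}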
@@ -3055,18 +3070,26 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+ bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
- if (Src.getValueType() == MVT::i32) {
+ if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
// (ctlz hi:lo) -> (umin (ffbh src), 32)
// (cttz hi:lo) -> (umin (ffbl src), 32)
// (ctlz_zero_undef src) -> (ffbh src)
// (cttz_zero_undef src) -> (ffbl src)
+
+ // The 64-bit scalar version produces a 32-bit result:
+ // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64)
+ // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
+ // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
+ // (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
if (!ZeroUndef) {
- const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
- NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
+ const SDValue ConstVal = DAG.getConstant(
+ Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
}
- return NewOpr;
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
}
SDValue Lo, Hi;
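A standalone model of the 64-bit scalar path: the hardware count returns a 32-bit value (0xFFFFFFFF when no bit is found), the non-zero-undef forms clamp it to the bit width, and the result is zero-extended back to i64. Compiler builtins stand in for S_FLBIT_I32_B64 here:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t flbit_b64(uint64_t V) {           // models S_FLBIT_I32_B64
  return V ? static_cast<uint32_t>(__builtin_clzll(V)) : 0xFFFFFFFFu;
}

static uint64_t ctlz64(uint64_t V) {
  return std::min<uint32_t>(flbit_b64(V), 64);    // clamp, then zext to i64
}

int main() {
  assert(ctlz64(0) == 64);      // "not found" clamps to the bit width
  assert(ctlz64(1) == 63);
  assert(ctlz64(~0ull) == 0);
}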
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ee93d9eb4c0a..2bb7b6bd0674 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1241,6 +1241,10 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+ // dmask 0 has special semantics, do not simplify.
+ if (DMaskVal == 0)
+ return nullptr;
+
// Mask off values that are undefined because the dmask doesn't cover them
DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
@@ -1261,7 +1265,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
unsigned NewNumElts = DemandedElts.popcount();
if (!NewNumElts)
- return UndefValue::get(IIVTy);
+ return PoisonValue::get(IIVTy);
if (NewNumElts >= VWidth && DemandedElts.isMask()) {
if (DMaskIdx >= 0)
@@ -1299,7 +1303,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
if (IsLoad) {
if (NewNumElts == 1) {
- return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
+ return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
DemandedElts.countr_zero());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 0c21382e5c22..f03e6b8915b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1050,8 +1050,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
CF->isNegative();
} else {
needlog = true;
- needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
- (!CF || CF->isNegative());
+ needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
}
} else {
ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 1bed516fb5c7..5e73411cae9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -530,6 +530,15 @@ static Value *promoteAllocaUserToVector(
return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
}
+ if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
+ if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
+ Intr->replaceAllUsesWith(
+ Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
+ DL.getTypeAllocSize(VectorTy)));
+ return nullptr;
+ }
+ }
+
llvm_unreachable("Unsupported call when promoting alloca to vector");
}
@@ -773,8 +782,17 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
continue;
}
+ if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
+ if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
+ WorkList.push_back(Inst);
+ continue;
+ }
+ }
+
// Ignore assume-like intrinsics and comparisons used in assumes.
if (isAssumeLikeIntrinsic(Inst)) {
+ if (!Inst->use_empty())
+ return RejectUser(Inst, "assume-like intrinsic cannot have any users");
UsersToRemove.push_back(Inst);
continue;
}
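Once the alloca is promoted to a vector, llvm.objectsize over it can be answered directly with the vector type's allocation size, which is what the hunk above folds to. A tiny standalone picture of that computation, with illustrative types:

#include <cassert>
#include <cstdint>

struct VecTy { unsigned NumElts; unsigned EltBytes; };
static uint64_t typeAllocSize(VecTy T) { return uint64_t(T.NumElts) * T.EltBytes; }

int main() {
  VecTy V{16, 4};                  // e.g. a <16 x i32> replacing the alloca
  assert(typeAllocSize(V) == 64);  // objectsize folds to 64 bytes
}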
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8c04ecf39ba..fdc2077868cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -345,6 +345,11 @@ static cl::opt<bool> EnableImageIntrinsicOptimizer(
cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
cl::Hidden);
+static cl::opt<bool>
+ EnableLoopPrefetch("amdgpu-loop-prefetch",
+ cl::desc("Enable loop data prefetch on AMDGPU"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> EnableMaxIlpSchedStrategy(
"amdgpu-enable-max-ilp-scheduling-strategy",
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
@@ -982,6 +987,8 @@ void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
}
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
+ if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
+ addPass(createLoopDataPrefetchPass());
addPass(createSeparateConstOffsetFromGEPPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f1da1a61bf4d..ebe0b8551b23 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1345,3 +1345,11 @@ GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {
Cost.first += (Size + 255) / 256;
return Cost;
}
+
+unsigned GCNTTIImpl::getPrefetchDistance() const {
+ return ST->hasPrefetch() ? 128 : 0;
+}
+
+bool GCNTTIImpl::shouldPrefetchAddressSpace(unsigned AS) const {
+ return AMDGPU::isFlatGlobalAddrSpace(AS);
+}
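A hedged sketch of how a prefetch pass might consult these two hooks; the call sites are assumptions, not the actual LoopDataPrefetch code, and the address-space numbers (0 flat, 1 global) follow the AMDGPU convention:

#include <cstdio>
#include <initializer_list>

// Stand-ins for the two TTI hooks above (illustrative, not LLVM API).
static unsigned getPrefetchDistance(bool HasPrefetch) {
  return HasPrefetch ? 128 : 0;   // 0 disables prefetching entirely
}
static bool shouldPrefetchAddressSpace(unsigned AS) {
  return AS == 0 || AS == 1;      // flat (0) and global (1)
}

int main() {
  unsigned Dist = getPrefetchDistance(/*HasPrefetch=*/true);
  for (unsigned AS : {0u, 1u, 3u})
    std::printf("AS %u: %s (distance %u)\n", AS,
                Dist && shouldPrefetchAddressSpace(AS) ? "prefetch" : "skip",
                Dist);
}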
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 1e6c5bbfc0d7..cd8e9fd10bbf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -254,6 +254,16 @@ public:
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
FastMathFlags FMF,
TTI::TargetCostKind CostKind);
+
+ /// Data cache line size for the LoopDataPrefetch pass. Unused before GFX12.
+ unsigned getCacheLineSize() const override { return 128; }
+
+ /// How much before a load we should place the prefetch instruction.
+ /// This is currently measured in number of IR instructions.
+ unsigned getPrefetchDistance() const override;
+
+ /// \return true if the target wants to issue a prefetch in address space \p AS.
+ bool shouldPrefetchAddressSpace(unsigned AS) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
index 3a895923fa4b..bc9049b4ef33 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1147,7 +1147,8 @@ def : GCNPat <
>;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
-def : Pat <
+let OtherPredicates = [HasGDS] in
+def : GCNPat <
(SIds_ordered_count i32:$value, i16:$offset),
(DS_ORDERED_COUNT $value, (as_i16imm $offset))
>;
@@ -1189,7 +1190,8 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
- string opName = ps.Mnemonic>
+ string opName = ps.Mnemonic,
+ bit hasGFX12Enc = 0>
: DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {
let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
@@ -1201,6 +1203,8 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
+
+ let gds = !if(hasGFX12Enc, 0, ?);
}
//===----------------------------------------------------------------------===//
@@ -1212,7 +1216,7 @@ let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
defvar ps = !cast<DS_Pseudo>(NAME);
def _gfx12 :
Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
- ps.Mnemonic>;
+ ps.Mnemonic, 1>;
}
multiclass DS_Real_Renamed_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
@@ -1220,7 +1224,7 @@ let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
def _gfx12 :
Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, backing_pseudo,
SIEncodingFamily.GFX12,
- real_name>,
+ real_name, 1>,
MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
Requires<[isGFX12Plus]>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index ed2e7e4f189e..7939d0036568 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -702,6 +702,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
+ !AMDGPU::hasGDS(STI)) {
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 34826809c1a6..fc119aa61d01 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -540,10 +540,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::f16, Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom);
-
- setOperationAction(
- {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP},
- MVT::f16, Promote);
+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
// F16 - VOP2 Actions.
setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);
@@ -1145,11 +1142,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
- // XXX - Should this be volatile without known ordering?
- Info.flags |= MachineMemOperand::MOVolatile;
-
switch (IntrID) {
default:
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
break;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
@@ -1157,6 +1153,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+ Info.ptrVal = CI.getArgOperand(1);
return true;
}
}
@@ -1289,8 +1286,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
- MachineMemOperand::MOVolatile;
+ Info.ptrVal = CI.getArgOperand(1);
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
return true;
}
case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
@@ -9231,7 +9228,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
MachinePointerInfo StorePtrI = LoadPtrI;
- StorePtrI.V = nullptr;
+ LoadPtrI.V = PoisonValue::get(
+ PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS));
+ LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
@@ -9309,6 +9308,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = Op->getConstantOperandVal(5);
MachinePointerInfo StorePtrI = LoadPtrI;
+ LoadPtrI.V = PoisonValue::get(
+ PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS));
LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 8415a3d77d3b..55ddb540c51e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -238,7 +238,7 @@ public:
bool merge(const WaitcntBrackets &Other);
- RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
+ RegInterval getRegInterval(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI, unsigned OpNo) const;
@@ -500,7 +500,6 @@ public:
} // end anonymous namespace
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
- const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI,
unsigned OpNo) const {
@@ -534,7 +533,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
else
return {-1, -1};
- const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo);
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
unsigned Size = TRI->getRegSizeInBits(*RC);
Result.second = Result.first + ((Size + 16) / 32);
@@ -546,7 +545,7 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, unsigned OpNo,
unsigned Val) {
- RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo);
+ RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
setRegScore(RegNo, EXP_CNT, Val);
@@ -674,7 +673,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) {
MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data);
unsigned OpNo;//TODO: find the OpNo for this operand;
- RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo);
+ RegInterval Interval = getRegInterval(&Inst, MRI, TRI, OpNo);
for (int RegNo = Interval.first; RegNo < Interval.second;
++RegNo) {
setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
@@ -686,7 +685,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
auto &Op = Inst.getOperand(I);
if (!Op.isReg() || !Op.isDef())
continue;
- RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I);
+ RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
if (T == VM_CNT) {
if (Interval.first >= NUM_ALL_VGPRS)
continue;
@@ -1140,7 +1139,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (MI.getOperand(CallAddrOpIdx).isReg()) {
RegInterval CallAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
@@ -1150,7 +1149,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
if (RtnAddrOpIdx != -1) {
RegInterval RtnAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
for (int RegNo = RtnAddrOpInterval.first;
RegNo < RtnAddrOpInterval.second; ++RegNo)
@@ -1202,8 +1201,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
continue;
- RegInterval Interval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I);
+ RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -1782,7 +1780,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
continue;
- RegInterval Interval = Brackets.getRegInterval(&MI, TII, MRI, TRI, I);
+ RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
// Vgpr use
if (Op.isUse()) {
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 70ef1fff274a..ebe23a5eac57 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -245,6 +245,10 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
return false;
+ // A mayLoad instruction without a def is not a load. Likely a prefetch.
+ if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
+ return false;
+
if (isDS(Opc0) && isDS(Opc1)) {
// FIXME: Handle this case:
@@ -3654,6 +3658,9 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
+ if (isLDSDMA(MIa) || isLDSDMA(MIb))
+ return false;
+
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
// underlying address space, even if it was lowered to a different one,
@@ -4976,6 +4983,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isDS(MI) && !ST.hasGDS()) {
+ const MachineOperand *GDSOp = getNamedOperand(MI, AMDGPU::OpName::gds);
+ if (GDSOp && GDSOp->getImm() != 0) {
+ ErrInfo = "GDS is not supported on this subtarget";
+ return false;
+ }
+ }
+
if (isImage(MI)) {
const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
if (DimOp) {
@@ -6897,6 +6912,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Inst.eraseFromParent();
return;
+ case AMDGPU::S_FLBIT_I32_B64:
+ splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
+ Inst.eraseFromParent();
+ return;
+ case AMDGPU::S_FF1_I32_B64:
+ splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
+ Inst.eraseFromParent();
+ return;
+
case AMDGPU::S_LSHL_B32:
if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
@@ -7830,6 +7854,61 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
+void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ MachineDominatorTree *MDT) const {
+ // (S_FLBIT_I32_B64 hi:lo) ->
+ //   (umin (V_FFBH_U32_e32 hi), (uaddsat (V_FFBH_U32_e32 lo), 32))
+ // (S_FF1_I32_B64 hi:lo) ->
+ //   (umin (uaddsat (V_FFBL_B32_e32 hi), 32), (V_FFBL_B32_e32 lo))
+
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src = Inst.getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+
+ bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
+ unsigned OpcodeAdd =
+ ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
+
+ const TargetRegisterClass *SrcRC =
+ Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
+ const TargetRegisterClass *SrcSubRC =
+ RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
+
+ Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0);
+
+ BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1);
+
+ BuildMI(MBB, MII, DL, get(OpcodeAdd), MidReg3)
+ .addReg(IsCtlz ? MidReg1 : MidReg2)
+ .addImm(32)
+ .addImm(1); // enable clamp
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN_U32_e64), MidReg4)
+ .addReg(MidReg3)
+ .addReg(IsCtlz ? MidReg2 : MidReg1);
+
+ MRI.replaceRegWith(Dest.getReg(), MidReg4);
+
+ addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
+}
+
void SIInstrInfo::addUsersToMoveToVALUWorklist(
Register DstReg, MachineRegisterInfo &MRI,
SIInstrWorklist &Worklist) const {
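A standalone model of the 64-bit count split emitted above, checking the umin/uaddsat identities on a sample value. Compiler builtins stand in for V_FFBH_U32/V_FFBL_B32, which return 0xFFFFFFFF when no bit is found; the saturating +32 keeps that "not found" value saturated so the other half wins:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t ffbh(uint32_t V) { return V ? __builtin_clz(V) : ~0u; }
static uint32_t ffbl(uint32_t V) { return V ? __builtin_ctz(V) : ~0u; }
static uint32_t uaddsat(uint32_t A, uint32_t B) {
  uint64_t S = static_cast<uint64_t>(A) + B;
  return S > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<uint32_t>(S);
}

int main() {
  uint32_t Hi = 0, Lo = 0x80u;      // 64-bit value with only bit 7 set
  // ctlz64 = umin(ffbh(hi), uaddsat(ffbh(lo), 32)) = 24 + 32 = 56
  assert(std::min(ffbh(Hi), uaddsat(ffbh(Lo), 32)) == 56u);
  // cttz64 = umin(uaddsat(ffbl(hi), 32), ffbl(lo)) = 7
  assert(std::min(uaddsat(ffbl(Hi), 32), ffbl(Lo)) == 7u);
}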
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index affe52046752..46eee6fae0a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -144,6 +144,9 @@ private:
void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
MachineInstr &Inst) const;
void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
+ void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
+ unsigned Opcode,
+ MachineDominatorTree *MDT = nullptr) const;
void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
MachineInstr &Inst) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index a3a71a8ec09a..eeb7f64aa581 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Fold the return instruction into the LDM.
DeleteRet = true;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
- // We 'restore' LR into PC so it is not live out of the return block:
- // Clear Restored bit.
- Info.setRestored(false);
}
// If NoGap is true, pop consecutive registers and then leave the rest
@@ -2695,8 +2692,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
const Align TargetAlign = getStackAlign();
if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
- for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
- unsigned Reg = UnspilledCS1GPRs[i];
+ for (unsigned Reg : UnspilledCS1GPRs) {
// Don't spill high register if the function is thumb. In the case of
// Windows on ARM, accept R11 (frame pointer)
if (!AFI->isThumbFunction() ||
@@ -2785,6 +2781,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}
+void ARMFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+
+ // If every return terminator restores LR directly into PC, LR is not live
+ // out of the return block, so we can 'restore' LR into PC and clear the
+ // Restored bit in that case.
+ for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
+ if (Info.getReg() != ARM::LR)
+ continue;
+ if (all_of(MF, [](const MachineBasicBlock &MBB) {
+ return all_of(MBB.terminators(), [](const MachineInstr &Term) {
+ return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
+ Term.getOpcode() == ARM::t2LDMIA_RET ||
+ Term.getOpcode() == ARM::tPOP_RET;
+ });
+ })) {
+ Info.setRestored(false);
+ break;
+ }
+ }
+}
+
void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const {
TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
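A standalone sketch of the terminator predicate above: the Restored bit may be cleared only when every return is one of the pop-into-PC forms. The opcode set here is a simplified stand-in for the ARM opcodes named in the hunk:

#include <cassert>
#include <vector>

enum Opcode { LDMIA_RET, t2LDMIA_RET, tPOP_RET, BX_LR, OTHER };
struct Term { Opcode Op; bool IsReturn; };

static bool canClearRestored(const std::vector<Term> &Terms) {
  for (const Term &T : Terms)
    if (T.IsReturn && T.Op != LDMIA_RET && T.Op != t2LDMIA_RET &&
        T.Op != tPOP_RET)
      return false;                 // some return still reads LR
  return true;
}

int main() {
  assert(canClearRestored({{tPOP_RET, true}}));
  assert(!canClearRestored({{BX_LR, true}}));   // LR is live out here
}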
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
index 16f2ce6bea6f..8d2b8beb9a58 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -59,6 +59,9 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
+ void processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS = nullptr) const override;
+
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index db63facca870..d00b7853816e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1415,6 +1415,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
+ setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
+ setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
+ setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
}
// We want to custom lower some of our intrinsics.
@@ -6447,6 +6450,57 @@ SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
+SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+ SDValue Mode = Op->getOperand(1);
+
+ // Generate nodes to build:
+ // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
+ SDValue Ops[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
+ SDValue FPSCR =
+ DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
+ Chain = FPSCR.getValue(1);
+ FPSCR = FPSCR.getValue(0);
+
+ SDValue FPSCRMasked =
+ DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
+ DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
+ SDValue InputMasked =
+ DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
+ DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
+ FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
+
+ SDValue Ops2[] = {
+ Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
+ return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
+SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+
+ // To get the default FP mode all control bits are cleared:
+ // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
+ SDValue Ops[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
+ SDValue FPSCR =
+ DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
+ Chain = FPSCR.getValue(1);
+ FPSCR = FPSCR.getValue(0);
+
+ SDValue FPSCRMasked = DAG.getNode(
+ ISD::AND, DL, MVT::i32, FPSCR,
+ DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
+ SDValue Ops2[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
+ FPSCRMasked};
+ return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
+}
+
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDLoc dl(N);
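A standalone check of the FPSCR mask arithmetic used by both lowerings. The two mask constants are copied from the ARMISelLowering.h change further down; SET_FPMODE keeps the status field and takes control bits from the new mode, while RESET_FPMODE clears every control bit and preserves status and reserved bits:

#include <cassert>
#include <cstdint>

constexpr uint32_t FPStatusBits   = 0xf800009f; // NZCV, QC, cumulative exceptions
constexpr uint32_t FPReservedBits = 0x00006060; // must be preserved

static uint32_t setFPMode(uint32_t FPSCR, uint32_t Mode) {
  return (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits);
}
static uint32_t resetFPMode(uint32_t FPSCR) {
  return FPSCR & (FPStatusBits | FPReservedBits);
}

int main() {
  uint32_t FPSCR = 0x8000001f | (1u << 22);     // status flags + a rounding bit
  uint32_t M = setFPMode(FPSCR, 0);             // request the default mode
  assert((M & FPStatusBits) == (FPSCR & FPStatusBits)); // status untouched
  assert((M & ~FPStatusBits) == 0);                     // control cleared
  assert((resetFPMode(FPSCR) & (1u << 22)) == 0);       // rounding bit reset
}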
@@ -10557,6 +10611,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
+ case ISD::SET_FPMODE:
+ return LowerSET_FPMODE(Op, DAG);
+ case ISD::RESET_FPMODE:
+ return LowerRESET_FPMODE(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 6c2b92de7a1d..f398b01f4186 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -375,6 +375,14 @@ class VectorType;
// Bit position of rounding mode bits in FPSCR.
const unsigned RoundingBitsPos = 22;
+
+ // Bits of the floating-point status: the NZCV flags, the QC bit, and the
+ // cumulative FP exception bits.
+ const unsigned FPStatusBits = 0xf800009f;
+
+ // Some bits in the FPSCR are not yet defined. They must be preserved when
+ // modifying the contents.
+ const unsigned FPReservedBits = 0x00006060;
} // namespace ARM
/// Define some predicates that are used for node matching.
@@ -835,6 +843,8 @@ class VectorType;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
index 800527bcf756..55d3efbd9b9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2675,6 +2675,7 @@ def : Pat<(get_fpenv), (VMRS)>;
def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>;
def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>;
def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>;
+def : Pat<(get_fpmode), (VMRS)>;
//===----------------------------------------------------------------------===//
// Assembler aliases.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a679699a66c7..ed9d30c3c3ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2604,16 +2604,14 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
}
// Re-schedule loads.
- for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
- unsigned Base = LdBases[i];
+ for (unsigned Base : LdBases) {
SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
if (Lds.size() > 1)
RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
}
// Re-schedule stores.
- for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
- unsigned Base = StBases[i];
+ for (unsigned Base : StBases) {
SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
if (Sts.size() > 1)
RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
index d3ff12a1f7b3..621852f2453f 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
@@ -63,57 +63,56 @@ ResourceBase::ResourceBase(uint32_t I, FrontendResource R)
RangeSize = ArrTy->getNumElements();
}
-StringRef ResourceBase::getComponentTypeName(ComponentType CompType) {
- switch (CompType) {
- case ComponentType::LastEntry:
- case ComponentType::Invalid:
+StringRef ResourceBase::getElementTypeName(ElementType ElTy) {
+ switch (ElTy) {
+ case ElementType::Invalid:
return "invalid";
- case ComponentType::I1:
+ case ElementType::I1:
return "i1";
- case ComponentType::I16:
+ case ElementType::I16:
return "i16";
- case ComponentType::U16:
+ case ElementType::U16:
return "u16";
- case ComponentType::I32:
+ case ElementType::I32:
return "i32";
- case ComponentType::U32:
+ case ElementType::U32:
return "u32";
- case ComponentType::I64:
+ case ElementType::I64:
return "i64";
- case ComponentType::U64:
+ case ElementType::U64:
return "u64";
- case ComponentType::F16:
+ case ElementType::F16:
return "f16";
- case ComponentType::F32:
+ case ElementType::F32:
return "f32";
- case ComponentType::F64:
+ case ElementType::F64:
return "f64";
- case ComponentType::SNormF16:
+ case ElementType::SNormF16:
return "snorm_f16";
- case ComponentType::UNormF16:
+ case ElementType::UNormF16:
return "unorm_f16";
- case ComponentType::SNormF32:
+ case ElementType::SNormF32:
return "snorm_f32";
- case ComponentType::UNormF32:
+ case ElementType::UNormF32:
return "unorm_f32";
- case ComponentType::SNormF64:
+ case ElementType::SNormF64:
return "snorm_f64";
- case ComponentType::UNormF64:
+ case ElementType::UNormF64:
return "unorm_f64";
- case ComponentType::PackedS8x32:
+ case ElementType::PackedS8x32:
return "p32i8";
- case ComponentType::PackedU8x32:
+ case ElementType::PackedU8x32:
return "p32u8";
}
- llvm_unreachable("All ComponentType enums are handled in switch");
+ llvm_unreachable("All ElementType enums are handled in switch");
}
-void ResourceBase::printComponentType(Kinds Kind, ComponentType CompType,
- unsigned Alignment, raw_ostream &OS) {
+void ResourceBase::printElementType(Kinds Kind, ElementType ElTy,
+ unsigned Alignment, raw_ostream &OS) {
switch (Kind) {
default:
// TODO: add vector size.
- OS << right_justify(getComponentTypeName(CompType), Alignment);
+ OS << right_justify(getElementTypeName(ElTy), Alignment);
break;
case Kinds::RawBuffer:
OS << right_justify("byte", Alignment);
@@ -232,19 +231,13 @@ void ResourceBase::print(raw_ostream &OS, StringRef IDPrefix,
OS << right_justify("unbounded", 6) << "\n";
}
-UAVResource::UAVResource(uint32_t I, FrontendResource R)
- : ResourceBase(I, R), Shape(R.getResourceKind()), GloballyCoherent(false),
- HasCounter(false), IsROV(R.getIsROV()), ExtProps() {
- parseSourceType(R.getSourceType());
-}
-
void UAVResource::print(raw_ostream &OS) const {
OS << "; " << left_justify(Name, 31);
OS << right_justify("UAV", 10);
- printComponentType(
- Shape, ExtProps.ElementType.value_or(ComponentType::Invalid), 8, OS);
+ printElementType(Shape, ExtProps.ElementType.value_or(ElementType::Invalid),
+ 8, OS);
// FIXME: support SampleCount.
// See https://github.com/llvm/llvm-project/issues/58175
@@ -253,35 +246,6 @@ void UAVResource::print(raw_ostream &OS) const {
ResourceBase::print(OS, "U", "u");
}
-// FIXME: Capture this in HLSL source. I would go do this right now, but I want
-// to get this in first so that I can make sure to capture all the extra
-// information we need to remove the source type string from here (See issue:
-// https://github.com/llvm/llvm-project/issues/57991).
-void UAVResource::parseSourceType(StringRef S) {
- S = S.substr(S.find("<") + 1);
-
- constexpr size_t PrefixLen = StringRef("vector<").size();
- if (S.starts_with("vector<"))
- S = S.substr(PrefixLen, S.find(",") - PrefixLen);
- else
- S = S.substr(0, S.find(">"));
-
- ComponentType ElTy = StringSwitch<ResourceBase::ComponentType>(S)
- .Case("bool", ComponentType::I1)
- .Case("int16_t", ComponentType::I16)
- .Case("uint16_t", ComponentType::U16)
- .Case("int32_t", ComponentType::I32)
- .Case("uint32_t", ComponentType::U32)
- .Case("int64_t", ComponentType::I64)
- .Case("uint64_t", ComponentType::U64)
- .Case("half", ComponentType::F16)
- .Case("float", ComponentType::F32)
- .Case("double", ComponentType::F64)
- .Default(ComponentType::Invalid);
- if (ElTy != ComponentType::Invalid)
- ExtProps.ElementType = ElTy;
-}
-
ConstantBuffer::ConstantBuffer(uint32_t I, hlsl::FrontendResource R)
: ResourceBase(I, R) {}
@@ -294,7 +258,7 @@ void ConstantBuffer::print(raw_ostream &OS) const {
OS << right_justify("cbuffer", 10);
- printComponentType(Kinds::CBuffer, ComponentType::Invalid, 8, OS);
+ printElementType(Kinds::CBuffer, ElementType::Invalid, 8, OS);
printKind(Kinds::CBuffer, 12, OS, /*SRV*/ false, /*HasCounter*/ false);
// Print the binding part.
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h
index cb39020bc61e..5f8b0badd145 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.h
@@ -46,38 +46,13 @@ protected:
bool SRV = false, bool HasCounter = false,
uint32_t SampleCount = 0);
- // The value ordering of this enumeration is part of the DXIL ABI. Elements
- // can only be added to the end, and not removed.
- enum class ComponentType : uint32_t {
- Invalid = 0,
- I1,
- I16,
- U16,
- I32,
- U32,
- I64,
- U64,
- F16,
- F32,
- F64,
- SNormF16,
- UNormF16,
- SNormF32,
- UNormF32,
- SNormF64,
- UNormF64,
- PackedS8x32,
- PackedU8x32,
- LastEntry
- };
-
- static StringRef getComponentTypeName(ComponentType CompType);
- static void printComponentType(Kinds Kind, ComponentType CompType,
- unsigned Alignment, raw_ostream &OS);
+ static StringRef getElementTypeName(hlsl::ElementType CompType);
+ static void printElementType(Kinds Kind, hlsl::ElementType CompType,
+ unsigned Alignment, raw_ostream &OS);
public:
struct ExtendedProperties {
- std::optional<ComponentType> ElementType;
+ std::optional<hlsl::ElementType> ElementType;
// The value ordering of this enumeration is part of the DXIL ABI. Elements
// can only be added to the end, and not removed.
@@ -102,7 +77,9 @@ class UAVResource : public ResourceBase {
void parseSourceType(StringRef S);
public:
- UAVResource(uint32_t I, hlsl::FrontendResource R);
+ UAVResource(uint32_t I, hlsl::FrontendResource R)
+ : ResourceBase(I, R), Shape(R.getResourceKind()), GloballyCoherent(false),
+ HasCounter(false), IsROV(R.getIsROV()), ExtProps{R.getElementType()} {}
MDNode *write() const;
void print(raw_ostream &O) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index e2d0aeee092e..ebb269c6e6e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -937,8 +937,7 @@ void DXILBitcodeWriter::writeAttributeTable() {
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- AttributeList AL = Attrs[i];
+ for (AttributeList AL : Attrs) {
for (unsigned i : AL.indexes()) {
AttributeSet AS = AL.getAttributes(i);
if (AS.hasAttributes())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 47fbf0a69518..dae316ccb5e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2860,8 +2860,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
// For each defined register, if it is a constant, create an instruction
// NewR = const
// and replace all uses of the defined register with NewR.
- for (unsigned i = 0, n = DefRegs.size(); i < n; ++i) {
- unsigned R = DefRegs[i];
+ for (unsigned R : DefRegs) {
const LatticeCell &L = Inputs.get(R);
if (L.isBottom())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index efb0d405fef2..e08566718d7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1337,8 +1337,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
// segments that are used in the output.
unsigned Seg0 = ~0u, Seg1 = ~0u;
- for (int I = 0, E = SegMap.size(); I != E; ++I) {
- unsigned X = SegMap[I];
+ for (unsigned X : SegMap) {
if (X == ~0u)
continue;
if (Seg0 == ~0u)
@@ -2037,8 +2036,7 @@ HvxSelector::completeToPerfect(ArrayRef<uint32_t> Completions, unsigned Width) {
#ifndef NDEBUG
// Check that we have generated a valid completion.
uint32_t OrAll = 0;
- for (unsigned I = 0, E = Comps.size(); I != E; ++I) {
- uint32_t C = Comps[I];
+ for (uint32_t C : Comps) {
assert(isPowerOf2_32(C));
OrAll |= C;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 51ef72b873a5..7777ae23e8ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1062,7 +1062,7 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
// Promote immediates.
for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
- if (CI->getType()->getBitWidth() < DestBW)
+ if (CI->getBitWidth() < DestBW)
In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
}
}
@@ -1577,7 +1577,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
static bool hasZeroSignBit(const Value *V) {
if (const auto *CI = dyn_cast<const ConstantInt>(V))
- return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
+ return CI->getValue().isNonNegative();
const Instruction *I = dyn_cast<const Instruction>(V);
if (!I)
return false;
@@ -1688,7 +1688,7 @@ void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
if (I->getOpcode() != Instruction::Or)
return nullptr;
ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
- if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
+ if (!Msb || !Msb->getValue().isSignMask())
return nullptr;
if (!hasZeroSignBit(I->getOperand(0)))
return nullptr;
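A plain-integer analogue of the APInt predicates adopted above: isNonNegative() is "sign bit clear" and isSignMask() is "only the sign bit set", neither of which needs to fetch the type's sign-bit constant as the old code did:

#include <cassert>
#include <cstdint>

static bool isNonNegative(uint64_t V, unsigned BW) {
  return (V >> (BW - 1)) == 0;                  // sign bit clear
}
static bool isSignMask(uint64_t V, unsigned BW) {
  return V == (uint64_t(1) << (BW - 1));        // exactly the sign bit
}

int main() {
  assert(isNonNegative(0x7f, 8));
  assert(!isNonNegative(0x80, 8));
  assert(isSignMask(0x80, 8) && !isSignMask(0xc0, 8));
}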
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index fffd5abd9f8b..0740ac58a338 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -554,7 +554,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
// Add qfloat subtarget feature by default to v68 and above
// unless explicitly disabled
if (checkFeature(X, Hexagon::ExtensionHVXV68) &&
- ArchFS.find("-hvx-qfloat", 0) == std::string::npos) {
+ !ArchFS.contains("-hvx-qfloat")) {
llvm::FeatureBitset Features = X->getFeatureBits();
X->setFeatureBits(Features.set(Hexagon::ExtensionHVXQFloat));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 14bcef7c7d26..6d8ef1bf96cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -177,6 +177,34 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
}
+static inline std::pair<MCFixupKind, MCFixupKind>
+getRelocPairForSize(unsigned Size) {
+ switch (Size) {
+ default:
+ llvm_unreachable("unsupported fixup size");
+ case 6:
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD6),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB6));
+ case 8:
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD8),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB8));
+ case 16:
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD16),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB16));
+ case 32:
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD32),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB32));
+ case 64:
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64));
+ }
+}
+
bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const {
// We mostly follow binutils' convention here: align to 4-byte boundary with a
@@ -191,6 +219,56 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
return true;
}
+bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t &FixedValue) const {
+ std::pair<MCFixupKind, MCFixupKind> FK;
+ uint64_t FixedValueA, FixedValueB;
+ const MCSection &SecA = Target.getSymA()->getSymbol().getSection();
+ const MCSection &SecB = Target.getSymB()->getSymbol().getSection();
+
+ // We need to record a relocation if SecA != SecB. Usually SecB is the same
+ // as the section of the Fixup, in which case the relocation is recorded as
+ // PC-relative; if it is a different section, an error is reported. Just
+ // return false and let handleFixup finish the work.
+ if (&SecA != &SecB)
+ return false;
+
+ // In the SecA == SecB case, if linker relaxation is enabled, we need to
+ // record the ADD and SUB relocations. Otherwise the FixedValue has already
+ // been computed in evaluateFixup, so return true to avoid recording
+ // relocations.
+ if (!STI.hasFeature(LoongArch::FeatureRelax))
+ return true;
+
+ switch (Fixup.getKind()) {
+ case llvm::FK_Data_1:
+ FK = getRelocPairForSize(8);
+ break;
+ case llvm::FK_Data_2:
+ FK = getRelocPairForSize(16);
+ break;
+ case llvm::FK_Data_4:
+ FK = getRelocPairForSize(32);
+ break;
+ case llvm::FK_Data_8:
+ FK = getRelocPairForSize(64);
+ break;
+ default:
+ llvm_unreachable("unsupported fixup size");
+ }
+ MCValue A = MCValue::get(Target.getSymA(), nullptr, Target.getConstant());
+ MCValue B = MCValue::get(Target.getSymB());
+ auto FA = MCFixup::create(Fixup.getOffset(), nullptr, std::get<0>(FK));
+ auto FB = MCFixup::create(Fixup.getOffset(), nullptr, std::get<1>(FK));
+ auto &Asm = Layout.getAssembler();
+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FA, A, FixedValueA);
+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FB, B, FixedValueB);
+ FixedValue = FixedValueA - FixedValueB;
+ return true;
+}
+
std::unique_ptr<MCObjectTargetWriter>
LoongArchAsmBackend::createObjectTargetWriter() const {
return createLoongArchELFObjectWriter(
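
Recording the fixup as a paired R_LARCH_ADD*/R_LARCH_SUB* keeps label differences correct under linker relaxation: rather than baking A - B into the section data at assembly time, the writer emits two relocations against the same location and lets the final link recompute the difference after code shrinks. A sketch of how a consumer would apply a 32-bit pair (hedged pseudocode, not LLD's actual implementation):

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    // Apply R_LARCH_ADD32 followed by R_LARCH_SUB32 at the same offset.
    static void applyAddSub32(uint8_t *Loc, uint64_t AddrA, uint64_t AddrB) {
      using namespace llvm::support::endian;
      uint32_t Word = read32le(Loc);
      Word += static_cast<uint32_t>(AddrA); // ADD32: final address of symbol A
      Word -= static_cast<uint32_t>(AddrB); // SUB32: final address of symbol B
      write32le(Loc, Word);
    }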
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index d1fbf788e8a8..fef0e84600a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -31,10 +31,15 @@ class LoongArchAsmBackend : public MCAsmBackend {
public:
LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
- : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit), TargetOptions(Options) {}
+ : MCAsmBackend(llvm::endianness::little,
+ LoongArch::fixup_loongarch_relax),
+ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {}
~LoongArchAsmBackend() override {}
+ bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F,
+ const MCFixup &Fixup, const MCValue &Target,
+ uint64_t &FixedValue) const override;
+
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
index ba2d6718cdf9..178fa6e5262b 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
@@ -106,7 +106,9 @@ enum Fixups {
// 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i.
fixup_loongarch_tls_gd_pc_hi20,
// 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w.
- fixup_loongarch_tls_gd_hi20
+ fixup_loongarch_tls_gd_hi20,
+ // Generate an R_LARCH_RELAX which indicates the linker may relax here.
+ fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX
};
} // end namespace LoongArch
} // end namespace llvm
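
Defining fixup_loongarch_relax as FirstLiteralRelocationKind + ELF::R_LARCH_RELAX uses MC's literal-relocation convention: a fixup kind at or above FirstLiteralRelocationKind encodes its ELF relocation type directly, so the object writer can recover the type by subtraction. An abridged sketch of that mapping (the real getRelocType also translates the ordinary fixups above case by case):

    static unsigned relocTypeFor(unsigned TargetKind) {
      // TargetKind is MCFixup::getTargetKind(); kinds at or above
      // FirstLiteralRelocationKind carry the ELF relocation type verbatim.
      if (TargetKind >= llvm::FirstLiteralRelocationKind)
        return TargetKind - llvm::FirstLiteralRelocationKind; // R_LARCH_RELAX
      return 0; // ordinary fixups handled elsewhere
    }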
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 22c662a79d87..385b3b74c34d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10986,7 +10986,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
- assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
SDValue Val = Op.getOperand(ArgStart + 1);
EVT Ty = Val.getValueType();
if (Ty == MVT::i128) {
@@ -10994,9 +10993,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
// ordering?
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
}
+ unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
+ EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
return SDValue(
- DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+ DAG.getMachineNode(Opcode, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
Op.getOperand(0)),
0);
}
@@ -11827,7 +11828,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
- if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+ if (isa<LoadInst>(Inst))
return Builder.CreateCall(
Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d0a6cced1b19..aaced58defe6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1067,9 +1067,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
- // This function should only be called for opcodes with the ReMaterializable
- // flag set.
- llvm_unreachable("Unknown rematerializable operation!");
+ // Let the base implementation decide.
break;
case PPC::LI:
case PPC::LI8:
@@ -3179,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
+ case PPC::CFENCE:
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
- BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
+ unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
+ BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
.addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR7)
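
Both widths now expand to the classic PowerPC acquire idiom: a self-compare moves the loaded value into a condition register, a never-taken conditional branch turns that into a control dependency, and the following isync orders subsequent loads. An illustrative shape of the emitted sequence, assuming CTRL_DEP supplies the branch plus isync (a sketch, not the exact encodings):

    // 64-bit:  cmpd  cr7, rV, rV      32-bit:  cmpw  cr7, rV, rV
    //          bne-  cr7, .+4                  bne-  cr7, .+4   ; never taken
    //          isync                           isync            ; acquire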
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6199785206b2..b1601739fd45 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
"hashchkp $RB, $addr", IIC_IntGeneral, []>;
}
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
+
// Now both high word and low word are reversed, next
// swap the high word and low word.
def : Pat<(i64 (bitreverse i64:$A)),
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 8f03a7ac41d3..28ec999157c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -85,13 +85,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
+ auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
- auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
if (XLen == 32 && ST.hasStdExtD()) {
- LLT IdxZeroTy = G_MERGE_VALUES ? s64 : s32;
- LLT IdxOneTy = G_MERGE_VALUES ? s32 : s64;
- MergeUnmergeActions.legalFor({IdxZeroTy, IdxOneTy});
+ MergeUnmergeActions.legalIf(
+ all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
}
MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
.widenScalarToNextPow2(BigTyIdx, XLen)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 00b4751905f6..30ed36525e29 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -113,6 +113,15 @@ enum {
UsesVXRMShift = HasRoundModeOpShift + 1,
UsesVXRMMask = 1 << UsesVXRMShift,
+
+ // Indicates whether these instructions can partially overlap between source
+ // registers and destination registers according to the vector spec.
+ // 0 -> not a vector pseudo
+ // 1 -> default value for vector pseudos; not widening or narrowing.
+ // 2 -> narrowing case
+ // 3 -> widening case
+ TargetOverlapConstraintTypeShift = UsesVXRMShift + 1,
+ TargetOverlapConstraintTypeMask = 3ULL << TargetOverlapConstraintTypeShift,
};
enum VLMUL : uint8_t {
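
With the constraint packed into two TSFlags bits, a query is a shift-and-mask in the style of the surrounding RISCVII accessors. A sketch (the helper name is illustrative, not an existing API):

    static inline unsigned getTargetOverlapConstraintType(uint64_t TSFlags) {
      return (TSFlags & TargetOverlapConstraintTypeMask) >>
             TargetOverlapConstraintTypeShift;
    }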
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index 294927aecb94..a66dd135ae5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -107,15 +107,15 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
def FeatureStdExtZfh
: SubtargetFeature<"zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
- [FeatureStdExtF]>;
+ [FeatureStdExtZfhmin]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
AssemblerPredicate<(all_of FeatureStdExtZfh),
"'Zfh' (Half-Precision Floating-Point)">;
def NoStdExtZfh : Predicate<"!Subtarget->hasStdExtZfh()">;
def HasStdExtZfhOrZfhmin
- : Predicate<"Subtarget->hasStdExtZfhOrZfhmin()">,
- AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin),
+ : Predicate<"Subtarget->hasStdExtZfhmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfhmin),
"'Zfh' (Half-Precision Floating-Point) or "
"'Zfhmin' (Half-Precision Floating-Point Minimal)">;
@@ -146,15 +146,15 @@ def HasStdExtZhinxmin : Predicate<"Subtarget->hasStdExtZhinxmin()">,
def FeatureStdExtZhinx
: SubtargetFeature<"zhinx", "HasStdExtZhinx", "true",
"'Zhinx' (Half Float in Integer)",
- [FeatureStdExtZfinx]>;
+ [FeatureStdExtZhinxmin]>;
def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">,
AssemblerPredicate<(all_of FeatureStdExtZhinx),
"'Zhinx' (Half Float in Integer)">;
def NoStdExtZhinx : Predicate<"!Subtarget->hasStdExtZhinx()">;
def HasStdExtZhinxOrZhinxmin
- : Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">,
- AssemblerPredicate<(any_of FeatureStdExtZhinx, FeatureStdExtZhinxmin),
+ : Predicate<"Subtarget->hasStdExtZhinxmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZhinxmin),
"'Zhinx' (Half Float in Integer) or "
"'Zhinxmin' (Half Float in Integer Minimal)">;
@@ -472,7 +472,7 @@ def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
def FeatureStdExtZvfbfmin
: SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
"'Zvbfmin' (Vector BF16 Converts)",
- [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>;
+ [FeatureStdExtZve32f]>;
def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfmin),
"'Zvfbfmin' (Vector BF16 Converts)">;
@@ -480,23 +480,23 @@ def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
def FeatureStdExtZvfbfwma
: SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true",
"'Zvfbfwma' (Vector BF16 widening mul-add)",
- [FeatureStdExtZvfbfmin]>;
+ [FeatureStdExtZvfbfmin, FeatureStdExtZfbfmin]>;
def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfwma),
"'Zvfbfwma' (Vector BF16 widening mul-add)">;
def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">;
-def FeatureStdExtZvfh
- : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
- "'Zvfh' (Vector Half-Precision Floating-Point)",
- [FeatureStdExtZve32f, FeatureStdExtZfhmin]>;
-
def FeatureStdExtZvfhmin
: SubtargetFeature<"zvfhmin", "HasStdExtZvfhmin", "true",
"'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)",
[FeatureStdExtZve32f]>;
+def FeatureStdExtZvfh
+ : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
+ "'Zvfh' (Vector Half-Precision Floating-Point)",
+ [FeatureStdExtZvfhmin, FeatureStdExtZfhmin]>;
+
def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minimal()">,
@@ -561,14 +561,14 @@ def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
"'Zawrs' (Wait on Reservation Set)">;
def FeatureStdExtZvkb
- : SubtargetFeature<"experimental-zvkb", "HasStdExtZvkb", "true",
+ : SubtargetFeature<"zvkb", "HasStdExtZvkb", "true",
"'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">,
AssemblerPredicate<(all_of FeatureStdExtZvkb),
"'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
def FeatureStdExtZvbb
- : SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true",
+ : SubtargetFeature<"zvbb", "HasStdExtZvbb", "true",
"'Zvbb' (Vector basic bit-manipulation instructions.)",
[FeatureStdExtZvkb]>;
def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
@@ -576,35 +576,35 @@ def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
"'Zvbb' (Vector basic bit-manipulation instructions.)">;
def FeatureStdExtZvbc
- : SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true",
+ : SubtargetFeature<"zvbc", "HasStdExtZvbc", "true",
"'Zvbc' (Vector Carryless Multiplication)">;
def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">,
AssemblerPredicate<(all_of FeatureStdExtZvbc),
"'Zvbc' (Vector Carryless Multiplication)">;
def FeatureStdExtZvkg
- : SubtargetFeature<"experimental-zvkg", "HasStdExtZvkg", "true",
+ : SubtargetFeature<"zvkg", "HasStdExtZvkg", "true",
"'Zvkg' (Vector GCM instructions for Cryptography)">;
def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">,
AssemblerPredicate<(all_of FeatureStdExtZvkg),
"'Zvkg' (Vector GCM instructions for Cryptography)">;
def FeatureStdExtZvkned
- : SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true",
+ : SubtargetFeature<"zvkned", "HasStdExtZvkned", "true",
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
AssemblerPredicate<(all_of FeatureStdExtZvkned),
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
def FeatureStdExtZvknha
- : SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true",
+ : SubtargetFeature<"zvknha", "HasStdExtZvknha", "true",
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
AssemblerPredicate<(all_of FeatureStdExtZvknha),
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
def FeatureStdExtZvknhb
- : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
+ : SubtargetFeature<"zvknhb", "HasStdExtZvknhb", "true",
"'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
[FeatureStdExtZve64x]>;
def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">,
@@ -616,59 +616,59 @@ def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarg
"'Zvknha' or 'Zvknhb' (Vector SHA-2)">;
def FeatureStdExtZvksed
- : SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true",
+ : SubtargetFeature<"zvksed", "HasStdExtZvksed", "true",
"'Zvksed' (SM4 Block Cipher Instructions)">;
def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
AssemblerPredicate<(all_of FeatureStdExtZvksed),
"'Zvksed' (SM4 Block Cipher Instructions)">;
def FeatureStdExtZvksh
- : SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true",
+ : SubtargetFeature<"zvksh", "HasStdExtZvksh", "true",
"'Zvksh' (SM3 Hash Function Instructions)">;
def HasStdExtZvksh : Predicate<"Subtarget->hasStdExtZvksh()">,
AssemblerPredicate<(all_of FeatureStdExtZvksh),
"'Zvksh' (SM3 Hash Function Instructions)">;
def FeatureStdExtZvkt
- : SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true",
+ : SubtargetFeature<"zvkt", "HasStdExtZvkt", "true",
"'Zvkt' (Vector Data-Independent Execution Latency)">;
// Zvk short-hand extensions
def FeatureStdExtZvkn
- : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
+ : SubtargetFeature<"zvkn", "HasStdExtZvkn", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvkned, Zvknhb, Zvkb and Zvkt.",
[FeatureStdExtZvkned, FeatureStdExtZvknhb,
FeatureStdExtZvkb, FeatureStdExtZvkt]>;
def FeatureStdExtZvknc
- : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
+ : SubtargetFeature<"zvknc", "HasStdExtZvknc", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvkn and Zvbc.",
[FeatureStdExtZvkn, FeatureStdExtZvbc]>;
def FeatureStdExtZvkng
- : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
+ : SubtargetFeature<"zvkng", "HasStdExtZvkng", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvkn and Zvkg.",
[FeatureStdExtZvkn, FeatureStdExtZvkg]>;
def FeatureStdExtZvks
- : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
+ : SubtargetFeature<"zvks", "HasStdExtZvks", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvksed, Zvksh, Zvkb and Zvkt.",
[FeatureStdExtZvksed, FeatureStdExtZvksh,
FeatureStdExtZvkb, FeatureStdExtZvkt]>;
def FeatureStdExtZvksc
- : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
+ : SubtargetFeature<"zvksc", "HasStdExtZvksc", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvks and Zvbc.",
[FeatureStdExtZvks, FeatureStdExtZvbc]>;
def FeatureStdExtZvksg
- : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
+ : SubtargetFeature<"zvksg", "HasStdExtZvksg", "true",
"This extension is shorthand for the following set of "
"other extensions: Zvks and Zvkg.",
[FeatureStdExtZvks, FeatureStdExtZvkg]>;
@@ -959,6 +959,10 @@ def TuneNoOptimizedZeroStrideLoad
"false", "Hasn't optimized (perform fewer memory operations)"
"zero-stride vector load">;
+def Experimental
+ : SubtargetFeature<"experimental", "HasExperimental",
+ "true", "Experimental intrinsics">;
+
// Some vector hardware implementations do not process all VLEN bits in parallel
// and instead split over multiple cycles. DLEN refers to the datapath width
// that can be done in parallel.
@@ -973,9 +977,19 @@ def TuneLUIADDIFusion
def TuneAUIPCADDIFusion
: SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
"true", "Enable AUIPC+ADDI macrofusion">;
-def TuneShiftedZExtFusion
- : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
- "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
+
+def TuneZExtHFusion
+ : SubtargetFeature<"zexth-fusion", "HasZExtHFusion",
+ "true", "Enable SLLI+SRLI to be fused to zero extension of halfword">;
+
+def TuneZExtWFusion
+ : SubtargetFeature<"zextw-fusion", "HasZExtWFusion",
+ "true", "Enable SLLI+SRLI to be fused to zero extension of word">;
+
+def TuneShiftedZExtWFusion
+ : SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
+ "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">;
+
def TuneLDADDFusion
: SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
"true", "Enable LD+ADD macrofusion.">;
@@ -997,12 +1011,8 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
[TuneNoDefaultUnroll,
TuneShortForwardBranchOpt]>;
-def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
- "Ventana Veyron-Series processors",
- [TuneLUIADDIFusion,
- TuneAUIPCADDIFusion,
- TuneShiftedZExtFusion,
- TuneLDADDFusion]>;
+def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+ "Ventana Veyron-Series processors">;
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 09b3ab96974c..098a320c9153 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -915,8 +915,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Opc = RISCV::FMV_H_X;
break;
case MVT::f16:
- Opc =
- Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
+ Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
break;
case MVT::f32:
Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 03e994586d0c..c2508a158837 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -122,7 +122,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit() && RV64LegalI32)
addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
- if (Subtarget.hasStdExtZfhOrZfhmin())
+ if (Subtarget.hasStdExtZfhmin())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtZfbfmin())
addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
@@ -130,7 +130,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
- if (Subtarget.hasStdExtZhinxOrZhinxmin())
+ if (Subtarget.hasStdExtZhinxmin())
addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
if (Subtarget.hasStdExtZfinx())
addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
@@ -439,7 +439,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN};
- if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ if (Subtarget.hasStdExtZfhminOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
static const unsigned ZfhminZfbfminPromoteOps[] = {
@@ -469,7 +469,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}
- if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
+ if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
if (Subtarget.hasStdExtZfhOrZhinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
setOperationAction(FPRndMode, MVT::f16,
@@ -675,7 +675,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
- ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
+ ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
- ISD::EXPERIMENTAL_VP_REVERSE};
+ ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -773,6 +773,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
setOperationPromotedToType(
@@ -1147,6 +1148,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SETCC, ISD::VP_TRUNCATE},
VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
continue;
}
@@ -1322,7 +1324,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
Custom);
- if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ if (Subtarget.hasStdExtZfhminOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
if (Subtarget.hasStdExtFOrZfinx())
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
@@ -1388,7 +1390,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbkb())
setTargetDAGCombine(ISD::BITREVERSE);
- if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ if (Subtarget.hasStdExtZfhminOrZhinxmin())
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
if (Subtarget.hasStdExtFOrZfinx())
setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
@@ -2099,7 +2101,7 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
bool IsLegalVT = false;
if (VT == MVT::f16)
- IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
+ IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
else if (VT == MVT::f32)
IsLegalVT = Subtarget.hasStdExtFOrZfinx();
else if (VT == MVT::f64)
@@ -2171,7 +2173,7 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
- !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ !Subtarget.hasStdExtZfhminOrZhinxmin())
return MVT::f32;
MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -2188,7 +2190,7 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
- !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ !Subtarget.hasStdExtZfhminOrZhinxmin())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -5528,7 +5530,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
case ISD::VP_SELECT:
return RISCVISD::VSELECT_VL;
case ISD::VP_MERGE:
- return RISCVISD::VP_MERGE_VL;
+ return RISCVISD::VMERGE_VL;
case ISD::VP_ASHR:
return RISCVISD::SRA_VL;
case ISD::VP_LSHR:
@@ -5576,6 +5578,8 @@ static bool hasMergeOp(unsigned Opcode) {
return true;
if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
return true;
+ if (Opcode == RISCVISD::VMERGE_VL)
+ return true;
return false;
}
@@ -5761,7 +5765,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT == MVT::f16 && Op0VT == MVT::i16 &&
- Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
+ Subtarget.hasStdExtZfhminOrZhinxmin()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
return FPConv;
@@ -6637,6 +6641,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
!Subtarget.hasVInstructionsF16()))
return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::EXPERIMENTAL_VP_SPLICE:
+ return lowerVPSpliceExperimental(Op, DAG);
case ISD::EXPERIMENTAL_VP_REVERSE:
return lowerVPReverseExperimental(Op, DAG);
}
@@ -8238,8 +8244,8 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
AVL);
// TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
// It's fine because vmerge does not care about mask policy.
- return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
- AVL);
+ return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
+ MaskedOff, AVL);
}
}
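
Since VMERGE_VL carries an explicit passthru operand, VP_MERGE's semantics are expressed by passing the false operand twice, as above: masked-off lanes read the false value, and tying the passthru to it keeps the result consistent. The general form of the node construction (a sketch):

    // Operands: mask, true value, false value, passthru, VL.
    SDValue Merge = DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, TrueVal,
                                FalseVal, /*Passthru=*/FalseVal, VL);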
@@ -10312,9 +10318,20 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
for (const auto &OpIdx : enumerate(Op->ops())) {
SDValue V = OpIdx.value();
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
- // Add dummy merge value before the mask.
- if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
- Ops.push_back(DAG.getUNDEF(ContainerVT));
+ // Add dummy merge value before the mask. Or if there isn't a mask, before
+ // EVL.
+ if (HasMergeOp) {
+ auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
+ if (MaskIdx) {
+ if (*MaskIdx == OpIdx.index())
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
+ OpIdx.index()) {
+ // For VP_MERGE, copy the false operand instead of an undef value.
+ assert(Op.getOpcode() == ISD::VP_MERGE);
+ Ops.push_back(Ops.back());
+ }
+ }
// Pass through operands which aren't fixed-length vectors.
if (!V.getValueType().isFixedLengthVector()) {
Ops.push_back(V);
@@ -10583,6 +10600,87 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
}
SDValue
+RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Op2 = Op.getOperand(1);
+ SDValue Offset = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue EVL1 = Op.getOperand(4);
+ SDValue EVL2 = Op.getOperand(5);
+
+ const MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op.getSimpleValueType();
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
+ if (IsMaskVector) {
+ ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
+
+ // Expand input operands
+ SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(1, DL, XLenVT), EVL1);
+ SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(0, DL, XLenVT), EVL1);
+ Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1,
+ SplatZeroOp1, EVL1);
+
+ SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(1, DL, XLenVT), EVL2);
+ SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(0, DL, XLenVT), EVL2);
+ Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2,
+ SplatZeroOp2, EVL2);
+ }
+
+ int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
+ SDValue DownOffset, UpOffset;
+ if (ImmValue >= 0) {
+ // The operand is a TargetConstant; we need to rebuild it as a regular
+ // constant.
+ DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
+ UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
+ } else {
+ // The operand is a TargetConstant; we need to rebuild it as a regular
+ // constant rather than negating the original operand.
+ UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
+ DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
+ }
+
+ SDValue SlideDown =
+ getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ Op1, DownOffset, Mask, UpOffset);
+ SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
+ UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector (Result has same EVL as Op2)
+ Result = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
+ {Result, DAG.getConstant(0, DL, ContainerVT),
+ DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
+ Mask, EVL2});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
+SDValue
RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
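
The splice lowering above reduces to a slidedown of Op1 by DownOffset followed by a slideup of Op2 by UpOffset. A scalar reference model of the unmasked case (a sketch mirroring the offset computation; it assumes the EVLs are in bounds and ignores tail policy):

    #include <cstdint>
    #include <vector>

    static std::vector<int> vpSpliceRef(const std::vector<int> &Op1,
                                        const std::vector<int> &Op2,
                                        int64_t Imm, unsigned EVL1,
                                        unsigned EVL2) {
      unsigned Down = Imm >= 0 ? Imm : EVL1 + Imm; // DownOffset
      unsigned Up = EVL1 - Down;                   // UpOffset = EVL1 - Down
      std::vector<int> R(EVL2);
      for (unsigned I = 0; I < EVL2; ++I)
        // The first Up lanes come from the slidedown of Op1,
        // the remaining lanes from the slideup of Op2.
        R[I] = I < Up ? Op1[Down + I] : Op2[I - Up];
      return R;
    }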
@@ -11527,11 +11625,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT == MVT::i16 && Op0VT == MVT::f16 &&
- Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
+ Subtarget.hasStdExtZfhminOrZhinxmin()) {
SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
} else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
- Subtarget.hasStdExtZfbfmin()) {
+ Subtarget.hasStdExtZfbfmin()) {
SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
} else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
@@ -13493,6 +13591,7 @@ static SDValue performMemPairCombine(SDNode *N,
// (fp_to_int (ffloor X)) -> fcvt X, rdn
// (fp_to_int (fceil X)) -> fcvt X, rup
// (fp_to_int (fround X)) -> fcvt X, rmm
+// (fp_to_int (frint X)) -> fcvt X
static SDValue performFP_TO_INTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -13516,10 +13615,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
// If the result is invalid, we didn't find a foldable instruction.
- // If the result is dynamic, then we found an frint which we don't yet
- // support. It will cause 7 to be written to the FRM CSR for vector.
- // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
- if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
+ if (FRM == RISCVFPRndMode::Invalid)
return SDValue();
SDLoc DL(N);
@@ -13558,6 +13654,10 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
unsigned Opc =
IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+ } else if (FRM == RISCVFPRndMode::DYN) {
+ unsigned Opc =
+ IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
+ FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
} else {
unsigned Opc =
IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
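
The combine keys off the rounding mode implied by the source node; with DYN now lowered to the unsuffixed vfcvt (which uses the current frm CSR), every mode matchRoundingOp can produce is covered. An abridged sketch of that mapping (the real helper handles additional cases such as FROUNDEVEN):

    static RISCVFPRndMode::RoundingMode roundingOf(unsigned Opc) {
      switch (Opc) {
      case ISD::FFLOOR: return RISCVFPRndMode::RDN; // round down
      case ISD::FCEIL:  return RISCVFPRndMode::RUP; // round up
      case ISD::FROUND: return RISCVFPRndMode::RMM; // nearest, max magnitude
      case ISD::FTRUNC: return RISCVFPRndMode::RTZ; // round toward zero
      case ISD::FRINT:  return RISCVFPRndMode::DYN; // current FRM
      default:          return RISCVFPRndMode::Invalid;
      }
    }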
@@ -13594,6 +13694,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
+// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -15998,13 +16099,26 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// We can't do anything for most intrinsics.
break;
case Intrinsic::riscv_vsetvli:
- case Intrinsic::riscv_vsetvlimax:
- // Assume that VL output is <= 65536.
- // TODO: Take SEW and LMUL into account.
- if (BitWidth > 17)
- Known.Zero.setBitsFrom(17);
+ case Intrinsic::riscv_vsetvlimax: {
+ bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
+ unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
+ RISCVII::VLMUL VLMUL =
+ static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
+ unsigned SEW = RISCVVType::decodeVSEW(VSEW);
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
+ uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
+ MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
+
+ // The result of vsetvli must not be larger than AVL.
+ if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
+ MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
+
+ unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
+ if (BitWidth > KnownZeroFirstBit)
+ Known.Zero.setBitsFrom(KnownZeroFirstBit);
break;
}
+ }
break;
}
}
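
A worked instance of the tightened bound (illustrative numbers): RealMaxVLen = 512 and SEW = 32 give 16 elements, LMUL = 2 doubles that to 32, and a constant AVL of 20 clamps MaxVL to 20, so bits [Log2_32(20) + 1, BitWidth) = [5, BitWidth) are known zero, versus the old blanket assumption of bit 17. A sketch of the computation:

    #include "llvm/Support/MathExtras.h"
    #include <algorithm>
    #include <cstdint>
    #include <optional>

    static unsigned firstKnownZeroBit(uint64_t RealMaxVLen, unsigned SEW,
                                      unsigned LMul, bool Fractional,
                                      std::optional<uint64_t> ConstAVL) {
      uint64_t MaxVL = RealMaxVLen / SEW;
      MaxVL = Fractional ? MaxVL / LMul : MaxVL * LMul;
      if (ConstAVL) // the vsetvli result never exceeds a constant AVL
        MaxVL = std::min(MaxVL, *ConstAVL);
      return llvm::Log2_32(MaxVL) + 1; // bits from here up are zero
    }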
@@ -18570,7 +18684,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VNSRL_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
- NODE_NAME_CASE(VP_MERGE_VL)
+ NODE_NAME_CASE(VMERGE_VL)
NODE_NAME_CASE(VMAND_VL)
NODE_NAME_CASE(VMOR_VL)
NODE_NAME_CASE(VMXOR_VL)
@@ -18632,7 +18746,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// TODO: Support fixed vectors up to XLen for P extension?
if (VT.isVector())
break;
- if (VT == MVT::f16 && Subtarget.hasStdExtZhinxOrZhinxmin())
+ if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
return std::make_pair(0U, &RISCV::GPRF16RegClass);
if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
return std::make_pair(0U, &RISCV::GPRF32RegClass);
@@ -18640,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &RISCV::GPRPF64RegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
- if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
+ if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
return std::make_pair(0U, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF() && VT == MVT::f32)
return std::make_pair(0U, &RISCV::FPR32RegClass);
@@ -18753,7 +18867,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
if (VT == MVT::f32 || VT == MVT::Other)
return std::make_pair(FReg, &RISCV::FPR32RegClass);
- if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
+ if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned HReg = RISCV::F0_H + RegNo;
return std::make_pair(HReg, &RISCV::FPR16RegClass);
@@ -19100,7 +19214,7 @@ bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
switch (FPVT.getSimpleVT().SimpleTy) {
case MVT::f16:
- return Subtarget.hasStdExtZfhOrZfhmin();
+ return Subtarget.hasStdExtZfhmin();
case MVT::f32:
return Subtarget.hasStdExtF();
case MVT::f64:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 41a2dc5771c8..58ed611efc83 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -332,10 +332,8 @@ enum NodeType : unsigned {
// Vector select with an additional VL operand. This operation is unmasked.
VSELECT_VL,
- // Vector select with operand #2 (the value when the condition is false) tied
- // to the destination and an additional VL operand. This operation is
- // unmasked.
- VP_MERGE_VL,
+ // General vmerge node with mask, true, false, passthru, and vl operands.
+ VMERGE_VL,
// Mask binary operators.
VMAND_VL,
@@ -910,6 +908,7 @@ private:
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index e80ba26800a1..f56f49ae2457 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -212,6 +212,15 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
// to the correct CSR.
bit UsesVXRM = 0;
let TSFlags{20} = UsesVXRM;
+
+ // Indicates whether these instructions can partially overlap between source
+ // registers and destination registers according to the vector spec.
+ // 0 -> not a vector pseudo
+ // 1 -> default value for vector pseudos; not widening or narrowing.
+ // 2 -> narrowing case
+ // 3 -> widening case
+ bits<2> TargetOverlapConstraintType = 0;
+ let TSFlags{22-21} = TargetOverlapConstraintType;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 5e06422cf9ad..488ffa73f4e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -529,14 +529,6 @@ class RISCVVPseudo {
// SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
bits<8> SEW = 0;
bit NeedBeInPseudoTable = 1;
- // TargetOverlapConstraintType indicates that these instructions can
- // overlap between source operands and destination operands.
- // 1 -> default value, remain current constraint
- // 2 -> narrow case
- // 3 -> widen case
- // TODO: Add TargetOverlapConstraintType into PseudosTable for further
- // query.
- bits<2> TargetOverlapConstraintType = 1;
}
// The actual table.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index dc6b57fad321..33bdc3366aa3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -344,7 +344,14 @@ def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [
]>;
def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>;
-def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>;
+
+def SDT_RISCVVMERGE_VL : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameAs<0, 4>,
+ SDTCisVT<5, XLenVT>
+]>;
+
+def riscv_vmerge_vl : SDNode<"RISCVISD::VMERGE_VL", SDT_RISCVVMERGE_VL>;
def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
SDTCisVT<1, XLenVT>]>;
@@ -675,14 +682,14 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
op2_reg_class:$rs2,
GPR:$vl, sew, TAIL_AGNOSTIC)>;
// Tail undisturbed
- def : Pat<(riscv_vp_merge_vl true_mask,
+ def : Pat<(riscv_vmerge_vl true_mask,
(result_type (vop
result_reg_class:$rs1,
(op2_type op2_reg_class:$rs2),
srcvalue,
true_mask,
VLOpFrag)),
- result_reg_class:$rs1, VLOpFrag),
+ result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag),
(!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
result_reg_class:$rs1,
op2_reg_class:$rs2,
@@ -712,14 +719,14 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
FRM_DYN,
GPR:$vl, sew, TAIL_AGNOSTIC)>;
// Tail undisturbed
- def : Pat<(riscv_vp_merge_vl true_mask,
+ def : Pat<(riscv_vmerge_vl true_mask,
(result_type (vop
result_reg_class:$rs1,
(op2_type op2_reg_class:$rs2),
srcvalue,
true_mask,
VLOpFrag)),
- result_reg_class:$rs1, VLOpFrag),
+ result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag),
(!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
result_reg_class:$rs1,
op2_reg_class:$rs2,
@@ -1697,21 +1704,21 @@ multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> {
foreach vti = AllIntegerVectors in {
defvar suffix = vti.LMul.MX;
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (op vti.RegClass:$rd,
(riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
srcvalue, (vti.Mask true_mask), VLOpFrag),
srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (op vti.RegClass:$rd,
(riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
srcvalue, (vti.Mask true_mask), VLOpFrag),
srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
@@ -1840,17 +1847,17 @@ multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defvar suffix = vti.LMul.MX;
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
@@ -1876,10 +1883,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defvar suffix = vti.LMul.MX;
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0),
@@ -1887,10 +1894,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
// RISCVInsertReadWriteCSR
FRM_DYN,
GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
+ vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
(!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0),
@@ -2273,29 +2280,32 @@ foreach vti = AllIntegerVectors in {
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- VLOpFrag)),
+ def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0),
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- (SplatPat XLenVT:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
+ def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0),
+ (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2,
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- (SplatPat_simm5 simm5:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
+ vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0),
+ (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2,
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$merge, vti.RegClass:$rs2, simm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
}
@@ -2493,21 +2503,23 @@ foreach fvti = AllFloatVectors in {
(fvti.Vector (IMPLICIT_DEF)),
fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- fvti.RegClass:$rs1,
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$merge, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
}
let Predicates = GetVTypePredicates<fvti>.Predicates in {
@@ -2521,12 +2533,13 @@ foreach fvti = AllFloatVectors in {
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- VLOpFrag)),
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ fvti.RegClass:$merge, fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index fa618b437ce7..0b1d5b664df9 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -351,20 +351,22 @@ multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type,
multiclass VPseudoSiFiveVQMACC<string Constraint = ""> {
foreach m = MxListVF8 in
+ let VLMul = m.value in
defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>;
}
multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
foreach m = MxListFW in
+ let VLMul = m.value in
defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
}
multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
- foreach m = MxListVF4 in
+ foreach i = [0, 1, 2, 3, 4] in
let hasSideEffects = 0 in
- defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<!if(!eq(m.vrclass, VRM8),
- VRM2, VR),
- m.vrclass, FPR32, m,
+ defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass,
+ MxListVF4[i].vrclass,
+ FPR32, MxListW[i],
Constraint, /*sew*/0,
UsesVXRM=0>;
}
@@ -592,7 +594,7 @@ multiclass VPatVFNRCLIP<string intrinsic, string instruction> {
defvar Vti = pair.Vti;
defvar Wti = pair.Wti;
defm : VPatBinaryRoundingMode<"int_riscv_sf_" # intrinsic,
- "Pseudo" # instruction # "_" # Wti.LMul.MX,
+ "Pseudo" # instruction # "_" # Vti.LMul.MX,
Vti.Vector, Wti.Vector, Wti.Scalar, Vti.Mask,
Vti.Log2SEW, Vti.RegClass,
Wti.RegClass, Wti.ScalarRegClass>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 1ffa78a28d09..7c21fb4bcc1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'Zvk',
-// Vector Cryptography Instructions extension, version 1.0.0-rc1.
+// Vector Cryptography Instructions extension, version 1.0.0.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02ea5270823d..f948f05b22f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -58,18 +58,66 @@ static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
}
-// Fuse these patterns:
-//
-// slli rd, rs1, 32
-// srli rd, rd, x
-// where 0 <= x <= 32
-//
-// and
-//
+// Fuse zero extension of halfword:
// slli rd, rs1, 48
+// srli rd, rd, 48
+static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 48)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return whether
+ // SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ if (FirstMI->getOperand(2).getImm() != 48)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, 32
+static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 32)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return whether
+ // SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ if (FirstMI->getOperand(2).getImm() != 32)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse shifted zero extension of word:
+// slli rd, rs1, 32
// srli rd, rd, x
-static bool isShiftedZExt(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
+// where 0 <= x < 32
+static bool isShiftedZExtW(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
if (SecondMI.getOpcode() != RISCV::SRLI)
return false;
@@ -77,8 +125,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI,
return false;
unsigned SRLIImm = SecondMI.getOperand(2).getImm();
- bool IsShiftBy48 = SRLIImm == 48;
- if (SRLIImm > 32 && !IsShiftBy48)
+ if (SRLIImm >= 32)
return false;
// Given SecondMI, when FirstMI is unspecified, we must return
@@ -89,8 +136,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI,
if (FirstMI->getOpcode() != RISCV::SLLI)
return false;
- unsigned SLLIImm = FirstMI->getOperand(2).getImm();
- if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
+ if (FirstMI->getOperand(2).getImm() != 32)
return false;
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
@@ -144,7 +190,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
return true;
- if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
+ if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI))
return true;
if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
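
Splitting the old shifted-zext predicate three ways matches three distinct idioms, each now gated by its own tuning feature. A scalar sketch of what each fused SLLI+SRLI pair computes:

    #include <cstdint>

    static uint64_t zextH(uint64_t X) { return (X << 48) >> 48; } // X & 0xffff
    static uint64_t zextW(uint64_t X) { return (X << 32) >> 32; } // X & 0xffffffff
    // slli rd, rs1, 32; srli rd, rd, x with 0 <= x < 32:
    static uint64_t shiftedZExtW(uint64_t X, unsigned Srl) {
      return (X << 32) >> Srl; // low 32 bits of X, left-shifted by (32 - Srl)
    }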
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
index 58989fd716fa..6362a3bef6f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -216,6 +216,25 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[TuneSiFive7,
TuneDLenFactor2]>;
+def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZicbop,
+ FeatureStdExtZicbom,
+ FeatureStdExtZicboz,
+ FeatureStdExtZihintntl,
+ FeatureStdExtZihintpause,
+ FeatureStdExtZihpm,
+ FeatureStdExtZba,
+ FeatureStdExtZbb,
+ FeatureStdExtZbs,
+ FeatureStdExtZfhmin]>;
+
def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
SyntacoreSCR1Model,
[Feature32Bit,
@@ -254,7 +273,13 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
FeatureStdExtZicbop,
FeatureStdExtZicboz,
FeatureVendorXVentanaCondOps],
- [TuneVeyronFusions]>;
+ [TuneVentanaVeyron,
+ TuneLUIADDIFusion,
+ TuneAUIPCADDIFusion,
+ TuneZExtHFusion,
+ TuneZExtWFusion,
+ TuneShiftedZExtWFusion,
+ TuneLDADDFusion]>;
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
NoSchedModel,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 45783d482f3b..f531ab2fac8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -198,6 +198,7 @@ def SiFive7Model : SchedMachineModel {
let LoadLatency = 3;
let MispredictPenalty = 3;
let CompleteModel = 0;
+ let EnableIntervals = true;
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 23d56cfa6e4e..26320b05d9be 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -143,16 +143,12 @@ public:
bool hasStdExtZvl() const { return ZvlLen != 0; }
bool hasStdExtFOrZfinx() const { return HasStdExtF || HasStdExtZfinx; }
bool hasStdExtDOrZdinx() const { return HasStdExtD || HasStdExtZdinx; }
- bool hasStdExtZfhOrZfhmin() const { return HasStdExtZfh || HasStdExtZfhmin; }
bool hasStdExtZfhOrZhinx() const { return HasStdExtZfh || HasStdExtZhinx; }
- bool hasStdExtZhinxOrZhinxmin() const {
- return HasStdExtZhinx || HasStdExtZhinxmin;
- }
- bool hasStdExtZfhOrZfhminOrZhinxOrZhinxmin() const {
- return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin();
+ bool hasStdExtZfhminOrZhinxmin() const {
+ return HasStdExtZfhmin || HasStdExtZhinxmin;
}
bool hasHalfFPLoadStoreMove() const {
- return hasStdExtZfhOrZfhmin() || HasStdExtZfbfmin;
+ return HasStdExtZfhmin || HasStdExtZfbfmin;
}
bool is64Bit() const { return IsRV64; }
MVT getXLenVT() const {
@@ -194,16 +190,14 @@ public:
}
bool hasMacroFusion() const {
- return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
- hasShiftedZExtFusion() || hasLDADDFusion();
+ return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() ||
+ hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion();
}
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
- bool hasVInstructionsF16Minimal() const {
- return HasStdExtZvfhmin || HasStdExtZvfh;
- }
+ bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
bool hasVInstructionsBF16() const { return HasStdExtZvfbfmin; }
bool hasVInstructionsF32() const { return HasStdExtZve32f; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index efc8350064a6..96ecc771863e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -334,7 +334,7 @@ public:
return RISCVRegisterClass::GPRRC;
Type *ScalarTy = Ty->getScalarType();
- if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhOrZfhmin()) ||
+ if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhmin()) ||
(ScalarTy->isFloatTy() && ST->hasStdExtF()) ||
(ScalarTy->isDoubleTy() && ST->hasStdExtD())) {
return RISCVRegisterClass::FPRRC;
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 5ac45079bd00..c85bd27d256b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1617,7 +1617,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
SPIRVGlobalRegistry *GR) {
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
const DataLayout &DL = MIRBuilder.getDataLayout();
- bool HasEvents = Call->Builtin->Name.find("events") != StringRef::npos;
+ bool HasEvents = Call->Builtin->Name.contains("events");
const SPIRVType *Int32Ty = GR->getOrCreateSPIRVIntegerType(32, MIRBuilder);
// Make vararg instructions before OpEnqueueKernel.
@@ -2098,7 +2098,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName,
// Parameterized SPIR-V builtins names follow this format:
// e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0
- if (NameWithParameters.find('_') == std::string::npos)
+ if (!NameWithParameters.contains('_'))
return TargetExtType::get(MIRBuilder.getContext(), NameWithParameters);
SmallVector<StringRef> Parameters;
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index ec62a819b00e..660c574daf38 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -410,10 +410,10 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I) {
Constant *EltTyConst;
unsigned AddressSpace = 0;
if (auto *AI = dyn_cast<AllocaInst>(I)) {
- EltTyConst = Constant::getNullValue(AI->getAllocatedType());
+ EltTyConst = UndefValue::get(AI->getAllocatedType());
AddressSpace = AI->getAddressSpace();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
- EltTyConst = Constant::getNullValue(GEP->getResultElementType());
+ EltTyConst = UndefValue::get(GEP->getResultElementType());
AddressSpace = GEP->getPointerAddressSpace();
} else {
llvm_unreachable("Unexpected instruction!");
@@ -436,7 +436,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
TypeToAssign = t->second->getType();
}
}
- Constant *Const = Constant::getNullValue(TypeToAssign);
+ Constant *Const = UndefValue::get(TypeToAssign);
buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I, {});
}
for (const auto &Op : I->operands()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index b8a6784ff3c6..3a34a0bfae46 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -645,7 +645,7 @@ SPIRVType *SPIRVGlobalRegistry::findSPIRVType(
Register Reg = DT.find(Ty, &MIRBuilder.getMF());
if (Reg.isValid())
return getSPIRVTypeForVReg(Reg);
- if (ForwardPointerTypes.find(Ty) != ForwardPointerTypes.end())
+ if (ForwardPointerTypes.contains(Ty))
return ForwardPointerTypes[Ty];
return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR);
}
@@ -712,14 +712,14 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
// A null pointer means we have a loop in the type definitions, so make and
// return the corresponding OpTypeForwardPointer.
if (SpvElementType == nullptr) {
- if (ForwardPointerTypes.find(PType) == ForwardPointerTypes.end())
+ if (!ForwardPointerTypes.contains(PType))
ForwardPointerTypes[PType] = getOpTypeForwardPointer(SC, MIRBuilder);
return ForwardPointerTypes[PType];
}
Register Reg(0);
// If we have a forward pointer associated with this type, use its register
// operand to create the OpTypePointer.
- if (ForwardPointerTypes.find(PType) != ForwardPointerTypes.end())
+ if (ForwardPointerTypes.contains(PType))
Reg = getSPIRVTypeID(ForwardPointerTypes[PType]);
return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg);
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 779036016560..2a830535a2aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -391,7 +391,7 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
if (MI.getOpcode() != SPIRV::OpExtInst)
continue;
auto Set = MI.getOperand(2).getImm();
- if (MAI.ExtInstSetMap.find(Set) == MAI.ExtInstSetMap.end())
+ if (!MAI.ExtInstSetMap.contains(Set))
MAI.ExtInstSetMap[Set] = Register::index2VirtReg(MAI.getNextID());
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 5124181b49e2..d0b8027edd42 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -189,7 +189,7 @@ struct ModuleAnalysisInfo {
}
unsigned getNextID() { return MaxID++; }
bool hasMBBRegister(const MachineBasicBlock &MBB) {
- return BBNumToRegMap.find(MBB.getNumber()) != BBNumToRegMap.end();
+ return BBNumToRegMap.contains(MBB.getNumber());
}
// Convert MBB's number to corresponding ID register.
Register getOrCreateMBBRegister(const MachineBasicBlock &MBB) {
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index f4076be2a7b7..1bfce70fedc0 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -83,7 +83,7 @@ static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) {
}
for (MachineInstr *MI : ToErase) {
Register Reg = MI->getOperand(2).getReg();
- if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end())
+ if (RegsAlreadyAddedToDT.contains(MI))
Reg = RegsAlreadyAddedToDT[MI];
auto *RC = MRI.getRegClassOrNull(MI->getOperand(0).getReg());
if (!MRI.getRegClassOrNull(Reg) && RC)
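Every SPIR-V change in this block is the same mechanical idiom swap; a self-contained illustration:

#include "llvm/ADT/DenseMap.h"
#include <cassert>

static void containsIdiom() {
  llvm::DenseMap<int, int> M;
  M[42] = 1;
  bool Old = M.find(42) != M.end(); // pre-patch spelling
  bool New = M.contains(42);        // post-patch spelling, same result
  assert(Old == New);
}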
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index e61b07e973e9..66555fa06b06 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -38,6 +38,7 @@ SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) {
DotIsPC = false;
EmitGNUAsmStartIndentationMarker = false;
EmitLabelsInUpperCase = true;
+ ExceptionsType = ExceptionHandling::ZOS;
IsLittleEndian = false;
MaxInstLength = 6;
RestrictCommentStringToStartOfStatement = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3186002c57d9..243461c0316e 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -1115,7 +1115,7 @@ void SystemZAsmPrinter::emitFunctionBodyEnd() {
static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
bool StackProtector, bool FPRMask, bool VRMask,
- bool HasName) {
+ bool EHBlock, bool HasName) {
enum class PPA1Flag1 : uint8_t {
DSA64Bit = (0x80 >> 0),
VarArg = (0x80 >> 7),
@@ -1133,6 +1133,7 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
enum class PPA1Flag4 : uint8_t {
EPMOffsetPresent = (0x80 >> 0),
VRMask = (0x80 >> 2),
+ EHBlock = (0x80 >> 3),
ProcedureNamePresent = (0x80 >> 7),
LLVM_MARK_AS_BITMASK_ENUM(EPMOffsetPresent)
};
@@ -1158,6 +1159,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
if (VRMask)
Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag.
+ if (EHBlock)
+ Flags4 |= PPA1Flag4::EHBlock; // Add optional EH block.
+
if (HasName)
Flags4 |= PPA1Flag4::ProcedureNamePresent; // Add optional name block.
@@ -1188,6 +1192,8 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
OutStreamer->AddComment("PPA1 Flags 4");
if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask)
OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area");
+ if ((Flags4 & PPA1Flag4::EHBlock) == PPA1Flag4::EHBlock)
+ OutStreamer->AddComment(" Bit 3: 1 = C++ EH block");
if ((Flags4 & PPA1Flag4::ProcedureNamePresent) ==
PPA1Flag4::ProcedureNamePresent)
OutStreamer->AddComment(" Bit 7: 1 = Name Length and Name");
@@ -1314,12 +1320,14 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
OutStreamer->AddComment("Offset to PPA2");
OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CurrentFnPPA1Sym, 4);
+ bool NeedEmitEHBlock = !MF->getLandingPads().empty();
+
bool HasName =
MF->getFunction().hasName() && MF->getFunction().getName().size() > 0;
emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(),
MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0,
- TargetHasVector && SavedVRMask != 0, HasName);
+ TargetHasVector && SavedVRMask != 0, NeedEmitEHBlock, HasName);
OutStreamer->AddComment("Length/4 of Parms");
OutStreamer->emitInt16(
@@ -1361,6 +1369,29 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
OutStreamer->emitInt32(FrameAndVROffset);
}
+ // Emit C++ EH information block
+ const Function *Per = nullptr;
+ if (NeedEmitEHBlock) {
+ Per = dyn_cast<Function>(
+ MF->getFunction().getPersonalityFn()->stripPointerCasts());
+ MCSymbol *PersonalityRoutine =
+ Per ? MF->getTarget().getSymbol(Per) : nullptr;
+ assert(PersonalityRoutine && "Missing personality routine");
+
+ OutStreamer->AddComment("Version");
+ OutStreamer->emitInt32(1);
+ OutStreamer->AddComment("Flags");
+ OutStreamer->emitInt32(0); // LSDA field is a WAS offset
+ OutStreamer->AddComment("Personality routine");
+ OutStreamer->emitInt64(ADATable.insert(
+ PersonalityRoutine, SystemZII::MO_ADA_INDIRECT_FUNC_DESC));
+ OutStreamer->AddComment("LSDA location");
+ MCSymbol *GCCEH = MF->getContext().getOrCreateSymbol(
+ Twine("GCC_except_table") + Twine(MF->getFunctionNumber()));
+ OutStreamer->emitInt64(
+ ADATable.insert(GCCEH, SystemZII::MO_ADA_DATA_SYMBOL_ADDR));
+ }
+
// Emit name length and name optional section (0x01 of flags 4)
if (HasName)
emitPPA1Name(OutStreamer, MF->getFunction().getName());
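For reference, the PPA1 Flags-4 bit constants used above expand as follows (a sketch; (0x80 >> n) sets bit n, counting from the most significant bit):

#include <cstdint>

enum : uint8_t {
  EPMOffsetPresent     = 0x80, // 0x80 >> 0, bit 0
  VRMask               = 0x20, // 0x80 >> 2, bit 2
  EHBlock              = 0x10, // 0x80 >> 3, bit 3 (added by this patch)
  ProcedureNamePresent = 0x01, // 0x80 >> 7, bit 7
};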
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 7522998fd06d..db19c8881c68 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetMachine.h"
@@ -994,6 +995,11 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
if (hasFP(MF) || Subtarget.hasBackChain())
CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));
+ // If this function has an associated personality function then the
+ // environment register R5 must be saved in the DSA.
+ if (!MF.getLandingPads().empty())
+ CSI.push_back(CalleeSavedInfo(Regs.getADARegister()));
+
// Scan the call-saved GPRs and find the bounds of the register spill area.
Register LowRestoreGPR = 0;
int LowRestoreOffset = INT32_MAX;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index e5e1e91916f3..c7d8591c5bdf 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -465,7 +465,8 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
bool IsBase) const {
SDValue N = IsBase ? AM.Base : AM.Index;
unsigned Opcode = N.getOpcode();
- if (Opcode == ISD::TRUNCATE) {
+ // Look through no-op truncations.
+ if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) {
N = N.getOperand(0);
Opcode = N.getOpcode();
}
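The added size guard can be read as a predicate naming the condition (a sketch, not upstream code): a truncate whose source is at most 64 bits wide cannot change the address bits SystemZ consumes, so it is safe to look through; a truncate from a wider value (e.g. i128) is not.

static bool isNoOpTruncForAddress(SDValue N) {
  return N.getOpcode() == ISD::TRUNCATE &&
         N.getOperand(0).getValueSizeInBits() <= 64;
}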
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index a1803cf9a042..559f2ca476d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1362,6 +1362,16 @@ SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
report_fatal_error("Invalid register name global variable");
}
+Register SystemZTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
+}
+
+Register SystemZTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
+}
+
void SystemZTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 6b3ce3f8c1d2..baf4ba416548 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -555,16 +555,12 @@ public:
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
- getExceptionPointerRegister(const Constant *PersonalityFn) const override {
- return SystemZ::R6D;
- }
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
- getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
- return SystemZ::R7D;
- }
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
index 3f96bd37755e..2a4383314e46 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
@@ -61,8 +61,6 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
// We should properly mark well-known section name prefixes as small/large,
// because otherwise the output section may have the wrong section flags and
// the linker will lay it out in an unexpected way.
- // TODO: bring back lbss/ldata/lrodata checks after fixing accesses to large
- // globals in the small code model.
StringRef Name = GV->getSection();
if (!Name.empty()) {
auto IsPrefix = [&](StringRef Prefix) {
@@ -71,6 +69,8 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
};
if (IsPrefix(".bss") || IsPrefix(".data") || IsPrefix(".rodata"))
return false;
+ if (IsPrefix(".lbss") || IsPrefix(".ldata") || IsPrefix(".lrodata"))
+ return true;
}
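The IsPrefix lambda's body is elided by the hunk context; assuming it matches the bare prefix or the prefix followed by a dot-suffix (the usual convention for section-name prefixes), the restored large-section check behaves like this sketch (hypothetical helper):

#include "llvm/ADT/StringRef.h"

static bool inLargeSection(llvm::StringRef Name) {
  for (llvm::StringRef P : {".lbss", ".ldata", ".lrodata"}) {
    llvm::StringRef S = Name;
    if (S.consume_front(P) && (S.empty() || S.front() == '.'))
      return true; // matches ".ldata" and ".ldata.hot", not ".ldatafoo"
  }
  return false;
}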
// For x86-64, we treat an explicit GlobalVariable small code model to mean
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e78d16056460..bc5f562d9589 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -263,8 +263,7 @@ private:
return 0;
SmallVector<ICToken, 16> OperandStack;
- for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
- ICToken Op = PostfixStack[i];
+ for (const ICToken &Op : PostfixStack) {
if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
OperandStack.push_back(Op);
} else if (isUnaryOperator(Op.first)) {
@@ -1731,8 +1730,8 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
OrigOperands.pop_back();
}
// OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
- for (unsigned int i = 0; i < FinalOperands.size(); ++i)
- OrigOperands.push_back(std::move(FinalOperands[i]));
+ for (auto &Op : FinalOperands)
+ OrigOperands.push_back(std::move(Op));
return false;
}
@@ -3062,6 +3061,35 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
ErrMsg))
return Error(BaseLoc, ErrMsg);
+ // If the displacement is a constant, check for overflow. For 64-bit
+ // addressing, gas requires isInt<32> and otherwise reports an error. For
+ // others, gas reports a warning and allows a wider range. E.g. gas allows
+ // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. the Linux kernel uses
+ // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
+ if (BaseReg || IndexReg) {
+ if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
+ auto Imm = CE->getValue();
+ bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
+ bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
+ if (Is64) {
+ if (!isInt<32>(Imm))
+ return Error(BaseLoc, "displacement " + Twine(Imm) +
+ " is not within [-2147483648, 2147483647]");
+ } else if (!Is16) {
+ if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
+ Warning(BaseLoc, "displacement " + Twine(Imm) +
+ " shortened to 32-bit signed " +
+ Twine(static_cast<int32_t>(Imm)));
+ }
+ } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
+ Warning(BaseLoc, "displacement " + Twine(Imm) +
+ " shortened to 16-bit signed " +
+ Twine(static_cast<int16_t>(Imm)));
+ }
+ }
+ }
+
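Restated compactly, the policy just added distinguishes three addressing widths (a hypothetical helper mirroring the code above; isInt/isUInt come from llvm/Support/MathExtras.h):

enum class DispCheck { Ok, Warn, Error };

static DispCheck classifyDisp(int64_t Imm, bool Is64, bool Is16) {
  uint64_t Abs = Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm);
  if (Is64) // 64-bit base/index: out-of-range is a hard error
    return isInt<32>(Imm) ? DispCheck::Ok : DispCheck::Error;
  if (!Is16) // 32-bit: warn, then truncate to int32_t
    return isUInt<32>(Abs) ? DispCheck::Ok : DispCheck::Warn;
  return isUInt<16>(Abs) ? DispCheck::Ok : DispCheck::Warn; // 16-bit
}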
if (SegReg || BaseReg || IndexReg)
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
BaseReg, IndexReg, Scale, StartLoc,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index b6ebbcf56aef..9e1f1eb97e70 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1060,7 +1060,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
- Prefix.set4V(MI, CurOp++);
+ Prefix.set4VV2(MI, CurOp++);
break;
}
case X86II::MRM_C0:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index 7f134fe1c72b..0ba31e173a1a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1306,8 +1306,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
}
- for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
- MIB.addReg(RetRegs[i], RegState::Implicit);
+ for (unsigned Reg : RetRegs)
+ MIB.addReg(Reg, RegState::Implicit);
return true;
}
@@ -3346,8 +3346,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Walk the register/memloc assignments, inserting copies/loads.
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign const &VA = ArgLocs[i];
+ for (const CCValAssign &VA : ArgLocs) {
const Value *ArgVal = OutVals[VA.getValNo()];
MVT ArgVT = OutVTs[VA.getValNo()];
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
index aab2535aa86d..ca4d03913d09 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -462,8 +462,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Check to see if any of the values defined by this instruction are dead
// after definition. If so, pop them.
- for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
- unsigned Reg = DeadRegs[i];
+ for (unsigned Reg : DeadRegs) {
// Check if Reg is live on the stack. An inline-asm register operand that
// is in the clobber list and marked dead might not be live on the stack.
static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7ec59c74f5f5..77a997588c4f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1828,9 +1828,7 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
// That signifies access to globals that are known to be "near",
// such as the GOT itself.
CodeModel::Model M = TM.getCodeModel();
- if (Subtarget->is64Bit() &&
- ((M == CodeModel::Large && !IsRIPRelTLS) ||
- (M == CodeModel::Medium && !IsRIPRel)))
+ if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS)
return true;
// Base and index reg must be 0 in order to use %rip as base.
@@ -1866,6 +1864,13 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else
llvm_unreachable("Unhandled symbol reference node.");
+ // Can't use an addressing mode with large globals.
+ if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
+ TM.isLargeGlobalValue(AM.GV)) {
+ AM = Backup;
+ return true;
+ }
+
if (foldOffsetIntoAddress(Offset, AM)) {
AM = Backup;
return true;
@@ -1910,20 +1915,12 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
// Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
// because it has a smaller encoding.
- // TODO: Which other code models can use this?
- switch (TM.getCodeModel()) {
- default: break;
- case CodeModel::Small:
- case CodeModel::Kernel:
- if (Subtarget->is64Bit() &&
- AM.Scale == 1 &&
- AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == nullptr &&
- AM.IndexReg.getNode() == nullptr &&
- AM.SymbolFlags == X86II::MO_NO_FLAG &&
- AM.hasSymbolicDisplacement())
- AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
- break;
+ if (TM.getCodeModel() != CodeModel::Large &&
+ (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
+ AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) {
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
}
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index b80c766c7ffa..63bdf24d6b4f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2674,34 +2674,33 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
-bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
- bool hasSymbolicDisplacement) {
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model CM,
+ bool HasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
- if (!hasSymbolicDisplacement)
+ if (!HasSymbolicDisplacement)
return true;
- // FIXME: Some tweaks might be needed for medium code model.
- if (M != CodeModel::Small && M != CodeModel::Kernel)
- return false;
-
- // For small code model we assume that latest object is 16MB before end of 31
- // bits boundary. We may also accept pretty large negative constants knowing
- // that all objects are in the positive half of address space.
- if (M == CodeModel::Small && Offset < 16*1024*1024)
+ // We can fold large offsets in the large code model because we always use
+ // 64-bit offsets.
+ if (CM == CodeModel::Large)
return true;
// For the kernel code model we know that all objects reside in the negative
// half of the 32-bit address space. We must not accept negative offsets, since
// they may be just out of range, but we may accept pretty large positive ones.
- if (M == CodeModel::Kernel && Offset >= 0)
- return true;
+ if (CM == CodeModel::Kernel)
+ return Offset >= 0;
- return false;
+ // For other, non-large code models we assume that the last small object ends
+ // 16MB before the end of the 31-bit address boundary. We may also accept
+ // pretty large negative constants, knowing that all objects are in the
+ // positive half of the address space.
+ return Offset < 16 * 1024 * 1024;
}
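Once a symbolic displacement is present, the rewritten function reduces to a three-way policy; a standalone restatement (sketch, assuming llvm/Support/CodeGen.h and MathExtras.h):

static bool offsetSuitable(int64_t Off, CodeModel::Model CM) {
  if (!isInt<32>(Off))
    return false;                // must fit a 32-bit immediate field
  if (CM == CodeModel::Large)
    return true;                 // 64-bit relocations fold any offset
  if (CM == CodeModel::Kernel)
    return Off >= 0;             // objects live in the negative 2GB
  return Off < 16 * 1024 * 1024; // small/medium: 16MB of headroom
}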
/// Return true if the condition is a signed comparison operation.
@@ -4554,7 +4553,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static const ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
+static ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
if (Ptr.getOpcode() == X86ISD::Wrapper ||
Ptr.getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr.getOperand(0);
@@ -4562,7 +4561,7 @@ static const ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
}
static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
- const ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr);
+ ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
return CNode->getConstVal();
@@ -7563,8 +7562,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
} else
DstVec = DAG.getUNDEF(VT);
- for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
- unsigned InsertIdx = NonConstIdx[i];
+ for (unsigned InsertIdx : NonConstIdx) {
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(InsertIdx),
DAG.getIntPtrConstant(InsertIdx, dl));
@@ -40857,7 +40855,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
SDValue BC = peekThroughOneUseBitcasts(Mask);
EVT BCVT = BC.getValueType();
auto *Load = dyn_cast<LoadSDNode>(BC);
- if (!Load)
+ if (!Load || !Load->getBasePtr().hasOneUse())
return false;
const Constant *C = getTargetConstantFromNode(Load);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 3e11ab2d98a4..6c23928228d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -69,8 +69,8 @@ using PrefetchHints = SampleRecord::CallTargetMap;
// Return any prefetching hints for the specified MachineInstr. The hints
// are returned as pairs (name, delta).
-ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
- const MachineInstr &MI) {
+ErrorOr<const PrefetchHints &>
+getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) {
if (const auto &Loc = MI.getDebugLoc())
if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
@@ -123,7 +123,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
};
static const char *SerializedPrefetchPrefix = "__prefetch";
- const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
+ auto T = getPrefetchHints(TopSamples, MI);
if (!T)
return false;
int16_t max_index = -1;
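The new ErrorOr<const PrefetchHints &> return type exists so the call-target map is handed back by reference instead of by value; a minimal sketch of the same pattern with hypothetical types:

#include "llvm/Support/ErrorOr.h"
#include <map>
#include <system_error>

using HintMap = std::map<int, int>;

static llvm::ErrorOr<const HintMap &> lookupHints(const HintMap &M,
                                                  bool Found) {
  if (!Found)
    return std::make_error_code(std::errc::invalid_argument);
  return M; // binds a reference inside ErrorOr; the map is not copied
}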
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
index 2dbb3e5ee316..7f3e193d9a1b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
@@ -20,32 +20,32 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
"ldtilecfg\t$src",
- [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
+ [(int_x86_ldtilecfg addr:$src)]>, VEX, T8;
let hasSideEffects = 1 in
def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
"sttilecfg\t$src",
- [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;
+ [(int_x86_sttilecfg addr:$src)]>, VEX, T8, PD;
let mayLoad = 1 in
def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src),
"tileloadd\t{$src, $dst|$dst, $src}", []>,
- VEX, T8XD;
+ VEX, T8, XD;
let mayLoad = 1 in
def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src),
"tileloaddt1\t{$src, $dst|$dst, $src}", []>,
- VEX, T8PD;
+ VEX, T8, PD;
let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
- "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
+ "tilerelease", [(int_x86_tilerelease)]>, VEX, T8;
let mayStore = 1 in
def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
(ins sibmem:$dst, TILE:$src),
"tilestored\t{$src, $dst|$dst, $src}", []>,
- VEX, T8XS;
+ VEX, T8, XS;
def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
"tilezero\t$dst", []>,
- VEX, T8XD;
+ VEX, T8, XD;
// Pseudo instruction for RA.
let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
@@ -91,19 +91,19 @@ let Predicates = [HasAMXINT8, In64BitMode] in {
def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
- VEX_4V, T8XD;
+ VEX, VVVV, T8, XD;
def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
- VEX_4V, T8XS;
+ VEX, VVVV, T8, XS;
def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
- VEX_4V, T8PD;
+ VEX, VVVV, T8, PD;
def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
- VEX_4V, T8PS;
+ VEX, VVVV, T8;
}
// Pseudo instruction for RA.
@@ -163,7 +163,7 @@ let Predicates = [HasAMXBF16, In64BitMode] in {
def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, VEX_4V, T8XS;
+ []>, VEX, VVVV, T8, XS;
// Pseudo instruction for RA.
let isPseudo = true, Constraints = "$src4 = $dst" in
@@ -193,7 +193,7 @@ let Predicates = [HasAMXFP16, In64BitMode] in {
def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, VEX_4V, T8XD;
+ []>, VEX, VVVV, T8, XD;
}
// Pseudo instruction for RA.
@@ -222,11 +222,11 @@ let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, T8PD, VEX_4V;
+ []>, T8, PD, VEX, VVVV;
def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
(ins TILE:$src1, TILE:$src2, TILE:$src3),
"tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- []>, VEX_4V, WIG, T8PS;
+ []>, VEX, VVVV, WIG, T8;
} // Constraints = "$src1 = $dst"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index e1fe2b680b96..7c3c1d5fe42b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -378,7 +378,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))>,
- AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
+ AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
let mayLoad = 1 in
defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
@@ -389,7 +389,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
(From.VT (From.LdFrag addr:$src2)),
- (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
+ (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -647,14 +647,14 @@ def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
- EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
+ EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
timm:$src3))]>,
- EVEX_4V, EVEX_CD8<32, CD8VT1>,
+ EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@@ -1039,7 +1039,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(bitconvert
(DestInfo.VT
(UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
- DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
+ DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
(ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
@@ -1051,7 +1051,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(DestInfo.VT
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
+ DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
(ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
@@ -1065,7 +1065,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(DestInfo.VT
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
+ DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
@@ -1076,7 +1076,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(bitconvert
(DestInfo.VT
(UnmaskedBcastOp addr:$src)))))],
- DestInfo.ExeDomain>, T8PD, EVEX,
+ DestInfo.ExeDomain>, T8, PD, EVEX,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
@@ -1090,7 +1090,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(DestInfo.VT
(SrcInfo.BroadcastLdFrag addr:$src)))),
MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
+ DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
let Constraints = "$src0 = $dst",
@@ -1107,7 +1107,7 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
(DestInfo.VT
(SrcInfo.BroadcastLdFrag addr:$src)))),
MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
+ DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
@@ -1173,7 +1173,7 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
"vpbroadcast"#_.Suffix, "$src", "$src",
(_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
/*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
- T8PD, EVEX, Sched<[SchedRR]>;
+ T8, PD, EVEX, Sched<[SchedRR]>;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
@@ -1185,7 +1185,7 @@ multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite Sched
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
- "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
+ "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#rr)
@@ -1593,7 +1593,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched]>;
+ EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
let mayLoad = 1 in
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
@@ -1601,7 +1601,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
(_.VT (_.LdFrag addr:$src3)))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1616,7 +1616,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
- AVX5128IBase, EVEX_4V, EVEX_B,
+ AVX5128IBase, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1715,14 +1715,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched]>;
+ EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
(_.LdFrag addr:$src3))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
@@ -1735,7 +1735,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
- AVX5128IBase, EVEX_4V, EVEX_B,
+ AVX5128IBase, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -1800,35 +1800,35 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_K, Sched<[sched]>;
+ []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
+ []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
+ []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
+ []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1841,7 +1841,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
@@ -1849,7 +1849,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
- EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
@@ -1857,7 +1857,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
- EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1921,7 +1921,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -1931,7 +1931,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
+ timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
@@ -1944,7 +1944,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>,
- EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
+ EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
@@ -1955,7 +1955,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
timm:$cc))]>,
- EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
@@ -1964,7 +1964,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
timm:$cc))]>,
- EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -1991,24 +1991,24 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V, Sched<[sched]>;
+ []>, EVEX, VVVV, Sched<[sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable, hasSideEffects = 0 in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_K, Sched<[sched]>;
+ []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
@@ -2020,14 +2020,14 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
- []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
- []>, EVEX_4V, EVEX_K, EVEX_B,
+ []>, EVEX, VVVV, EVEX_K, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2082,7 +2082,7 @@ defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
- T8PD, REX_W, EVEX_CD8<64, CD8VF>;
+ T8, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
@@ -2098,7 +2098,7 @@ defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
- T8PD, REX_W, EVEX_CD8<64, CD8VF>;
+ T8, REX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
@@ -2113,7 +2113,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
[(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond)))]>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
@@ -2123,7 +2123,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
@@ -2135,7 +2135,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched]>;
+ EVEX, VVVV, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
u8imm:$cc),
@@ -2148,7 +2148,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
@@ -2177,7 +2177,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
@@ -2189,7 +2189,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(_.VT _.RC:$src1),
(_.BroadcastLdFrag addr:$src2),
cond))))]>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
@@ -2405,11 +2405,11 @@ multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
}
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
- AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
+ AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
- AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
- AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
+ AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
@@ -2625,40 +2625,40 @@ multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
let Predicates = [HasDQI, NoEGPR] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
- VEX, PD;
+ VEX, TB, PD;
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
- EVEX, PD;
+ EVEX, TB, PD;
let Predicates = [HasAVX512, NoEGPR] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
- VEX, PS;
+ VEX, TB;
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
- EVEX, PS;
+ EVEX, TB;
let Predicates = [HasBWI, NoEGPR] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
- VEX, PD, REX_W;
+ VEX, TB, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
- VEX, XD;
+ VEX, TB, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
- VEX, PS, REX_W;
+ VEX, TB, REX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
- VEX, XD, REX_W;
+ VEX, TB, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
- EVEX, PD, REX_W;
+ EVEX, TB, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
- EVEX, XD;
+ EVEX, TB, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
- EVEX, PS, REX_W;
+ EVEX, TB, REX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
- EVEX, XD, REX_W;
+ EVEX, TB, XD, REX_W;
}
// GR from/to mask register
@@ -2769,13 +2769,13 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- sched, HasDQI>, VEX, PD;
+ sched, HasDQI>, VEX, TB, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- sched, HasAVX512>, VEX, PS;
+ sched, HasAVX512>, VEX, TB;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- sched, HasBWI>, VEX, PD, REX_W;
+ sched, HasBWI>, VEX, TB, PD, REX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched, HasBWI>, VEX, PS, REX_W;
+ sched, HasBWI>, VEX, TB, REX_W;
}
// TODO - do we need a X86SchedWriteWidths::KMASK type?
@@ -2812,13 +2812,13 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, bit IsCommutable,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
+ sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
+ sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
+ sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
+ sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
}
// TODO - do we need an X86SchedWriteWidths::KMASK type?
@@ -2869,16 +2869,16 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
(ins Src.KRC:$src1, Src.KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_L, Sched<[sched]>;
+ VEX, VVVV, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
(!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
-defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -2895,13 +2895,13 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
- VEX, PD;
+ VEX, TB, PD;
defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
- VEX, PS;
+ VEX, TB;
defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
- VEX, PS, REX_W;
+ VEX, TB, REX_W;
defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
- VEX, PD, REX_W;
+ VEX, TB, PD, REX_W;
}
// TODO - do we need an X86SchedWriteWidths::KMASK type?
@@ -2922,15 +2922,15 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- sched>, VEX, TAPD, REX_W;
+ sched>, VEX, TA, PD, REX_W;
let Predicates = [HasDQI] in
defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- sched>, VEX, TAPD;
+ sched>, VEX, TA, PD;
let Predicates = [HasBWI] in {
defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched>, VEX, TAPD, REX_W;
+ sched>, VEX, TA, PD, REX_W;
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- sched>, VEX, TAPD;
+ sched>, VEX, TA, PD;
}
}
@@ -3371,25 +3371,25 @@ defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
- PS, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
- PD, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPS">,
- PS, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD">,
- PD, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, SchedWriteVecMoveLS,
@@ -3397,7 +3397,7 @@ defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA", 1>,
- PD, EVEX_CD8<32, CD8VF>;
+ TB, PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, SchedWriteVecMoveLS,
@@ -3405,31 +3405,31 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA">,
- PD, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
- XD, EVEX_CD8<8, CD8VF>;
+ TB, XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
- XD, REX_W, EVEX_CD8<16, CD8VF>;
+ TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
- XS, EVEX_CD8<32, CD8VF>;
+ TB, XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU">,
- XS, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
@@ -3816,12 +3816,12 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))]>,
- PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
+ TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
Requires<[HasAVX512]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
- "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
+ "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
Requires<[HasAVX512, In64BitMode]>;
@@ -3830,7 +3830,7 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
addr:$dst)]>,
- EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
+ EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
@@ -3897,7 +3897,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
(ins _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
- _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
+ _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
let Predicates = [prd] in {
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -3906,7 +3906,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
_.ImmAllZerosV)))],
- _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
+ _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -3915,7 +3915,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(_.VT _.RC:$src0))))],
- _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
+ _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
@@ -3954,14 +3954,14 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
}
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
- VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
+ VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
- VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
HasFP16>,
- VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
@@ -4286,7 +4286,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, T_MAP5XS, EVEX_4V, VEX_LIG,
+ []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4295,20 +4295,20 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
- []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
+ []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
- []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
+ []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
}
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, EVEX_4V, VEX_LIG,
+ []>, TB, XS, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4317,20 +4317,20 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
- []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
+ []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
- []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
+ []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, EVEX_4V, VEX_LIG, REX_W,
+ []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4339,7 +4339,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
VR128X:$src1, VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
- []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
+ []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
REX_W, Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
@@ -4347,7 +4347,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
- []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
+ []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}
@@ -4546,20 +4546,20 @@ let Predicates = [HasAVX512] in {
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
- EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
- EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
+ EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
- EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
+ EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -4585,11 +4585,11 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
- SchedWriteVecMoveLSNT>, PD;
+ SchedWriteVecMoveLSNT>, TB, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
- SchedWriteFMoveLSNT>, PD, REX_W;
+ SchedWriteFMoveLSNT>, TB, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
- SchedWriteFMoveLSNT>, PS;
+ SchedWriteFMoveLSNT>, TB;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
@@ -4665,14 +4665,14 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
+ IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
- AVX512BIBase, EVEX_4V,
+ AVX512BIBase, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4686,7 +4686,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src2)))>,
- AVX512BIBase, EVEX_4V, EVEX_B,
+ AVX512BIBase, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4796,13 +4796,13 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
IsCommutable>,
- AVX512BIBase, EVEX_4V, Sched<[sched]>;
+ AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(_Src.LdFrag addr:$src2)))>,
- AVX512BIBase, EVEX_4V,
+ AVX512BIBase, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
@@ -4812,7 +4812,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
"$src1, ${src2}"#_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
- AVX512BIBase, EVEX_4V, EVEX_B,
+ AVX512BIBase, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4829,22 +4829,22 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
- SchedWritePMULLD, HasAVX512, 1>, T8PD;
+ SchedWritePMULLD, HasAVX512, 1>, T8;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
- SchedWriteVecIMul, HasDQI, 1>, T8PD,
+ SchedWriteVecIMul, HasDQI, 1>, T8,
NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
- SchedWriteVecIMul, HasBWI, 1>, T8PD;
+ SchedWriteVecIMul, HasBWI, 1>, T8;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
- SchedWriteVecIMul, HasAVX512, 1>, T8PD;
+ SchedWriteVecIMul, HasAVX512, 1>, T8;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
SchedWriteVecIMul, HasAVX512, 1>;
@@ -4872,7 +4872,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
avx512vl_i8_info, avx512vl_i8_info,
- X86multishift, HasVBMI, 0>, T8PD;
+ X86multishift, HasVBMI, 0>, T8;
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
@@ -4884,7 +4884,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"#_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
- EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4899,13 +4899,13 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
IsCommutable, IsCommutable>,
- EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
+ EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(_Src.LdFrag addr:$src2)))>,
- EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -4967,48 +4967,48 @@ defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512B
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
- avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
+ avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
- SchedWriteVecALU, HasBWI, 1>, T8PD;
+ SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
- SchedWriteVecALU, HasAVX512, 1>, T8PD;
+ SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
- SchedWriteVecALU, HasAVX512, 1>, T8PD,
+ SchedWriteVecALU, HasAVX512, 1>, T8,
NotEVEX2VEXConvertible;
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
- SchedWriteVecALU, HasBWI, 1>, T8PD;
+ SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
- SchedWriteVecALU, HasAVX512, 1>, T8PD;
+ SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
- SchedWriteVecALU, HasAVX512, 1>, T8PD,
+ SchedWriteVecALU, HasAVX512, 1>, T8,
NotEVEX2VEXConvertible;
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
- SchedWriteVecALU, HasBWI, 1>, T8PD;
+ SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
- SchedWriteVecALU, HasAVX512, 1>, T8PD;
+ SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
- SchedWriteVecALU, HasAVX512, 1>, T8PD,
+ SchedWriteVecALU, HasAVX512, 1>, T8,
NotEVEX2VEXConvertible;
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
- SchedWriteVecALU, HasBWI, 1>, T8PD;
+ SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
- SchedWriteVecALU, HasAVX512, 1>, T8PD;
+ SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
- SchedWriteVecALU, HasAVX512, 1>, T8PD,
+ SchedWriteVecALU, HasAVX512, 1>, T8,
NotEVEX2VEXConvertible;
// PMULLQ: Use the 512-bit version to implement the 128/256-bit forms in the NoVLX case.
@@ -5445,18 +5445,18 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator
sched.PS.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
sched.PS.Scl>,
- XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
sched.PD.Scl>,
- XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in
defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, sched.PH.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
sched.PH.Scl>,
- T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5465,16 +5465,16 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
VecNode, SaeNode, sched.PS.Scl, IsCommutable,
NAME#"SS">,
- XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable,
NAME#"SD">,
- XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in {
defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, SaeNode, sched.PH.Scl, IsCommutable,
NAME#"SH">,
- T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
+ T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>,
NotEVEX2VEXConvertible;
}
}
@@ -5515,30 +5515,30 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
- SchedWriteFCmp.Scl, "VMINCSS">, XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
+ SchedWriteFCmp.Scl, "VMINCSS">, TB, XS,
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
- SchedWriteFCmp.Scl, "VMINCSD">, XD,
- REX_W, EVEX_4V, VEX_LIG,
+ SchedWriteFCmp.Scl, "VMINCSD">, TB, XD,
+ REX_W, EVEX, VVVV, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
- SchedWriteFCmp.Scl, "VMAXCSS">, XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
+ SchedWriteFCmp.Scl, "VMAXCSS">, TB, XS,
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
- SchedWriteFCmp.Scl, "VMAXCSD">, XD,
- REX_W, EVEX_4V, VEX_LIG,
+ SchedWriteFCmp.Scl, "VMAXCSD">, TB, XD,
+ REX_W, EVEX, VVVV, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
- SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5, XS,
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
NotEVEX2VEXConvertible;
defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
- SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5, XS,
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
NotEVEX2VEXConvertible;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -5556,21 +5556,21 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
- IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
+ IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
let mayLoad = 1 in {
defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
(MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
- ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
(MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
- ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5586,7 +5586,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
0, 0, 0, vselect_mask, ClobberConstraint>,
- EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+ EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
@@ -5597,7 +5597,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
- EVEX_4V, EVEX_B, Sched<[sched]>;
+ EVEX, VVVV, EVEX_B, Sched<[sched]>;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -5607,27 +5607,27 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
bit IsPD128Commutable = IsCommutable> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
- sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
+ sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
- sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
+ sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
- sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
+ sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
- sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
+ sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
sched.PD.XMM, IsPD128Commutable,
- IsCommutable>, EVEX_V128, PD, REX_W,
+ IsCommutable>, EVEX_V128, TB, PD, REX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
- sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
+ sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
EVEX_CD8<64, CD8VF>;
}
}
@@ -5637,15 +5637,15 @@ multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator O
X86SchedWriteSizes sched, bit IsCommutable = 0> {
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
- sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
+ sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
EVEX_CD8<16, CD8VF>;
}
let Predicates = [HasVLX, HasFP16] in {
defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
- sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
+ sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
- sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
+ sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
EVEX_CD8<16, CD8VF>;
}
}
@@ -5656,14 +5656,14 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
v32f16_info>,
- EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
- EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
+ EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
}
let Uses = [MXCSR] in
@@ -5672,14 +5672,14 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
v32f16_info>,
- EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
- EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
+ EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
@@ -5734,18 +5734,18 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5770,43 +5770,43 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
let Predicates = [HasFP16] in {
defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
- EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
- EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
+ EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
}
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
+ EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
- EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;
+ EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
- EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
+ EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
- EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
+ EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
- EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
- EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
}
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
- EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
- EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
@@ -5825,13 +5825,13 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag), (null_frag), 1>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
let mayLoad = 1 in
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(null_frag), (null_frag)>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5844,7 +5844,7 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(null_frag), (null_frag)>,
- EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5898,9 +5898,9 @@ multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string Opcode
avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
- SchedWriteVecLogic>, T8PD;
+ SchedWriteVecLogic>, T8, PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
- SchedWriteVecLogic>, T8XS;
+ SchedWriteVecLogic>, T8, XS;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
@@ -5944,13 +5944,13 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
- AVX512BIBase, EVEX_4V, Sched<[sched]>;
+ AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
AVX512BIBase,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6035,22 +6035,22 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
- SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
- SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
SchedWriteVecShiftImm, 1>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
- SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
- SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
- SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
SchedWriteVecShift>;
@@ -6097,13 +6097,13 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
- AVX5128IBase, EVEX_4V, Sched<[sched]>;
+ AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(_.VT (_.LdFrag addr:$src2))))>,
- AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6116,7 +6116,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
- AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6374,14 +6374,14 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2)))>,
- T8PD, EVEX_4V, Sched<[sched]>;
+ T8, PD, EVEX, VVVV, Sched<[sched]>;
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
- T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
@@ -6390,7 +6390,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
- T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6469,13 +6469,13 @@ def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
@@ -6494,19 +6494,19 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
- v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
- v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
- v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
- v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
@@ -6565,14 +6565,14 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -6583,7 +6583,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
}
}
@@ -6598,7 +6598,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
- EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+ EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -6627,13 +6627,13 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
SDNode MaskOpNode, SDNode OpNodeRnd> {
defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f16_info, HasFP16>, T_MAP6PD;
+ avx512vl_f16_info, HasFP16>, T_MAP6, PD;
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info>, T8PD;
+ avx512vl_f32_info>, T8, PD;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, REX_W;
+ avx512vl_f64_info>, T8, PD, REX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
@@ -6660,14 +6660,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -6679,7 +6679,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
+ _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
}
@@ -6695,7 +6695,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
- 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -6724,13 +6724,13 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
SDNode MaskOpNode, SDNode OpNodeRnd > {
defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f16_info, HasFP16>, T_MAP6PD;
+ avx512vl_f16_info, HasFP16>, T_MAP6, PD;
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info>, T8PD;
+ avx512vl_f32_info>, T8, PD;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, REX_W;
+ avx512vl_f64_info>, T8, PD, REX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
@@ -6756,7 +6756,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
@@ -6765,7 +6765,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
@@ -6778,7 +6778,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
}
}
@@ -6793,7 +6793,7 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
- 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -6822,13 +6822,13 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
SDNode MaskOpNode, SDNode OpNodeRnd > {
defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f16_info, HasFP16>, T_MAP6PD;
+ avx512vl_f16_info, HasFP16>, T_MAP6, PD;
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info>, T8PD;
+ avx512vl_f32_info>, T8, PD;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, REX_W;
+ avx512vl_f64_info>, T8, PD, REX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
@@ -6851,33 +6851,33 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
+ EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
+ EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
- EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
+ EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
+ !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
- SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
+ SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
let Uses = [MXCSR] in
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -6885,7 +6885,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
!strconcat(OpcodeStr,
"\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
- Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
+ Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
}// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
@@ -6929,15 +6929,15 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f32x_info, "SS">,
- EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
+ EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
- EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
}
let Predicates = [HasFP16] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f16x_info, "SH">,
- EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
+ EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
}
}
@@ -7189,13 +7189,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- T8PD, EVEX_4V, Sched<[sched]>;
+ T8, PD, EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -7205,7 +7205,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
- T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
}
}
@@ -7247,19 +7247,19 @@ let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+ EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
- EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+ EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, x86memop:$src2),
@@ -7267,7 +7267,7 @@ let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2)))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
@@ -7287,7 +7287,7 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 timm:$rc)))]>,
- EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+ EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
(!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
@@ -7307,18 +7307,18 @@ let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SS, GR32,
v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
- XS, EVEX_CD8<32, CD8VT1>;
+ TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
- XS, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
- XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
- XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
@@ -7346,18 +7346,18 @@ def : Pat<(f64 (any_sint_to_fp GR64:$src)),
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SS, GR32,
v4f32x_info, i32mem, loadi32,
- "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
+ "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
- XS, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
- XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
- XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
@@ -7422,28 +7422,28 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
- XS, REX_W, EVEX_CD8<32, CD8VT1>;
+ TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
- XS, REX_W, EVEX_CD8<32, CD8VT1>;
+ TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
- XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
- XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
@@ -7463,13 +7463,13 @@ multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
}
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
- lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
+ lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
- llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>;
+ llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
- lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
+ lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
- llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>;
+ llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
@@ -7609,29 +7609,29 @@ let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+ "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
+ "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
- "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+ "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
- "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;
+ "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+ "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>;
+ "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
- "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+ "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
- "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;
+ "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
@@ -7646,25 +7646,25 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
- EVEX_4V, VEX_LIG, Sched<[sched]>;
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.ScalarIntMemFrags addr:$src2)))>,
- EVEX_4V, VEX_LIG,
+ EVEX, VVVV, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched]>;
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -7678,7 +7678,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
- EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
+ EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
}
// Scalar Conversion with rounding control (RC)
@@ -7691,7 +7691,7 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
- EVEX_4V, VEX_LIG, Sched<[sched]>,
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
@@ -7719,22 +7719,22 @@ multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f32x_info>, XD, REX_W;
+ f32x_info>, TB, XD, REX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
- f64x_info>, XS;
+ f64x_info>, TB, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f16x_info, HasFP16>, T_MAP5XD, REX_W;
+ f16x_info, HasFP16>, T_MAP5, XD, REX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f16x_info,
- f64x_info, HasFP16>, T_MAP5XS;
+ f64x_info, HasFP16>, T_MAP5, XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f32x_info,
- f16x_info, HasFP16>, T_MAP5PS;
+ f16x_info, HasFP16>, T_MAP5;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f16x_info,
- f32x_info, HasFP16>, T_MAP6PS;
+ f32x_info, HasFP16>, T_MAP6;
def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
@@ -7996,10 +7996,10 @@ multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
- REX_W, PD, EVEX_CD8<64, CD8VF>;
+ REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
- PS, EVEX_CD8<32, CD8VH>;
+ TB, EVEX_CD8<32, CD8VH>;
// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
@@ -8108,14 +8108,14 @@ multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
avx512vl_f32_info, SchedWriteCvtPD2PS,
- HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
+ HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
avx512vl_f16_info, SchedWriteCvtPS2PD,
- HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
+ HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
- REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+ REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
- T_MAP5PS, EVEX_CD8<16, CD8VQ>;
+ T_MAP5, EVEX_CD8<16, CD8VQ>;
let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
@@ -8596,120 +8596,120 @@ multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperato
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VSintToFP,
- SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
+ SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
- PS, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
- SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
+ SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPD2DQ>,
- PD, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
- SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
+ SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPD2DQ>,
- PS, REX_W, EVEX_CD8<64, CD8VF>;
+ TB, REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
uint_to_fp, X86any_VUintToFP, X86VUintToFP,
- SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
+ SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd,
- SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
+ SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
+ X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
- PS, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
- PS, EVEX_CD8<64, CD8VF>;
+ TB, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
- PD, EVEX_CD8<64, CD8VF>;
+ TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
- PD, EVEX_CD8<64, CD8VF>;
+ TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
- X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPD2DQ>, REX_W,
- PD, EVEX_CD8<64, CD8VF>;
+ TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
- SchedWriteCvtPS2DQ>, PD,
+ SchedWriteCvtPS2DQ>, TB, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPD2DQ>, REX_W,
- PD, EVEX_CD8<64, CD8VF>;
+ TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
- SchedWriteCvtPS2DQ>, PD,
+ SchedWriteCvtPS2DQ>, TB, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
sint_to_fp, X86VSintToFpRnd,
- SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>;
+ SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
- REX_W, XS, EVEX_CD8<64, CD8VF>;
+ REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
SchedWriteCvtDQ2PS, HasFP16>,
- T_MAP5PS, EVEX_CD8<32, CD8VF>;
+ T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
X86any_VUintToFP, X86VMUintToFP,
X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
- SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
+ SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
EVEX_CD8<32, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
- SchedWriteCvtDQ2PS>, REX_W, PS,
+ SchedWriteCvtDQ2PS>, REX_W, TB,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
X86any_VUintToFP, X86VMUintToFP,
X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
- SchedWriteCvtDQ2PS>, REX_W, XD,
+ SchedWriteCvtDQ2PS>, REX_W, TB, XD,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
@@ -8912,12 +8912,12 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
(X86any_cvtph2ps (_src.VT _src.RC:$src)),
(X86cvtph2ps (_src.VT _src.RC:$src))>,
- T8PD, Sched<[sched]>;
+ T8, PD, Sched<[sched]>;
defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
(X86any_cvtph2ps (_src.VT ld_dag)),
(X86cvtph2ps (_src.VT ld_dag))>,
- T8PD, Sched<[sched.Folded]>;
+ T8, PD, Sched<[sched.Folded]>;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
@@ -8927,7 +8927,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(ins _src.RC:$src), "vcvtph2ps",
"{sae}, $src", "$src, {sae}",
(X86cvtph2psSAE (_src.VT _src.RC:$src))>,
- T8PD, EVEX_B, Sched<[sched]>;
+ T8, PD, EVEX_B, Sched<[sched]>;
}
let Predicates = [HasAVX512] in
@@ -9068,55 +9068,55 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
+ "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD, EVEX,
+ "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
+ "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD, EVEX,
+ "comisd", SSEPackedDouble>, TB, PD, EVEX,
VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
+ sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
+ sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
}
}
let Defs = [EFLAGS], Predicates = [HasFP16] in {
defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
- SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
- SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
EVEX_CD8<16, CD8VT1>;
defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
- "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
- "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ "comish", SSEPackedSingle>, T_MAP5, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
sse_load_f16, "ucomish", SSEPackedSingle>,
- T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
sse_load_f16, "comish", SSEPackedSingle>,
- T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
}
@@ -9129,35 +9129,35 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
- EVEX_4V, VEX_LIG, Sched<[sched]>;
+ EVEX, VVVV, VEX_LIG, Sched<[sched]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
+ (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
- T_MAP6PD;
+ T_MAP6, PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
- EVEX_CD8<16, CD8VT1>, T_MAP6PD;
+ EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
f32x_info>, EVEX_CD8<32, CD8VT1>,
- T8PD;
+ T8, PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
- T8PD;
+ T8, PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f32x_info>,
- EVEX_CD8<32, CD8VT1>, T8PD;
+ EVEX_CD8<32, CD8VT1>, T8, PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
- EVEX_CD8<64, CD8VT1>, T8PD;
+ EVEX_CD8<64, CD8VT1>, T8, PD;
}
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
@@ -9166,19 +9166,19 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
+ (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
Sched<[sched]>;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
- (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
- EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9192,7 +9192,7 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasFP16] in
defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
- v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX], Uses = [MXCSR] in {
@@ -9212,10 +9212,10 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
OpNode, sched.XMM, v8f16x_info>,
- EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
OpNode, sched.YMM, v16f16x_info>,
- EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
}
}
@@ -9250,16 +9250,16 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
+ sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
+ sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
}
multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
let Predicates = [HasFP16] in
defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
- EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
+ EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
}
let Predicates = [HasERI] in {
@@ -9311,10 +9311,10 @@ multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
- T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
+ T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
@@ -9323,16 +9323,16 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
sched.XMM>,
- EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+ EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
sched.YMM>,
- EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+ EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
sched.XMM>,
- EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
sched.YMM>,
- EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
}
}
@@ -9341,12 +9341,12 @@ multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasFP16] in
defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
- T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
+ T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
- EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
- EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
}
}
let Predicates = [HasERI] in {
@@ -9401,35 +9401,35 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
let Predicates = [HasFP16] in
defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.ZMM, v32f16_info>,
- EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.XMM, v8f16x_info>,
- EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.YMM, v16f16x_info>,
- EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
}
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
- EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.XMM, v4f32x_info>,
- EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.YMM, v8f32x_info>,
- EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.XMM, v2f64x_info>,
- EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.YMM, v4f64x_info>,
- EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
}
}
@@ -9439,13 +9439,13 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
let Predicates = [HasFP16] in
defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
sched.PH.ZMM, v32f16_info>,
- EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
- EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
@@ -9501,11 +9501,11 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
- EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
+ EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
- EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
+ EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
- EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
+ EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
@@ -9569,17 +9569,17 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
SchedWriteFRnd.Scl, f16x_info>,
- AVX512PSIi8Base, TA, EVEX_4V,
+ AVX512PSIi8Base, TA, EVEX, VVVV,
EVEX_CD8<16, CD8VT1>;
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFRnd.Scl, f32x_info>,
- AVX512AIi8Base, EVEX_4V, VEX_LIG,
+ AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFRnd.Scl, f64x_info>,
- REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
+ REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
@@ -9923,16 +9923,16 @@ multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;
+ EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
+ EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
}
let Predicates = [HasBWI] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
+ EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
}
}
@@ -9943,16 +9943,16 @@ multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;
+ EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
+ EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
+ EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
}
}
@@ -9963,16 +9963,16 @@ multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;
+ EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
+ EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
+ EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
}
}
@@ -9983,16 +9983,16 @@ multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;
+ EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
+ EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
+ EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
}
}
@@ -10003,16 +10003,16 @@ multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;
+ EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
+ EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
+ EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
}
}
@@ -10024,16 +10024,16 @@ multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
+ EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
+ EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
- EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
+ EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
}
}
@@ -10663,7 +10663,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo>{
- let ExeDomain = DestInfo.ExeDomain in {
+ let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -10689,7 +10689,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, ImmT = Imm8 in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
@@ -10773,13 +10773,13 @@ multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
let Predicates = [Pred] in {
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
- SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+ SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
}
let Predicates = [Pred, HasVLX] in {
defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
- SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+ SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
- SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+ SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
}
}
@@ -10835,38 +10835,38 @@ defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
+ AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
- AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
- AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
@@ -10920,13 +10920,13 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
- avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
- avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
+ avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
- avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
- avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
+ avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
@@ -10962,15 +10962,15 @@ multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
- AVX512AIi8Base, EVEX_4V, EVEX_V512;
+ AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
- AVX512AIi8Base, EVEX_4V, EVEX_V128;
+ AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
// We can't really override the 256-bit version so change it back to unset.
let EVEX2VEXOverride = ? in
defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
- AVX512AIi8Base, EVEX_4V, EVEX_V256;
+ AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
}
}
@@ -11258,7 +11258,7 @@ defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
- avx512vl_f32_info, HasAVX512>, XS;
+ avx512vl_f32_info, HasAVX512>, TB, XS;
}
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
@@ -11301,7 +11301,7 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
- avx512vl_f64_info>, XD, REX_W;
+ avx512vl_f64_info>, TB, XD, REX_W;
}
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
@@ -11369,9 +11369,9 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
- EVEX, TAPD, Sched<[WriteVecExtract]>;
+ EVEX, TA, PD, Sched<[WriteVecExtract]>;
- defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
+ defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
}
}
@@ -11382,15 +11382,15 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
- EVEX, PD, Sched<[WriteVecExtract]>;
+ EVEX, TB, PD, Sched<[WriteVecExtract]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX, TAPD, Sched<[WriteVecExtract]>;
+ EVEX, TA, PD, Sched<[WriteVecExtract]>;
- defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
+ defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
}
}
@@ -11402,14 +11402,14 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
- EVEX, TAPD, Sched<[WriteVecExtract]>;
+ EVEX, TA, PD, Sched<[WriteVecExtract]>;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
- EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
Sched<[WriteVecExtractSt]>;
}
}
@@ -11427,7 +11427,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
+ EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -11437,7 +11437,7 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
- (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
+ (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
@@ -11452,17 +11452,17 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
- EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
+ EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
- _.ScalarLdFrag, imm>, TAPD;
+ _.ScalarLdFrag, imm>, TA, PD;
}
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
- extloadi8>, TAPD, WIG;
+ extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
- extloadi16>, PD, WIG;
+ extloadi16>, TB, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
@@ -11501,11 +11501,11 @@ multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
SchedWriteFShuffle>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
- AVX512AIi8Base, EVEX_4V;
+ TA, EVEX, VVVV;
}
-defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
-defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
@@ -11543,10 +11543,10 @@ multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
SchedWriteShuffle, HasBWI>,
- AVX512PDIi8Base, EVEX_4V, WIG;
+ AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
SchedWriteShuffle, HasBWI>,
- AVX512PDIi8Base, EVEX_4V, WIG;
+ AVX512PDIi8Base, EVEX, VVVV, WIG;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86FoldableSchedWrite sched,
@@ -11584,7 +11584,7 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
}
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
- SchedWritePSADBW, HasBWI>, EVEX_4V, WIG;
+ SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
@@ -11659,7 +11659,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
(i8 timm:$src4)), 1, 1>,
- AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
+ AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11667,7 +11667,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 timm:$src4)), 1, 0>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
@@ -11677,7 +11677,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT _.RC:$src2),
(_.VT (_.BroadcastLdFrag addr:$src3)),
(i8 timm:$src4)), 1, 0>, EVEX_B,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
@@ -12002,23 +12002,23 @@ multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
_Vec.info512, _Tbl.info512>, AVX512AIi8Base,
- EVEX_4V, EVEX_V512;
+ EVEX, VVVV, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
_Vec.info128, _Tbl.info128>, AVX512AIi8Base,
- EVEX_4V, EVEX_V128;
+ EVEX, VVVV, EVEX_V128;
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
_Vec.info256, _Tbl.info256>, AVX512AIi8Base,
- EVEX_4V, EVEX_V256;
+ EVEX, VVVV, EVEX_V256;
}
}
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
@@ -12165,17 +12165,17 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
defm Z128 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix),
loadv2i64, 0, VR128X, i128mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
+ EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
defm Z256 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_256"),
loadv4i64, 0, VR256X, i256mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
+ EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
}
let Predicates = [HasAVX512, HasVAES] in
defm Z : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_512"),
loadv8i64, 0, VR512, i512mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
+ EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
}
defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
@@ -12189,14 +12189,14 @@ defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
- EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
+ EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
- EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
+ EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
- int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
+ int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
EVEX_CD8<64, CD8VF>, WIG;
}
@@ -12217,13 +12217,13 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
- T8PD, EVEX_4V, Sched<[sched]>;
+ T8, PD, EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
- T8PD, EVEX_4V,
+ T8, PD, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -12239,7 +12239,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
- T8PD, EVEX_4V, EVEX_B,
+ T8, PD, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12284,9 +12284,9 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
REX_W, EVEX_CD8<16, CD8VF>;
defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
- OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
- sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
+ sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
}
// Concat & Shift
@@ -12321,13 +12321,13 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (OpNode VTI.RC:$src1,
VTI.RC:$src2, VTI.RC:$src3)),
IsCommutable, IsCommutable>,
- EVEX_4V, T8PD, Sched<[sched]>;
+ EVEX, VVVV, T8, PD, Sched<[sched]>;
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
- EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
+ EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
@@ -12336,8 +12336,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
- EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
+ EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
+ T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
}
}
@@ -12406,7 +12406,7 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2)),
(X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
- (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
+ (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
Sched<[sched]>;
defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.MemOp:$src2),
@@ -12416,7 +12416,7 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
(VTI.VT (VTI.LdFrag addr:$src2))),
(X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT (VTI.LdFrag addr:$src2)))>,
- EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
+ EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12451,7 +12451,7 @@ multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
SchedWriteVecALU>,
- EVEX_CD8<8, CD8VF>, T8PD;
+ EVEX_CD8<8, CD8VF>, T8;
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
@@ -12483,10 +12483,10 @@ multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
X86GF2P8affineinvqb, SchedWriteVecIMul>,
- EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
+ EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
X86GF2P8affineqb, SchedWriteVecIMul>,
- EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
+ EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
//===----------------------------------------------------------------------===//
@@ -12498,25 +12498,25 @@ let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"v4fmaddps", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
+ []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"v4fnmaddps", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
+ []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fmaddss", "$src3, $src2", "$src2, $src3",
- []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+ []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fnmaddss", "$src3, $src2", "$src2, $src3",
- []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+ []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
}
@@ -12529,13 +12529,13 @@ let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"vp4dpwssd", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
+ []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"vp4dpwssds", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
+ []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
Sched<[SchedWriteFMA.ZMM.Folded]>;
}
@@ -12558,7 +12558,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT _.RC:$src2)))]>,
- EVEX_4V, T8XD, Sched<[sched]>;
+ EVEX, VVVV, T8, XD, Sched<[sched]>;
def rm : I<0x68, MRMSrcMem,
(outs _.KRPC:$dst),
@@ -12567,7 +12567,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : I<0x68, MRMSrcMem,
@@ -12577,7 +12577,7 @@ multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInf
", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
- EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12623,7 +12623,7 @@ let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
avx512vl_f32_info, avx512vl_bf16_info,
- X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
+ X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
@@ -12660,7 +12660,7 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
}
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
- SchedWriteCvtPD2PS>, T8XS,
+ SchedWriteCvtPD2PS>, T8, XS,
EVEX_CD8<32, CD8VF>;
let Predicates = [HasBF16, HasVLX] in {
@@ -12744,13 +12744,13 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins src_v.RC:$src2, src_v.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins src_v.RC:$src2, src_v.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
- (src_v.LdFrag addr:$src3)))>, EVEX_4V,
+ (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -12760,7 +12760,7 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("$src2, ${src3}", _.BroadcastStr),
(_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
(src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
- EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"
@@ -12783,7 +12783,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_bf16_info,
- HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
+ HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX512FP16
@@ -12792,12 +12792,12 @@ defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWrit
let Predicates = [HasFP16] in {
// Move word (r/m16) to Packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
"vmovw\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
- T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
+ T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
def : Pat<(f16 (bitconvert GR16:$src)),
(f16 (COPY_TO_REGCLASS
@@ -12854,13 +12854,13 @@ def : Pat<(v16i32 (X86vzmovl
// Move word from xmm register to r/m16
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
(ins i16mem:$dst, VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}",
[(store (i16 (extractelt (v8i16 VR128X:$src),
(iPTR 0))), addr:$dst)]>,
- T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
+ T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
def : Pat<(i16 (bitconvert FR16X:$src)),
(i16 (EXTRACT_SUBREG
@@ -12872,9 +12872,9 @@ def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
}
}
@@ -12920,27 +12920,27 @@ multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo
defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, avx512vl_i16_info,
avx512vl_f16_info, SchedWriteCvtPD2DQ>,
- T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
X86VUintToFpRnd, avx512vl_f16_info,
avx512vl_i16_info, SchedWriteCvtPD2DQ>,
- T_MAP5XD, EVEX_CD8<16, CD8VF>;
+ T_MAP5, XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
avx512vl_i16_info, avx512vl_f16_info,
- SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
+ SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
avx512vl_i16_info, avx512vl_f16_info,
- SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, avx512vl_i16_info,
avx512vl_f16_info, SchedWriteCvtPD2DQ>,
- T_MAP5PD, EVEX_CD8<16, CD8VF>;
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, avx512vl_f16_info,
avx512vl_i16_info, SchedWriteCvtPD2DQ>,
- T_MAP5XS, EVEX_CD8<16, CD8VF>;
+ T_MAP5, XS, EVEX_CD8<16, CD8VF>;
// Convert Half to Signed/Unsigned Doubleword
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -12980,20 +12980,20 @@ multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
- X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
EVEX_CD8<16, CD8VH>;
defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
- SchedWriteCvtPS2DQ>, T_MAP5XS,
+ SchedWriteCvtPS2DQ>, T_MAP5, XS,
EVEX_CD8<16, CD8VH>;
defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
- SchedWriteCvtPS2DQ>, T_MAP5PS,
+ SchedWriteCvtPS2DQ>, T_MAP5,
EVEX_CD8<16, CD8VH>;
// Convert Half to Signed/Unsigned Quadword
@@ -13043,21 +13043,21 @@ multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
}
defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
- X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
- SchedWriteCvtPS2DQ>, T_MAP5PD,
+ SchedWriteCvtPS2DQ>, T_MAP5, PD,
EVEX_CD8<16, CD8VQ>;
defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
- SchedWriteCvtPS2DQ>, T_MAP5PD,
+ SchedWriteCvtPS2DQ>, T_MAP5, PD,
EVEX_CD8<16, CD8VQ>;
// Convert Signed/Unsigned Quadword to Half
@@ -13154,53 +13154,53 @@ multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo
}
defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS,
+ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD,
+ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
EVEX_CD8<64, CD8VF>;
// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
- T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
- T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
- T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
- T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
+ T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
+ "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
let Predicates = [HasFP16] in {
defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
- T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
- T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
+ T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
v8f16x_info, i32mem, loadi32,
- "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
- T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
+ T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
@@ -13390,17 +13390,17 @@ let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
- (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
+ (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
}
} // Constraints = "@earlyclobber $dst, $src1 = $dst"
@@ -13411,7 +13411,7 @@ multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
- EVEX_4V, EVEX_B, EVEX_RC;
+ EVEX, VVVV, EVEX_B, EVEX_RC;
}
@@ -13446,14 +13446,14 @@ multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Uses = [MXCSR] in {
defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
- T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ T_MAP6, XS, EVEX_CD8<32, CD8VF>;
defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
- T_MAP6XD, EVEX_CD8<32, CD8VF>;
+ T_MAP6, XD, EVEX_CD8<32, CD8VF>;
defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
- x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
- x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
+ x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
}
@@ -13504,12 +13504,12 @@ multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNod
let Uses = [MXCSR] in {
defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
- T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+ T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
- T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+ T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
- T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+ T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
- T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+ T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
}
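Note on the X86InstrAVX512.td hunks above: every change is the same mechanical substitution. The fused prefix classes are split into composable mixins, so each encoding attribute is stated on its own. A minimal TableGen sketch of the pattern (avx512_example is a hypothetical multiclass, not one from this patch):

  // Old: fused classes, one per prefix combination.
  defm VFOO : avx512_example<0x00, "vfoo">, EVEX_4V, T8PD;
  // New: one mixin per attribute. VVVV = operand encoded in the EVEX
  // vvvv field, T8 = 0F38 opcode map, PD = mandatory 0x66 prefix.
  defm VFOO : avx512_example<0x00, "vfoo">, EVEX, VVVV, T8, PD;

T_MAP5PS likewise drops to plain T_MAP5, since PS (no mandatory prefix) is the default and no longer needs spelling out.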
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index 8c355e84a065..936db48bb9df 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -44,511 +44,396 @@ def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>;
def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>;
}
-//===----------------------------------------------------------------------===//
-// Fixed-Register Multiplication and Division Instructions.
-//
-
-// BinOpRR - Binary instructions with inputs "reg, reg".
-class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, MRMDestReg, typeinfo, outlist,
- (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched]>;
-
-// BinOpRR_F - Binary instructions with inputs "reg, reg", where the pattern
-// has just a EFLAGS as a result.
-class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode>
- : BinOpRR<opcode, mnemonic, typeinfo, (outs), WriteALU,
- [(set EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
-
-// BinOpRR_RF - Binary instructions with inputs "reg, reg", where the pattern
-// has both a regclass and EFLAGS as a result.
-class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
-
-// BinOpRR_RFF - Binary instructions with inputs "reg, reg", where the pattern
-// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
-class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))]>;
-
-// BinOpRR_Rev - Binary instructions with inputs "reg, reg"(reversed encoding).
-class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- X86FoldableSchedWrite sched = WriteALU>
- : ITy<opcode, MRMSrcReg, typeinfo,
- (outs typeinfo.RegClass:$dst),
- (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", []>,
- Sched<[sched]> {
- // The disassembler should know about this, but not the asmparser.
- let isCodeGenOnly = 1;
- let ForceDisassemble = 1;
- let hasSideEffects = 0;
+// BinOpRR - Instructions that read "reg, reg".
+class BinOpRR<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p>
+ : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m,
+ binop_args, p>, Sched<[WriteALU]>;
+// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only.
+class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRR<o, m, t, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>,
+ DefEFLAGS;
+// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F.
+class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t>
+ : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly {
+ let Form = MRMSrcReg;
}
-
-// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed
-// encoding), with sched = WriteADC.
-class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
- : BinOpRR_Rev<opcode, mnemonic, typeinfo, WriteADC>;
-
-// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed
-// encoding), without outlist dag.
-class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
- : ITy<opcode, MRMSrcReg, typeinfo, (outs),
- (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", []>,
- Sched<[WriteALU]> {
- // The disassembler should know about this, but not the asmparser.
- let isCodeGenOnly = 1;
- let ForceDisassemble = 1;
- let hasSideEffects = 0;
+// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS.
+class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRR<o, m, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS;
+// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF.
+class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t>
+ : BinOpRR_RF<o, m, t, null_frag>, DisassembleOnly {
+ let Form = MRMSrcReg;
+}
+// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write
+// EFLAGS.
+class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRR<o, m, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.RegClass:$src2,
+ EFLAGS))]>, DefEFLAGS, UseEFLAGS {
+ let SchedRW = [WriteADC];
+}
+// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF.
+class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t>
+ : BinOpRRF_RF<o, m, t, null_frag>, DisassembleOnly {
+ let Form = MRMSrcReg;
}
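The rewritten RR classes above replace the old open-coded let blocks (isCodeGenOnly, ForceDisassemble, Defs/Uses of EFLAGS) with the DefEFLAGS, UseEFLAGS, and DisassembleOnly mixins. A hypothetical def, only to show how a concrete instruction would compose from them (the opcode/node pairing is illustrative, not taken from this patch):

  // 32-bit register-register add: writes $dst and EFLAGS; the tied
  // $src1 = $dst constraint comes from an enclosing let block.
  def ADD32rr_sketch : BinOpRR_RF<0x01, "add", Xi32, X86add_flag>, OpSize32;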
-// BinOpRM - Binary instructions with inputs "reg, [mem]".
-class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, MRMSrcMem, typeinfo, outlist,
- (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
-
-// BinOpRM_ImplicitUse - Binary instructions with inputs "reg, [mem]".
-// There is an implicit register read at the end of the operand sequence.
-class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, MRMSrcMem, typeinfo, outlist,
- (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, sched.ReadAfterFold,
- // base, scale, index, offset, segment.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // implicit register read.
- sched.ReadAfterFold]>;
-
-// BinOpRM_F - Binary instructions with inputs "reg, [mem]", where the pattern
-// has just a EFLAGS as a result.
-class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs), WriteALU,
- [(set EFLAGS,
- (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
-
-// BinOpRM_RF - Binary instructions with inputs "reg, [mem]", where the pattern
-// has both a regclass and EFLAGS as a result.
-class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
-
-// BinOpRM_RFF - Binary instructions with inputs "reg, [mem]", where the pattern
-// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
-class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo,
- (outs typeinfo.RegClass:$dst), WriteADC,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1,
- (typeinfo.LoadNode addr:$src2), EFLAGS))]>;
-
-// BinOpRI - Binary instructions with inputs "reg, imm".
-class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, f, typeinfo, outlist,
- (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched]> {
- let ImmT = typeinfo.ImmEncoding;
+// BinOpRM - Instructions that read "reg, [mem]".
+class BinOpRM<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p>
+ : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m,
+ binop_args, p>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> {
+ let mayLoad = 1;
+}
+// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only.
+class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node>
+ : BinOpRM<o, m, t, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1,
+ (t.LoadNode addr:$src2)))]>, DefEFLAGS;
+// BinOpRM_RF - Instructions that read "reg, [mem]", and write "reg", EFLAGS.
+class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRM<o, m, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1,
+ (t.LoadNode addr:$src2)))]>, DefEFLAGS;
+// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write
+// EFLAGS.
+class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRM<o, m, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>,
+ DefEFLAGS, UseEFLAGS {
+ let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ WriteADC.ReadAfterFold];
}
-// BinOpRI_F - Binary instructions with inputs "reg, imm", where the pattern
-// has EFLAGS as a result.
-class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpRI<opcode, mnemonic, typeinfo, f, (outs), WriteALU,
- [(set EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
-// BinOpRI_RF - Binary instructions with inputs "reg, imm", where the pattern
-// has both a regclass and EFLAGS as a result.
-class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
-// BinOpRI_RFF - Binary instructions with inputs "reg, imm", where the pattern
-// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
-class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
- EFLAGS))]>;
-
-// BinOpRI8 - Binary instructions with inputs "reg, imm8".
-class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, f, typeinfo, outlist,
- (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched]> {
- let ImmT = Imm8; // Always 8-bit immediate.
+// BinOpRI - Instructions that read "reg, imm".
+class BinOpRI<bits<8> o, string m, X86TypeInfo t, Format f, dag out, list<dag> p>
+ : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m,
+ binop_args, p>, Sched<[WriteALU]> {
+ let ImmT = t.ImmEncoding;
+}
+// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only.
+class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ Format f>
+ : BinOpRI<o, m, t, f, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1,
+ t.ImmOperator:$src2))]>, DefEFLAGS;
+// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS.
+class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpRI<o, m, t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS;
+// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write
+// EFLAGS.
+class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpRI<o, m, t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.ImmOperator:$src2,
+ EFLAGS))]>, DefEFLAGS, UseEFLAGS {
+ let SchedRW = [WriteADC];
+}
+// BinOpRI8 - Instructions that read "reg, imm8".
+class BinOpRI8<bits<8> o, string m, X86TypeInfo t, Format f, dag out>
+ : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m,
+ binop_args, []>, Sched<[WriteALU]> {
+ let ImmT = Imm8;
+}
+// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only.
+class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpRI8<o, m, t, f, (outs)>, DefEFLAGS;
+// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS.
+class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS;
+// BinOpRI8F_RF - Instructions that read "reg, imm8", write "reg" and read/write
+// EFLAGS.
+class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS {
+ let SchedRW = [WriteADC];
}
-// BinOpRI8_F - Binary instructions with inputs "reg, imm8".
-class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), WriteALU, []>;
-
-// BinOpRI8_RF - Binary instructions with inputs "reg, imm8".
-class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU, []>;
-
-// BinOpRI8_RFF - Binary instructions with inputs "reg, imm8".
-class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC, []>;
-
-// BinOpMR - Binary instructions with inputs "[mem], reg".
-class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- list<dag> pattern>
- : ITy<opcode, MRMDestMem, typeinfo,
- (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern>;
-
-// BinOpMR_RMW - Binary instructions with inputs "[mem], reg", where the pattern
-// implicitly use EFLAGS.
-class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpMR<opcode, mnemonic, typeinfo,
- [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+// BinOpMR - Instructions that read "[mem], reg".
+class BinOpMR<bits<8> o, string m, X86TypeInfo t, list<dag> p>
+ : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2), m,
+ binop_args, p> {
+ let mayLoad = 1;
+}
+// BinOpMR_F - Instructions that read "[mem], reg" and write EFLAGS only.
+class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, t,
+ [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>,
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS;
+// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS.
+class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDNode node>
+ : BinOpMR<o, m, t,
+ [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1),
(implicit EFLAGS)]>,
Sched<[WriteALURMW,
// base, scale, index, offset, segment
- ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault,
- WriteALU.ReadAfterFold]>; // reg
-
-// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the
-// pattern sets EFLAGS and implicitly uses EFLAGS.
-class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpMR<opcode, mnemonic, typeinfo,
- [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
- addr:$dst),
- (implicit EFLAGS)]>,
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>, // reg
+ DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node>
+ : BinOpMR<o, m, t,
+ [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS),
+ addr:$src1), (implicit EFLAGS)]>,
Sched<[WriteADCRMW,
// base, scale, index, offset, segment
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
WriteALU.ReadAfterFold, // reg
- WriteALU.ReadAfterFold]>; // EFLAGS
-
-// BinOpMR_F - Binary instructions with inputs "[mem], reg", where the pattern
-// has EFLAGS as a result.
-class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode>
- : BinOpMR<opcode, mnemonic, typeinfo,
- [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
- typeinfo.RegClass:$src))]>,
- Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
-
-// BinOpMI - Binary instructions with inputs "[mem], imm".
-class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern>
- : ITy<opcode, f, typeinfo,
- (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
- let ImmT = typeinfo.ImmEncoding;
+ WriteALU.ReadAfterFold]>, // EFLAGS
+ DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
}
-// BinOpMI_RMW - Binary instructions with inputs "[mem], imm", where the
-// pattern implicitly use EFLAGS.
-class BinOpMI_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpMI<opcode, mnemonic, typeinfo, f,
- [(store (opnode (typeinfo.VT (load addr:$dst)),
- typeinfo.ImmOperator:$src), addr:$dst),
- (implicit EFLAGS)]>,
- Sched<[WriteALURMW]>;
-
-// BinOpMI_RMW_FF - Binary instructions with inputs "[mem], imm", where the
-// pattern sets EFLAGS and implicitly uses EFLAGS.
-class BinOpMI_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
- : BinOpMI<opcode, mnemonic, typeinfo, f,
- [(store (opnode (typeinfo.VT (load addr:$dst)),
- typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>,
- Sched<[WriteADCRMW]>;
-
-// BinOpMI_F - Binary instructions with inputs "[mem], imm", where the pattern
-// has EFLAGS as a result.
-class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpMI<opcode, mnemonic, typeinfo, f,
- [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
- typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALU.Folded]>;
-
-// BinOpMI8 - Binary instructions with inputs "[mem], imm8".
-class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern>
- : ITy<0x82, f, typeinfo,
- (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
- let ImmT = Imm8; // Always 8-bit immediate.
+// BinOpMI - Instructions that read "[mem], imm".
+class BinOpMI<bits<8> o, string m, X86TypeInfo t, Format f, list<dag> p>
+ : ITy<o, f, t, (outs), (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
+ binop_args, p> {
+ let ImmT = t.ImmEncoding;
+ let mayLoad = 1;
+}
+// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only.
+class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ Format f>
+ : BinOpMI<o, m, t, f,
+ [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
+ Sched<[WriteALU.Folded]>, DefEFLAGS;
+// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS.
+class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpMI<o, m, t, f,
+ [(store (node (t.VT (load addr:$src1)),
+ t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>,
+ Sched<[WriteALURMW]>, DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpMI<o, m, t, f,
+ [(store (node (t.VT (load addr:$src1)),
+ t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>,
+ Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
}
-// BinOpMI8_RMW - Binary instructions with inputs "[mem], imm8".
-class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALURMW]>;
-
-// BinOpMI8_RMW_FF - Binary instructions with inputs "[mem], imm8".
-class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteADCRMW]>;
-
-// BinOpMI8_F - Binary instructions with inputs "[mem], imm8"
-class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, Format f>
- : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALU.Folded]>;
-
-// BinOpAI - Binary instructions with input imm, that implicitly use A reg and
-// implicitly define Areg and EFLAGS.
-class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands, X86FoldableSchedWrite sched = WriteALU>
- : ITy<opcode, RawFrm, typeinfo,
- (outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []>,
- Sched<[sched]> {
- let ImmT = typeinfo.ImmEncoding;
+// BinOpMI8 - Instructions that read "[mem], imm8".
+class BinOpMI8<string m, X86TypeInfo t, Format f>
+ : ITy<0x83, f, t, (outs), (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
+ binop_args, []> {
+ let ImmT = Imm8;
+ let mayLoad = 1;
+}
+// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only.
+class BinOpMI8_F<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, t, f>, Sched<[WriteALU.Folded]>, DefEFLAGS;
+// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS.
+class BinOpMI8_MF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, t, f>, Sched<[WriteALURMW]>, DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMI8F_MF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, t, f>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
+}
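One behavioral detail worth noting: mayLoad/mayStore now live inside the class bodies, where the old code relied on outer let blocks around the defs. For illustration, a hypothetical memory/imm8 RMW def (the mnemonic-to-Format pairing is a plausible guess, not from this patch):

  // and dword ptr [mem], imm8: the load, the store, and the EFLAGS def
  // are all implied by BinOpMI8_MF itself; no wrapping let blocks needed.
  def AND32mi8_sketch : BinOpMI8_MF<"and", Xi32, MRM4m>, OpSize32;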
+
+// BinOpAI - Instructions that read "a-reg imm" (Accumulator register).
+class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
+ : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>,
+ Sched<[WriteALU]> {
+ let ImmT = t.ImmEncoding;
let Uses = [areg];
- let Defs = [areg, EFLAGS];
- let hasSideEffects = 0;
}
+// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only.
+class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
+ : BinOpAI<o, m, t, areg, args>, DefEFLAGS;
-// BinOpAI_RFF - Binary instructions with input imm, that implicitly use and
-// define Areg and EFLAGS.
-class BinOpAI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
- : BinOpAI<opcode, mnemonic, typeinfo, areg, operands, WriteADC> {
+// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS.
+class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
+ string args> : BinOpAI<o, m, t, areg, args> {
+ let Defs = [areg, EFLAGS];
+}
+// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write
+// EFLAGS.
+class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
+ string args> : BinOpAI<o, m, t, areg, args> {
let Uses = [areg, EFLAGS];
+ let Defs = [areg, EFLAGS];
+ let SchedRW = [WriteADC];
}
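The accumulator family splits the same way: BinOpAI_F defines EFLAGS only (via DefEFLAGS), BinOpAI_AF also defines the accumulator register, and BinOpAIF_AF reads and writes both. A hypothetical def in this style (opcode and asm string are illustrative):

  // test al, imm8: reads AL, writes EFLAGS only.
  def TEST8i8_sketch : BinOpAI_F<0xA8, "test", Xi8, AL,
                                 "{$src, %al|al, $src}">;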
-// BinOpAI_F - Binary instructions with input imm, that implicitly use A reg and
-// implicitly define EFLAGS.
-class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
- : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
- let Defs = [EFLAGS];
+// UnaryOpR - Instructions that read "reg" and write "reg".
+class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
+ : ITy<o, f, t, (outs t.RegClass:$dst),
+ (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>;
+
+// UnaryOpM - Instructions that read "[mem]" and write "[mem]".
+class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
+ : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>,
+ Sched<[WriteALURMW]> {
+ let mayLoad = 1;
+ let mayStore = 1;
}
-// UnaryOpM - Unary instructions with a memory operand.
-class UnaryOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- list<dag> pattern>
- : ITy<opcode, f, info, (outs), (ins info.MemOperand:$dst), mnemonic,
- "$dst", pattern>;
-
-// UnaryOpR - Unary instructions with a register.
-class UnaryOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- list<dag> pattern>
- : ITy<opcode, f, info, (outs info.RegClass:$dst),
- (ins info.RegClass:$src1), mnemonic, "$dst", pattern>;
-
-// INCDECR - Instructions like "inc reg".
-class INCDECR<Format f, string mnemonic, X86TypeInfo info,
- SDPatternOperator node>
- : UnaryOpR<0xFE, f, mnemonic, info,
- [(set info.RegClass:$dst, EFLAGS,
- (node info.RegClass:$src1, 1))]>;
-
-// INCDECM - Instructions like "inc [mem]".
-class INCDECM<Format f, string mnemonic, X86TypeInfo info, int num>
- : UnaryOpM<0xFE, f, mnemonic, info,
- [(store (add (info.LoadNode addr:$dst), num), addr:$dst),
- (implicit EFLAGS)]>;
-
-// INCDECR_ALT - Instructions like "inc reg" short forms.
-class INCDECR_ALT<bits<8> opcode, string mnemonic, X86TypeInfo info>
- : UnaryOpR<opcode, AddRegFrm, mnemonic, info, []>{
+// INCDECR - Instructions like "inc reg".
+class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node>
+ : UnaryOpR<0xFF, f, m, t,
+ [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>,
+ DefEFLAGS {
+ let isConvertibleToThreeAddress = 1; // Can xform into LEA.
+}
+
+// INCDECM - Instructions like "inc [mem]".
+class INCDECM<Format f, string m, X86TypeInfo t, int num>
+ : UnaryOpM<0xFF, f, m, t,
+ [(store (add (t.LoadNode addr:$dst), num), addr:$dst),
+ (implicit EFLAGS)]>, DefEFLAGS;
+
+// INCDECR_ALT - Instructions like "inc reg" short forms.
+class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS {
+ // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
let Predicates = [Not64BitMode];
- let Opcode = opcode;
}
-// MulOpR - Instructions like "mul reg".
-class MulOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, f, info, (outs), (ins info.RegClass:$src), mnemonic,
- "$src", pattern>,
- Sched<[sched]>;
-
-// MulOpM - Instructions like "mul [mem]".
-class MulOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- X86FoldableSchedWrite sched, list<dag> pattern>
- : ITy<opcode, f, info, (outs), (ins info.MemOperand:$src), mnemonic,
- "$src", pattern>, SchedLoadReg<sched>;
-
-// NegOpR - Instructions like "neg reg", with implicit EFLAGS.
-class NegOpR<bits<8> opcode, string mnemonic, X86TypeInfo info>
- : UnaryOpR<opcode, MRM3r, mnemonic, info,
- [(set info.RegClass:$dst, (ineg info.RegClass:$src1)),
- (implicit EFLAGS)]>;
-
-// NotOpR - Instructions like "not reg".
-class NotOpR<bits<8> opcode, string mnemonic, X86TypeInfo info>
- : UnaryOpR<opcode, MRM2r, mnemonic, info,
- [(set info.RegClass:$dst,
- (not info.RegClass:$src1))]>;
-
-// NegOpM - Instructions like "neg [mem]", with implicit EFLAGS.
-class NegOpM<bits<8> opcode, string mnemonic, X86TypeInfo info>
- : UnaryOpM<opcode, MRM3m, mnemonic, info,
- [(store (ineg (info.LoadNode addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-
-// NotOpM - Instructions like "neg [mem]".
-class NotOpM<bits<8> opcode, string mnemonic, X86TypeInfo info>
- : UnaryOpM<opcode, MRM2m, mnemonic, info,
- [(store (not (info.LoadNode addr:$dst)), addr:$dst)]>;
-
-// BinOpRR_C - Binary instructions with inputs "reg, reg", which used mainly
-// with Constraints = "$src1 = $dst".
-class BinOpRR_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- list<dag> pattern>
- : ITy<opcode, f, info, (outs info.RegClass:$dst),
- (ins info.RegClass:$src1, info.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", pattern>;
-
-// BinOpRM_C - Binary instructions with inputs "reg, [mem]", which used mainly
-// with Constraints = "$src1 = $dst".
-class BinOpRM_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
- list<dag> pattern>
- : ITy<opcode, f, info, (outs info.RegClass:$dst),
- (ins info.RegClass:$src1, info.MemOperand:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", pattern>;
+// MulOpR - Instructions like "mul reg".
+class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t,
+ X86FoldableSchedWrite sched, list<dag> p>
+ : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>;
+
+// MulOpM - Instructions like "mul [mem]".
+class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t,
+ X86FoldableSchedWrite sched, list<dag> p>
+ : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m,
+ "$src", p>, SchedLoadReg<sched> {
+ let mayLoad = 1;
+}
+
+// NegOpR - Instructions like "neg reg".
+class NegOpR<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpR<o, MRM3r, m, t,
+ [(set t.RegClass:$dst, (ineg t.RegClass:$src1)),
+ (implicit EFLAGS)]>, DefEFLAGS;
+
+// NegOpM - Instructions like "neg [mem]".
+class NegOpM<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpM<o, MRM3m, m, t,
+ [(store (ineg (t.LoadNode addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, DefEFLAGS;
+
+// NOTE: NOT does not set EFLAGS!
+// NotOpR - Instructions like "not reg".
+class NotOpR<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>;
+
+// NotOpM - Instructions like "not [mem]".
+class NotOpM<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpM<o, MRM2m, m, t,
+ [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>;
// IMulOpRR - Instructions like "imul reg, reg".
-class IMulOpRR<bits<8> opcode, string mnemonic, X86TypeInfo info,
- X86FoldableSchedWrite sched>
- : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info,
- [(set info.RegClass:$dst, EFLAGS,
- (X86smul_flag info.RegClass:$src1,
- info.RegClass:$src2))]>,
- Sched<[sched]>, TB;
+class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRR_RF<o, m, t, X86smul_flag>, TB {
+ let Form = MRMSrcReg;
+ let SchedRW = [sched];
+ // X = IMUL Y, Z --> X = IMUL Z, Y
+ let isCommutable = 1;
+}
// IMulOpRM - Instructions like "imul reg, [mem]".
-class IMulOpRM<bits<8> opcode, string mnemonic, X86TypeInfo info,
- X86FoldableSchedWrite sched>
- : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info,
- [(set info.RegClass:$dst, EFLAGS,
- (X86smul_flag info.RegClass:$src1, (info.LoadNode addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, TB;
+class IMulOpRM<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRM_RF<o, m, t, X86smul_flag>, TB {
+ let Form = MRMSrcMem;
+ let SchedRW = [sched.Folded, sched.ReadAfterFold];
+}
// IMulOpRRI8 - Instructions like "imul reg, reg, i8".
-class IMulOpRRI8<bits<8> opcode, string mnemonic, X86TypeInfo info,
+class IMulOpRRI8<bits<8> o, string m, X86TypeInfo t,
X86FoldableSchedWrite sched>
- : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst),
- (ins info.RegClass:$src1, info.Imm8Operand:$src2), mnemonic,
- "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]> {
+ : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst),
+ (ins t.RegClass:$src1, t.Imm8Operand:$src2), m,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]>, DefEFLAGS {
let ImmT = Imm8;
}
// IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64".
-class IMulOpRRI<bits<8> opcode, string mnemonic, X86TypeInfo info,
+class IMulOpRRI<bits<8> o, string m, X86TypeInfo t,
X86FoldableSchedWrite sched>
- : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst),
- (ins info.RegClass:$src1, info.ImmOperand:$src2), mnemonic,
+ : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst),
+ (ins t.RegClass:$src1, t.ImmOperand:$src2), m,
"{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set info.RegClass:$dst, EFLAGS,
- (X86smul_flag info.RegClass:$src1,
- info.ImmNoSuOperator:$src2))]>,
- Sched<[sched]>{
- let ImmT = info.ImmEncoding;
+ [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1,
+ t.ImmNoSuOperator:$src2))]>,
+ Sched<[sched]>, DefEFLAGS {
+ let ImmT = t.ImmEncoding;
}
// IMulOpRMI8 - Instructions like "imul reg, [mem], i8".
-class IMulOpRMI8<bits<8> opcode, string mnemonic, X86TypeInfo info,
+class IMulOpRMI8<bits<8> o, string m, X86TypeInfo t,
X86FoldableSchedWrite sched>
- : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst),
- (ins info.MemOperand:$src1, info.Imm8Operand:$src2), mnemonic,
- "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]> {
+ : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
+ (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]>,
+ DefEFLAGS {
let ImmT = Imm8;
+ let mayLoad = 1;
}
// IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64".
-class IMulOpRMI<bits<8> opcode, string mnemonic, X86TypeInfo info,
+class IMulOpRMI<bits<8> o, string m, X86TypeInfo t,
X86FoldableSchedWrite sched>
- : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst),
- (ins info.MemOperand:$src1, info.ImmOperand:$src2), mnemonic,
+ : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
+ (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
"{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set info.RegClass:$dst, EFLAGS,
- (X86smul_flag (info.LoadNode addr:$src1),
- info.ImmNoSuOperator:$src2))]>,
- Sched<[sched.Folded]>{
- let ImmT = info.ImmEncoding;
+ [(set t.RegClass:$dst, EFLAGS,
+ (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>,
+ Sched<[sched.Folded]>, DefEFLAGS {
+ let ImmT = t.ImmEncoding;
}
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let hasSideEffects = 0 in {
-def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>;
-def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>;
-} // hasSideEffects = 0
-
-let isConvertibleToThreeAddress = 1 in { // Can xform into LEA.
+let Constraints = "$src1 = $dst" in {
+def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16;
+def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32;
def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
-def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>;
-def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>;
+def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16;
+def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32;
def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
-} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src1 = $dst", SchedRW
-let SchedRW = [WriteALURMW] in {
-let Predicates = [UseIncDec] in {
- def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>;
- def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>;
- def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>;
-} // Predicates
-let Predicates = [UseIncDec, In64BitMode] in {
- def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
-} // Predicates
-} // SchedRW
-
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let hasSideEffects = 0 in {
-def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>;
-def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>;
-} // hasSideEffects = 0
-
-let isConvertibleToThreeAddress = 1 in { // Can xform into LEA.
+def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16;
+def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32;
def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
-def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>;
-def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>;
+def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16;
+def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32;
def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
-} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src1 = $dst", SchedRW
+}
-let SchedRW = [WriteALURMW] in {
let Predicates = [UseIncDec] in {
- def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>;
- def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>;
- def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>;
-} // Predicates
+def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>;
+def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16;
+def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32;
+def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>;
+def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16;
+def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32;
+}
let Predicates = [UseIncDec, In64BitMode] in {
- def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
-} // Predicates
-} // SchedRW
-} // Defs = [EFLAGS]
+def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
+def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
+}
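The effect on the defs shows directly in this hunk: the old pyramid of let Defs = [EFLAGS], SchedRW, Predicates, and isConvertibleToThreeAddress blocks collapses, and each def attaches only its operand-size mixin, e.g. (taken verbatim from the new code above):

  def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16;

Scheduling (WriteALURMW), mayLoad/mayStore, and the EFLAGS def all come from INCDECM via UnaryOpM and DefEFLAGS.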
// Extra precision multiplication
@@ -562,14 +447,14 @@ def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8,
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>;
// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
-def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>, OpSize16;
// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32,
- [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, OpSize32;
// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64,
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
// AL,AH = AL*[mem8]
@@ -581,79 +466,68 @@ def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8,
[(set AL, (mul AL, (loadi8 addr:$src))),
(implicit EFLAGS)]>;
// AX,DX = AX*[mem16]
-let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>;
+def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>, OpSize16;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>;
+def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>, OpSize32;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>,
- Requires<[In64BitMode]>;
-}
+ Requires<[In64BitMode]>;
-let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>;
+def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>, OpSize16;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>;
+def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>, OpSize32;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>;
-let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>;
+def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>, OpSize16;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>;
+def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>, OpSize32;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>,
- Requires<[In64BitMode]>;
-}
+ Requires<[In64BitMode]>;
-let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
-// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
-def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>;
-def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>;
+def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16;
+def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32;
def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>;
-} // isCommutable
// Register-Memory Signed Integer Multiply
-def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>;
-def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>;
+def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16;
+def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32;
def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>;
-} // Constraints = "$src1 = $dst"
-} // Defs = [EFLAGS]
+}
// Surprisingly enough, these are not two-address instructions!
-let Defs = [EFLAGS] in {
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
// Register-Integer Signed Integer Multiply
// GR16 = GR16*I8
-def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>;
+def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16;
// GR16 = GR16*I16
-def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>;
+def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16;
// GR32 = GR32*I8
-def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>;
+def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32;
// GR32 = GR32*I32
-def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>;
+def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32;
// GR64 = GR64*I8
def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
// GR64 = GR64*I32
@@ -661,21 +535,17 @@ def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>;
// Memory-Integer Signed Integer Multiply
// GR16 = [mem16]*I8
-let mayLoad = 1 in {
-def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>;
+def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16;
// GR16 = [mem16]*I16
-def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>;
+def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16;
// GR32 = [mem32]*I8
-def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>;
+def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32;
// GR32 = [mem32]*I32
-def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>;
+def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32;
// GR64 = [mem64]*I8
def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
// GR64 = [mem64]*I32
def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>;
-} // mayLoad
-} // Defs = [EFLAGS]
-} // hasSideEffects
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
@@ -684,10 +554,10 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
// DX:AX/r16 = AX,DX
-def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>;
+def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
// EDX:EAX/r32 = EAX,EDX
-def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>;
+def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>;
@@ -698,9 +568,9 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
// DX:AX/[mem16] = AX,DX
-def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>;
+def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>;
+def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>,
@@ -713,69 +583,52 @@ let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
// DX:AX/r16 = AX,DX
-def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>;
+def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
// EDX:EAX/r32 = EAX,EDX
-def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>;
+def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>;
-let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
// AX/[mem8] = AL,AH
def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
// DX:AX/[mem16] = AX,DX
-def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>;
+def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
// EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>;
+def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>, OpSize32;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
// RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>,
Requires<[In64BitMode]>;
-}
} // hasSideEffects = 1
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-//
-
-// unary instructions
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+let Constraints = "$src1 = $dst" in {
def NEG8r : NegOpR<0xF6, "neg", Xi8>;
-def NEG16r : NegOpR<0xF7, "neg", Xi16>;
-def NEG32r : NegOpR<0xF7, "neg", Xi32>;
+def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16;
+def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32;
def NEG64r : NegOpR<0xF7, "neg", Xi64>;
-} // Constraints = "$src1 = $dst", SchedRW
+}
-// Read-modify-write negate.
-let SchedRW = [WriteALURMW] in {
def NEG8m : NegOpM<0xF6, "neg", Xi8>;
-def NEG16m : NegOpM<0xF7, "neg", Xi16>;
-def NEG32m : NegOpM<0xF7, "neg", Xi32>;
+def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16;
+def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32;
def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>;
-} // SchedRW
-} // Defs = [EFLAGS]
-
-// Note: NOT does not set EFLAGS!
-
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+let Constraints = "$src1 = $dst" in {
def NOT8r : NotOpR<0xF6, "not", Xi8>;
-def NOT16r : NotOpR<0xF7, "not", Xi16>;
-def NOT32r : NotOpR<0xF7, "not", Xi32>;
+def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16;
+def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32;
def NOT64r : NotOpR<0xF7, "not", Xi64>;
-} // Constraints = "$src1 = $dst", SchedRW
+}
-let SchedRW = [WriteALURMW] in {
def NOT8m : NotOpM<0xF6, "not", Xi8>;
-def NOT16m : NotOpM<0xF7, "not", Xi16>;
-def NOT32m : NotOpM<0xF7, "not", Xi32>;
+def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16;
+def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32;
def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>;
-} // SchedRW
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
@@ -787,81 +640,73 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
SDNode opnodeflag, SDNode opnode,
bit CommutableRR, bit ConvertibleToThreeAddress,
bit ConvertibleToThreeAddressRR> {
- let Defs = [EFLAGS] in {
- let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR in {
- let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
- def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
- def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
- } // isConvertibleToThreeAddress
- } // isCommutable
-
- def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
-
- def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
- def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
- def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
-
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects= 0 in {
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
- // NOTE: These are order specific, we want the ri8 forms to be listed
- // first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, RegMRM>;
- def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, RegMRM>;
- def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, RegMRM>;
-
- def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
- def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
- def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
- }
- } // Constraints = "$src1 = $dst"
-
- let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
- def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
-
- // NOTE: These are order specific, we want the mi8 forms to be listed
- // first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, MemMRM>;
- def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, MemMRM>;
-
- def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
}
- // These are for the disassembler since 0x82 opcode behaves like 0x80, but
- // not in 64-bit mode.
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
- hasSideEffects = 0 in {
- let Constraints = "$src1 = $dst" in
- def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>;
- let mayLoad = 1, mayStore = 1 in
- def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, MemMRM>;
+ def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>;
+
+ def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16;
+ def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32;
+ def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>;
}
- } // Defs = [EFLAGS]
-
- def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ } // Constraints = "$src1 = $dst"
+
+ def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16;
+ def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>;
+
+ def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since the 0x82 opcode behaves like 0x80,
+  // but not in 64-bit mode.
+ let Predicates = [Not64BitMode] in {
+ let Constraints = "$src1 = $dst" in
+ def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
+ }
+
+ def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">, OpSize16;
+ def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">, OpSize32;
+ def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
}
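Each `defm` invocation of the multiclass above stamps out the whole rr/rm/ri/mr/mi family of records, with `NAME#` pasting the chosen prefix onto every def. A toy, self-contained sketch of that expansion (hypothetical names):

  // Toy sketch of the defm/NAME expansion (hypothetical names).
  class ToyOp<bits<8> opc> { bits<8> Opcode = opc; }
  multiclass ToyArith<bits<8> base> {
    def NAME#8rr  : ToyOp<!sub(base, 1)>; // byte form: opcode low bit clear
    def NAME#32rr : ToyOp<base>;          // dword form: opcode low bit set
  }
  // Creates records TOYADD8rr (opcode 0x00) and TOYADD32rr (opcode 0x01):
  defm TOYADD : ToyArith<0x01>;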
/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
@@ -874,80 +719,73 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
string mnemonic, Format RegMRM, Format MemMRM,
SDNode opnode, bit CommutableRR,
bit ConvertibleToThreeAddress> {
- let Uses = [EFLAGS], Defs = [EFLAGS] in {
- let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
- } // isConvertibleToThreeAddress
- } // isCommutable
-
- def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
-
- def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
- def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
- def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
-
- def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
-
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
- // NOTE: These are order specific, we want the ri8 forms to be listed
- // first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, RegMRM>;
- def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, RegMRM>;
- def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, RegMRM>;
-
- def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
- }
- } // Constraints = "$src1 = $dst"
-
- def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
-
- // NOTE: These are order specific, we want the mi8 forms to be listed
- // first so that they are slightly preferred to the mi forms.
- let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
- def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, MemMRM>;
- def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, MemMRM>;
-
- def NAME#8mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>;
- }
-
- // These are for the disassembler since 0x82 opcode behaves like 0x80, but
- // not in 64-bit mode.
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
- hasSideEffects = 0 in {
- let Constraints = "$src1 = $dst" in
- def NAME#8ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi8, RegMRM>;
- let mayLoad = 1, mayStore = 1 in
- def NAME#8mi8 : BinOpMI8_RMW_FF<mnemonic, Xi8, MemMRM>;
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR in {
+ def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isConvertibleToThreeAddress
+ } // isCommutable
+
+ def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>;
+
+ def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
+ def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
+ def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>;
}
- } // Uses = [EFLAGS], Defs = [EFLAGS]
-
- def NAME#8i8 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi64, RAX,
+ } // Constraints = "$src1 = $dst"
+
+ def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16;
+ def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>;
+
+ def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since the 0x82 opcode behaves like 0x80,
+  // but not in 64-bit mode.
+ let Predicates = [Not64BitMode] in {
+ let Constraints = "$src1 = $dst" in
+ def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
+ }
+
+ def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">, OpSize16;
+ def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">, OpSize32;
+ def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX,
"{$src, %rax|rax, $src}">;
}
@@ -957,93 +795,88 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
///
multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
string mnemonic, Format RegMRM, Format MemMRM,
- SDNode opnode,
- bit CommutableRR, bit ConvertibleToThreeAddress> {
- let Defs = [EFLAGS] in {
- let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
- }
- } // isCommutable
-
- def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
-
- def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
- def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
- def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
- def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
-
- def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
-
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
- // NOTE: These are order specific, we want the ri8 forms to be listed
- // first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, RegMRM>;
- def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, RegMRM>;
- def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, RegMRM>;
-
- def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
- }
-
- def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
-
- // NOTE: These are order specific, we want the mi8 forms to be listed
- // first so that they are slightly preferred to the mi forms.
- let mayLoad = 1, hasSideEffects = 0 in {
- def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>;
- def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
-
- def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
- }
-
- // These are for the disassembler since 0x82 opcode behaves like 0x80, but
- // not in 64-bit mode.
- let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
- hasSideEffects = 0 in {
- def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>;
- let mayLoad = 1 in
- def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
- }
- } // Defs = [EFLAGS]
-
- def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let isCommutable = CommutableRR in {
+ def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isConvertibleToThreeAddress
+ } // isCommutable
+
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>;
+
+ def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
+ def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
+ def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
+ def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>;
+
+  // These are for the disassembler, since the 0x82 opcode behaves like 0x80,
+  // but not in 64-bit mode.
+ let Predicates = [Not64BitMode] in {
+ def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
+ let mayLoad = 1 in
+ def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
+ }
+
+ def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
+ "{$src, %ax|ax, $src}">, OpSize16;
def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
+ "{$src, %eax|eax, $src}">, OpSize32;
def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ "{$src, %rax|rax, $src}">;
}
-defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+defm AND : ArithBinOp_RF<0x21, 0x23, 0x25, "and", MRM4r, MRM4m,
X86and_flag, and, 1, 0, 0>;
-defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+defm OR : ArithBinOp_RF<0x09, 0x0B, 0x0D, "or", MRM1r, MRM1m,
X86or_flag, or, 1, 0, 0>;
-defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+defm XOR : ArithBinOp_RF<0x31, 0x33, 0x35, "xor", MRM6r, MRM6m,
X86xor_flag, xor, 1, 0, 0>;
-defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+defm ADD : ArithBinOp_RF<0x01, 0x03, 0x05, "add", MRM0r, MRM0m,
X86add_flag, add, 1, 1, 1>;
let isCompare = 1 in {
-defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+defm SUB : ArithBinOp_RF<0x29, 0x2B, 0x2D, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 1, 0>;
}
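The base-opcode changes in the `defm` lines above (0x20 to 0x21, 0x28 to 0x29, and so on) are consistent with how classic ALU opcodes encode width: the low opcode bit distinguishes the byte form from the word/dword/qword form, so the multiclass now appears to take the wide opcode while the byte and immediate forms (0x80/0x81, 0x83) are fixed per width. A toy sketch of the low-bit relationship:

  // Toy sketch: the low opcode bit selects byte vs. full operand size.
  class ToyALU<bits<8> wideOpc> {
    bits<8> Opcode8    = !sub(wideOpc, 1); // e.g. sub r/m8,  r8  = 0x28
    bits<8> OpcodeWide = wideOpc;          // e.g. sub r/m32, r32 = 0x29
  }
  def ToySUB : ToyALU<0x29>;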
@@ -1057,13 +890,13 @@ def XOR8rr_NOREX : I<0x30, MRMDestReg, (outs GR8_NOREX:$dst),
Sched<[WriteALU]>;
// Arithmetic.
-defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+defm ADC : ArithBinOp_RFF<0x11, 0x13, 0x15, "adc", MRM2r, MRM2m, X86adc_flag,
1, 0>;
-defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+defm SBB : ArithBinOp_RFF<0x19, 0x1B, 0x1D, "sbb", MRM3r, MRM3m, X86sbb_flag,
0, 0>;
let isCompare = 1 in {
-defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+defm CMP : ArithBinOp_F<0x39, 0x3B, 0x3D, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
}
// Patterns to recognize loads on the LHS of an ADC. We can't make X86adc_flag
@@ -1201,44 +1034,37 @@ def : Pat<(store (X86adc_flag i64relocImmSExt32_su:$src, (load addr:$dst), EFLAG
// they don't have all the usual imm8 and REV forms, and are encoded into a
// different space.
let isCompare = 1 in {
- let Defs = [EFLAGS] in {
- let isCommutable = 1 in {
- // Avoid selecting these and instead use a test+and. Post processing will
- // combine them. This gives bunch of other patterns that start with
- // and a chance to match.
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>;
- } // isCommutable
-
- let hasSideEffects = 0, mayLoad = 1 in {
- def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>;
- def TEST16mr : BinOpMR_F<0x84, "test", Xi16, null_frag>;
- def TEST32mr : BinOpMR_F<0x84, "test", Xi32, null_frag>;
- def TEST64mr : BinOpMR_F<0x84, "test", Xi64, null_frag>;
- }
-
- def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
- def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
- def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
- def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
-
- def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;
- def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>;
- def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
- let Predicates = [In64BitMode] in
- def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;
- } // Defs = [EFLAGS]
-
- def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,
- "{$src, %al|al, $src}">;
- def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ let isCommutable = 1 in {
+  // Avoid selecting these and instead use a test+and. Post-processing will
+  // combine them. This gives a bunch of other patterns that start with an
+  // 'and' a chance to match.
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16rr : BinOpRR_F<0x85, "test", Xi16, null_frag>, OpSize16;
+ def TEST32rr : BinOpRR_F<0x85, "test", Xi32, null_frag>, OpSize32;
+ def TEST64rr : BinOpRR_F<0x85, "test", Xi64, null_frag>;
+ } // isCommutable
+
+def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>;
+def TEST16mr : BinOpMR_F<0x85, "test", Xi16, null_frag>, OpSize16;
+def TEST32mr : BinOpMR_F<0x85, "test", Xi32, null_frag>, OpSize32;
+def TEST64mr : BinOpMR_F<0x85, "test", Xi64, null_frag>;
+
+def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+def TEST16ri : BinOpRI_F<0xF7, "test", Xi16, X86testpat, MRM0r>, OpSize16;
+def TEST32ri : BinOpRI_F<0xF7, "test", Xi32, X86testpat, MRM0r>, OpSize32;
+def TEST64ri32 : BinOpRI_F<0xF7, "test", Xi64, X86testpat, MRM0r>;
+
+def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;
+def TEST16mi : BinOpMI_F<0xF7, "test", Xi16, X86testpat, MRM0m>, OpSize16;
+def TEST32mi : BinOpMI_F<0xF7, "test", Xi32, X86testpat, MRM0m>, OpSize32;
+
+ let Predicates = [In64BitMode] in
+ def TEST64mi32 : BinOpMI_F<0xF7, "test", Xi64, X86testpat, MRM0m>;
+
+def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, "{$src, %al|al, $src}">;
+def TEST16i16 : BinOpAI_F<0xA9, "test", Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16;
+def TEST32i32 : BinOpAI_F<0xA9, "test", Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32;
+def TEST64i32 : BinOpAI_F<0xA9, "test", Xi64, RAX, "{$src, %rax|rax, $src}">;
} // isCompare
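The brace syntax in the accumulator forms above, e.g. `{$src, %al|al, $src}`, packs both assembly dialects into one string: text before the `|` prints in AT&T syntax, text after it in Intel syntax. A toy sketch with a hypothetical class:

  // Toy sketch: one string carries both dialects, split at '|'.
  class ToyAsm<string s> { string AsmString = s; }
  // AT&T:  testb $imm, %al        Intel:  test al, imm
  def ToyTEST8i8 : ToyAsm<"test{b}\t{$src, %al|al, $src}">;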
// Patterns to match a relocImm into the immediate field.
@@ -1269,30 +1095,30 @@ let Predicates = [HasBMI, NoEGPR] in {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- VEX_4V, Sched<[sched]>;
+ VEX, VVVV, Sched<[sched]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- EVEX_4V, Sched<[sched]>;
+ EVEX, VVVV, Sched<[sched]>;
def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
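The `VEX_4V` to `VEX, VVVV` (and `EVEX_4V` to `EVEX, VVVV`) rewrites above appear to split one fused modifier into two orthogonal mixins: the encoding scheme itself and the use of the VEX.vvvv operand field. A toy sketch of that composition, with made-up fields:

  // Toy sketch: two small mixins replace one fused modifier.
  class ToyEncFields { bit isVEX = 0; bit usesVVVV = 0; }
  class ToyVEX  { bit isVEX = 1; }
  class ToyVVVV { bit usesVVVV = 1; }
  def ToyANDN : ToyEncFields, ToyVEX, ToyVVVV; // both fields end up set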
// Complexity is reduced to give and-with-immediate a chance to match first.
let Defs = [EFLAGS], AddedComplexity = -6 in {
- defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS;
- defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W;
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8, REX_W;
}
let Predicates = [HasBMI], AddedComplexity = -6 in {
@@ -1315,12 +1141,12 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2, NoEGPR] in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
+ []>, T8, XD, VEX, VVVV, Sched<[WriteIMulH, sched]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V,
+ []>, T8, XD, VEX, VVVV,
Sched<[WriteIMulHLd, sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -1339,11 +1165,11 @@ let Predicates = [HasBMI2, NoEGPR] in {
let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>;
+ []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulH, sched]>;
let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, EVEX_4V,
+ []>, T8, XD, EVEX, VVVV,
Sched<[WriteIMulHLd, sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -1362,38 +1188,25 @@ let Uses = [RDX] in
//
// We don't have patterns for these as there is no advantage over ADC for
// most code.
-class ADCOXOpRR <bits<8> opcode, string mnemonic, X86TypeInfo info>
- : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info, []>{
- let Opcode = opcode;
- let OpSize = OpSizeFixed;
+class ADCOXOpRR <string m, X86TypeInfo t>
+ : BinOpRRF_RF<0xF6, m, t, null_frag> {
+ let Form = MRMSrcReg;
+ let isCommutable = 1;
}
-class ADCOXOpRM <bits<8> opcode, string mnemonic, X86TypeInfo info>
- : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info, []>{
- let Opcode = opcode;
- let OpSize = OpSizeFixed;
+class ADCOXOpRM <string m, X86TypeInfo t>
+ : BinOpRMF_RF<0xF6, m, t, null_frag> {
+ let Form = MRMSrcMem;
}
-let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
- Constraints = "$src1 = $dst", hasSideEffects = 0 in {
- let SchedRW = [WriteADC], isCommutable = 1 in {
- def ADCX32rr : ADCOXOpRR<0xF6, "adcx", Xi32>, T8PD;
- def ADCX64rr : ADCOXOpRR<0xF6, "adcx", Xi64>, T8PD;
-
- def ADOX32rr : ADCOXOpRR<0xF6, "adox", Xi32>, T8XS;
- def ADOX64rr : ADCOXOpRR<0xF6, "adox", Xi64>, T8XS;
- } // SchedRW
-
- let mayLoad = 1,
- SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
- // Memory operand.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // Implicit read of EFLAGS
- WriteADC.ReadAfterFold] in {
- def ADCX32rm : ADCOXOpRM<0xF6, "adcx", Xi32>, T8PD;
- def ADCX64rm : ADCOXOpRM<0xF6, "adcx", Xi64>, T8PD;
-
- def ADOX32rm : ADCOXOpRM<0xF6, "adox", Xi32>, T8XS;
- def ADOX64rm : ADCOXOpRM<0xF6, "adox", Xi64>, T8XS;
- } // mayLoad, SchedRW
+let OpSize = OpSizeFixed, Constraints = "$src1 = $dst",
+ Predicates = [HasADX] in {
+def ADCX32rr : ADCOXOpRR<"adcx", Xi32>, T8, PD;
+def ADCX64rr : ADCOXOpRR<"adcx", Xi64>, T8, PD;
+def ADOX32rr : ADCOXOpRR<"adox", Xi32>, T8, XS;
+def ADOX64rr : ADCOXOpRR<"adox", Xi64>, T8, XS;
+def ADCX32rm : ADCOXOpRM<"adcx", Xi32>, T8, PD;
+def ADCX64rm : ADCOXOpRM<"adcx", Xi64>, T8, PD;
+def ADOX32rm : ADCOXOpRM<"adox", Xi32>, T8, XS;
+def ADOX64rm : ADCOXOpRM<"adox", Xi64>, T8, XS;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td
index 9d0735c9cbba..2590be8651d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAsmAlias.td
@@ -55,6 +55,11 @@ multiclass CMPCCXADD_Aliases<string Cond, int CC> {
(CMPCCXADDmr32 GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>;
def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
(CMPCCXADDmr64 GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>;
+
+ def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ (CMPCCXADDmr32_EVEX GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>;
+ def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ (CMPCCXADDmr64_EVEX GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>;
}
//===----------------------------------------------------------------------===//
@@ -686,3 +691,11 @@ def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>;
def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", (INVLPGA32), 0>, Requires<[Not64BitMode]>;
def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>;
+// Aliases with explicit %xmm0
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
+
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rm_EVEX VR128:$dst, i128mem:$src2), 0>;
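These InstAlias records appear to accept a `sha256rnds2` spelling that leaves the instruction's fixed %xmm0 operand implicit, mapping it back onto the full instruction. A self-contained toy model of the alias mechanism (the real `InstAlias` class lives in Target.td, and the full instruction's asm string here is an assumption):

  // Toy model: an alias record pairs an asm spelling with a result dag.
  class ToyInstr<string asm> { string AsmString = asm; }
  def ToySHArr : ToyInstr<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}">;
  class ToyInstAlias<string asm, dag result> {
    string AsmString = asm;
    dag ResultInst = result;
  }
  // Shorter spelling with %xmm0 implied maps onto the full instruction:
  def : ToyInstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", (ToySHArr)>;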
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
index 457833f8cc33..c77c77ee4a3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128),
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
(SUB64ri32 GR64:$src1, -128)>;
+// Depositing a value into an 8/16-bit subreg:
+def : Pat<(or (and GR64:$dst, -256),
+ (i64 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR32:$dst, -256),
+ (i32 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(or (and GR64:$dst, -65536),
+ (i64 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(or (and GR32:$dst, -65536),
+ (i32 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
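The four patterns above recognize a merge of a zero-extended 8- or 16-bit load into the low bits of a wider register, roughly C like `x = (x & ~0xffULL) | *p;`, and emit a plain byte/word load into the subregister instead of a load-and-or sequence. A toy sketch of the `Pat` shape (the real class is in TargetSelectionDAG.td):

  // Toy sketch: a Pat pairs a matched IR dag with instructions to emit.
  //   matched:  (or (and $dst, -256), (zextloadi8 addr:$src))
  //   emitted:  INSERT_SUBREG $dst, (MOV8rm addr:$src), sub_8bit
  class ToyPat<dag pattern, dag result> {
    dag PatternToMatch = pattern;
    dag ResultInstrs   = result;
  }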
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
index 09655d939121..6a9a74ce15f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -666,20 +666,20 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
let Uses = [FPSW, FPCW] in {
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, PS,
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
- PS, Requires<[HasFXSR, In64BitMode]>;
+ TB, Requires<[HasFXSR, In64BitMode]>;
} // Uses = [FPSW, FPCW]
let Defs = [FPSW, FPCW] in {
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
- PS, Requires<[HasFXSR]>;
+ TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
- PS, Requires<[HasFXSR, In64BitMode]>;
+ TB, Requires<[HasFXSR, In64BitMode]>;
} // Defs = [FPSW, FPCW]
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index df05a5788a50..07e5576960d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -180,7 +180,7 @@ class OperandSize<bits<2> val> {
bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
-def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
+def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32/64-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
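The corrected comment reflects that the modes are not symmetric: the default operand size is 32 bits in both 32- and 64-bit mode, so OpSize16 needs the 0x66 override in both, while OpSize32 needs it only in 16-bit mode. A toy predicate for when the prefix is required (toy encodings: opsize 1 = OpSize16, 2 = OpSize32; mode 0 = 16-bit, 1 = 32-bit, 2 = 64-bit):

  // Toy sketch: does this (opsize, mode) pair need the 0x66 prefix?
  class ToyNeeds66<bits<2> opsize, bits<2> mode> {
    bit Value = !or(!and(!eq(opsize, 1), !ne(mode, 0)),
                    !and(!eq(opsize, 2), !eq(mode, 0)));
  }
  def ToyInc16In64 : ToyNeeds66<1, 2>; // Value = 1: "inc %ax" is 66 ff c0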
// Address size for encodings that change based on mode.
@@ -234,7 +234,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// based on address size of the mode?
bits<2> AdSizeBits = AdSize.Value;
- Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
+ Encoding OpEnc = EncNormal; // Encoding used by this instruction
+ // Which prefix byte does this inst have?
+ Prefix OpPrefix = !if(!eq(OpEnc, EncNormal), NoPrfx, PS);
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<4> OpMapBits = OpMap.Value;
@@ -243,7 +245,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
- Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
bit IgnoresW = 0; // Does this inst ignore REX_W field?
bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX
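Moving `OpEnc` above `OpPrefix` lets the prefix default depend on the encoding: legacy-encoded instructions default to no prefix byte, while VEX/EVEX-encoded ones default to the PS bucket. A toy sketch of a field whose default is computed from another field:

  // Toy sketch: one field's default computed from another via !if.
  class ToyX86Inst<bit legacyEnc> {
    bit OpEnc = legacyEnc;                       // 1 = legacy, 0 = VEX/EVEX
    bits<3> OpPrefix = !if(!eq(OpEnc, 1), 0, 1); // 0 = NoPrfx, 1 = PS (toy)
  }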
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index bc2d5ed1e17d..bddda6891356 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8263,8 +8263,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
- for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
- MIB.add(AddrOps[i]);
+ for (const MachineOperand &AddrOp : AddrOps)
+ MIB.add(AddrOp);
MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
@@ -8341,8 +8341,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
- for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
- MIB.add(AddrOps[i]);
+ for (const MachineOperand &AddrOp : AddrOps)
+ MIB.add(AddrOp);
MIB.addReg(Reg, RegState::Kill);
MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td
index a3392b691c0a..4586fc541627 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrKL.td
@@ -19,17 +19,17 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in {
let Uses = [XMM0, EAX], Defs = [EFLAGS] in {
def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"loadiwkey\t{$src2, $src1|$src1, $src2}",
- [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS;
+ [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8, XS;
}
let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in {
def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS;
+ "encodekey128\t{$src, $dst|$dst, $src}", []>, T8, XS;
}
let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in {
def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS;
+ "encodekey256\t{$src, $dst|$dst, $src}", []>, T8, XS;
}
let Constraints = "$src1 = $dst",
@@ -37,22 +37,22 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in {
def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesenc128kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS;
+ (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8, XS;
def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesdec128kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS;
+ (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8, XS;
def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesenc256kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS;
+ (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8, XS;
def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesdec256kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS;
+ (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8, XS;
}
} // SchedRW, Predicates
@@ -62,13 +62,13 @@ let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in {
Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7],
mayLoad = 1 in {
def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src),
- "aesencwide128kl\t$src", []>, T8XS;
+ "aesencwide128kl\t$src", []>, T8, XS;
def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src),
- "aesdecwide128kl\t$src", []>, T8XS;
+ "aesdecwide128kl\t$src", []>, T8, XS;
def AESENCWIDE256KL : I<0xD8, MRM2m, (outs), (ins opaquemem:$src),
- "aesencwide256kl\t$src", []>, T8XS;
+ "aesencwide256kl\t$src", []>, T8, XS;
def AESDECWIDE256KL : I<0xD8, MRM3m, (outs), (ins opaquemem:$src),
- "aesdecwide256kl\t$src", []>, T8XS;
+ "aesdecwide256kl\t$src", []>, T8, XS;
}
} // SchedRW, Predicates
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
index acf7605b3f53..8d6bc8d0ee2c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
@@ -487,24 +487,24 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC;
+ WriteCvtPS2I, SSEPackedSingle>, TB, SIMD_EXC;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC;
+ WriteCvtPD2I, SSEPackedDouble>, TB, PD, SIMD_EXC;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC;
+ WriteCvtPS2I, SSEPackedSingle>, TB, SIMD_EXC;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC;
+ WriteCvtPD2I, SSEPackedDouble>, TB, PD, SIMD_EXC;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
- WriteCvtI2PD, SSEPackedDouble>, PD;
+ WriteCvtI2PD, SSEPackedDouble>, TB, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, PS, SIMD_EXC;
+ SSEPackedSingle>, TB, SIMD_EXC;
}
// Extract / Insert
@@ -548,13 +548,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1, Not64BitMode] in
-def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
- "maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1, In64BitMode] in
-def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
- "maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+def MMX_MASKMOVQ64: MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}
// 64-bit bit convert.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
index 2ea10e317e12..305bd74f7bd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
@@ -165,10 +165,10 @@ def POPP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "popp\t$reg", []>,
REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>;
def POP2: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins),
"pop2\t{$reg2, $reg1|$reg1, $reg2}",
- []>, EVEX_4V, EVEX_B, T_MAP4PS;
+ []>, EVEX, VVVV, EVEX_B, T_MAP4;
def POP2P: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins),
"pop2p\t{$reg2, $reg1|$reg1, $reg2}",
- []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W;
+ []>, EVEX, VVVV, EVEX_B, T_MAP4, REX_W;
} // mayLoad, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
@@ -186,10 +186,10 @@ def PUSHP64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "pushp\t$reg", []>,
REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>;
def PUSH2: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, GR64:$reg2),
"push2\t{$reg2, $reg1|$reg1, $reg2}",
- []>, EVEX_4V, EVEX_B, T_MAP4PS;
+ []>, EVEX, VVVV, EVEX_B, T_MAP4;
def PUSH2P: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, GR64:$reg2),
"push2p\t{$reg2, $reg1|$reg1, $reg2}",
- []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W;
+ []>, EVEX, VVVV, EVEX_B, T_MAP4, REX_W;
} // mayStore, SchedRW
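`push2`/`pop2` above are APX instructions that transfer two 64-bit registers in one operation, which TableGen expresses simply as a two-element outs (or ins) dag. A self-contained toy sketch (`outs`/`ins` are normally declared in Target.td):

  // Toy sketch: one instruction, two register results.
  def outs;    // dag operators, normally declared in Target.td
  def ins;
  def ToyGR64; // stand-in register class
  class ToyMI<dag o, dag i> { dag OutOperandList = o; dag InOperandList = i; }
  def ToyPOP2 : ToyMI<(outs ToyGR64:$reg1, ToyGR64:$reg2), (ins)>;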
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
@@ -251,52 +251,52 @@ let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>,
- PS, OpSize16, Sched<[WriteBSF]>;
+ TB, OpSize16, Sched<[WriteBSF]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>,
- PS, OpSize16, Sched<[WriteBSFLd]>;
+ TB, OpSize16, Sched<[WriteBSFLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>,
- PS, OpSize32, Sched<[WriteBSF]>;
+ TB, OpSize32, Sched<[WriteBSF]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>,
- PS, OpSize32, Sched<[WriteBSFLd]>;
+ TB, OpSize32, Sched<[WriteBSFLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>,
- PS, Sched<[WriteBSF]>;
+ TB, Sched<[WriteBSF]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>,
- PS, Sched<[WriteBSFLd]>;
+ TB, Sched<[WriteBSFLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>,
- PS, OpSize16, Sched<[WriteBSR]>;
+ TB, OpSize16, Sched<[WriteBSR]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>,
- PS, OpSize16, Sched<[WriteBSRLd]>;
+ TB, OpSize16, Sched<[WriteBSRLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>,
- PS, OpSize32, Sched<[WriteBSR]>;
+ TB, OpSize32, Sched<[WriteBSR]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>,
- PS, OpSize32, Sched<[WriteBSRLd]>;
+ TB, OpSize32, Sched<[WriteBSRLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>,
- PS, Sched<[WriteBSR]>;
+ TB, Sched<[WriteBSR]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>,
- PS, Sched<[WriteBSRLd]>;
+ TB, Sched<[WriteBSRLd]>;
} // Defs = [EFLAGS]
let SchedRW = [WriteMicrocoded] in {
@@ -1095,29 +1095,29 @@ let Predicates = [HasMOVBE] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
- OpSize16, T8PS;
+ OpSize16, T8;
def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
- OpSize32, T8PS;
+ OpSize32, T8;
def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
- T8PS;
+ T8;
}
let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)]>,
- OpSize16, T8PS;
+ OpSize16, T8;
def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(store (bswap GR32:$src), addr:$dst)]>,
- OpSize32, T8PS;
+ OpSize32, T8;
def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)]>,
- T8PS;
+ T8;
}
}
@@ -1127,13 +1127,13 @@ let Predicates = [HasMOVBE] in {
let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
"rdrand{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdrand))]>,
- OpSize16, PS;
+ OpSize16, TB;
def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
"rdrand{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86rdrand))]>,
- OpSize32, PS;
+ OpSize32, TB;
def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
"rdrand{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86rdrand))]>,
- PS;
+ TB;
}
//===----------------------------------------------------------------------===//
@@ -1141,11 +1141,11 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
//
let Predicates = [HasRDSEED], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS;
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, TB;
def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS;
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, TB;
def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS;
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
}
//===----------------------------------------------------------------------===//
@@ -1155,29 +1155,29 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lzcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>,
- XS, OpSize16, Sched<[WriteLZCNT]>;
+ TB, XS, OpSize16, Sched<[WriteLZCNT]>;
def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lzcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctlz (loadi16 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteLZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, OpSize16, Sched<[WriteLZCNTLd]>;
def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lzcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>,
- XS, OpSize32, Sched<[WriteLZCNT]>;
+ TB, XS, OpSize32, Sched<[WriteLZCNT]>;
def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"lzcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctlz (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteLZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, OpSize32, Sched<[WriteLZCNTLd]>;
def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"lzcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
- XS, Sched<[WriteLZCNT]>;
+ TB, XS, Sched<[WriteLZCNT]>;
def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lzcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctlz (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS, Sched<[WriteLZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, Sched<[WriteLZCNTLd]>;
}
//===----------------------------------------------------------------------===//
@@ -1187,29 +1187,29 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"tzcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>,
- XS, OpSize16, Sched<[WriteTZCNT]>;
+ TB, XS, OpSize16, Sched<[WriteTZCNT]>;
def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"tzcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (cttz (loadi16 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteTZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, OpSize16, Sched<[WriteTZCNTLd]>;
def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"tzcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>,
- XS, OpSize32, Sched<[WriteTZCNT]>;
+ TB, XS, OpSize32, Sched<[WriteTZCNT]>;
def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"tzcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (cttz (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteTZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, OpSize32, Sched<[WriteTZCNTLd]>;
def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"tzcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
- XS, Sched<[WriteTZCNT]>;
+ TB, XS, Sched<[WriteTZCNT]>;
def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"tzcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (cttz (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS, Sched<[WriteTZCNTLd]>;
+ (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>;
}
multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
@@ -1218,11 +1218,11 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
let hasSideEffects = 0 in {
def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched]>;
+ T8, VEX, VVVV, Sched<[sched]>;
let mayLoad = 1 in
def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched.Folded]>;
+ T8, VEX, VVVV, Sched<[sched.Folded]>;
}
}
@@ -1288,12 +1288,12 @@ multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX, Sched<[Sched]>;
+ T8, VEX, Sched<[Sched]>;
let mayLoad = 1 in
def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX,
+ (implicit EFLAGS)]>, T8, VEX,
Sched<[Sched.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -1371,33 +1371,33 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
- VEX_4V, Sched<[WriteALU]>;
+ VEX, VVVV, Sched<[WriteALU]>;
def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+ VEX, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2, NoEGPR] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
- X86pdep, loadi32>, T8XD;
+ X86pdep, loadi32>, T8, XD;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
- X86pdep, loadi64>, T8XD, REX_W;
+ X86pdep, loadi64>, T8, XD, REX_W;
defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
- X86pext, loadi32>, T8XS;
+ X86pext, loadi32>, T8, XS;
defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
- X86pext, loadi64>, T8XS, REX_W;
+ X86pext, loadi64>, T8, XS, REX_W;
}
let Predicates = [HasBMI2, HasEGPR] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
- X86pdep, loadi32, "_EVEX">, T8XD, EVEX;
+ X86pdep, loadi32, "_EVEX">, T8, XD, EVEX;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
- X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX;
+ X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX;
defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
- X86pext, loadi32, "_EVEX">, T8XS, EVEX;
+ X86pext, loadi32, "_EVEX">, T8, XS, EVEX;
defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
- X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX;
+ X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX;
}
//===----------------------------------------------------------------------===//
@@ -1419,12 +1419,12 @@ multiclass lwpins_intr<RegisterClass RC> {
def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
"lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
[(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>,
- XOP_4V, XOPA;
+ XOP, VVVV, XOPA;
let mayLoad = 1 in
def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
"lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
[(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>,
- XOP_4V, XOPA;
+ XOP, VVVV, XOPA;
}
let Defs = [EFLAGS] in {
@@ -1435,12 +1435,12 @@ let Defs = [EFLAGS] in {
multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
"lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA;
+ [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP, VVVV, XOPA;
let mayLoad = 1 in
def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
"lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
[(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>,
- XOP_4V, XOPA;
+ XOP, VVVV, XOPA;
}
defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>;
@@ -1471,22 +1471,22 @@ let SchedRW = [ WriteSystem ] in {
let SchedRW = [WriteSystem] in {
def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
"umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
- XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
+ TB, XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
"umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
- XS, AdSize32, Requires<[HasWAITPKG]>;
+ TB, XS, AdSize32, Requires<[HasWAITPKG]>;
def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
"umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
- XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
+ TB, XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
let Uses = [EAX, EDX], Defs = [EFLAGS] in {
def UMWAIT : I<0xAE, MRM6r,
(outs), (ins GR32orGR64:$src), "umwait\t$src",
[(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>,
- XD, Requires<[HasWAITPKG]>;
+ TB, XD, Requires<[HasWAITPKG]>;
def TPAUSE : I<0xAE, MRM6r,
(outs), (ins GR32orGR64:$src), "tpause\t$src",
[(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>,
- PD, Requires<[HasWAITPKG]>;
+ TB, PD, Requires<[HasWAITPKG]>;
}
} // SchedRW
@@ -1497,19 +1497,19 @@ let SchedRW = [WriteStore] in {
def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore32 addr:$dst, GR32:$src)]>,
- T8PS, Requires<[HasMOVDIRI, NoEGPR]>;
+ T8, Requires<[HasMOVDIRI, NoEGPR]>;
def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore64 addr:$dst, GR64:$src)]>,
- T8PS, Requires<[In64BitMode, HasMOVDIRI, NoEGPR]>;
+ T8, Requires<[In64BitMode, HasMOVDIRI, NoEGPR]>;
def MOVDIRI32_EVEX : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore32 addr:$dst, GR32:$src)]>,
- EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
+ EVEX, NoCD8, T_MAP4, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
def MOVDIRI64_EVEX : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore64 addr:$dst, GR64:$src)]>,
- EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
+ EVEX, NoCD8, T_MAP4, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1518,23 +1518,23 @@ def MOVDIRI64_EVEX : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
let SchedRW = [WriteStore] in {
def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
"movdir64b\t{$src, $dst|$dst, $src}", []>,
- T8PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>;
+ T8, PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>;
def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR32:$dst, addr:$src)]>,
- T8PD, AdSize32, Requires<[HasMOVDIR64B, NoEGPR]>;
+ T8, PD, AdSize32, Requires<[HasMOVDIR64B, NoEGPR]>;
def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR64:$dst, addr:$src)]>,
- T8PD, AdSize64, Requires<[HasMOVDIR64B, NoEGPR, In64BitMode]>;
+ T8, PD, AdSize64, Requires<[HasMOVDIR64B, NoEGPR, In64BitMode]>;
def MOVDIR64B32_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR32:$dst, addr:$src)]>,
- EVEX_NoCD8, T_MAP4PD, AdSize32, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, PD, AdSize32, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR64:$dst, addr:$src)]>,
- EVEX_NoCD8, T_MAP4PD, AdSize64, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, PD, AdSize64, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1544,28 +1544,28 @@ let SchedRW = [WriteStore], Defs = [EFLAGS] in {
def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
- T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
- T8XD, AdSize32, Requires<[HasENQCMD]>;
+ T8, XD, AdSize32, Requires<[HasENQCMD]>;
def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
- T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+ T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
- T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
- T8XS, AdSize32, Requires<[HasENQCMD]>;
+ T8, XS, AdSize32, Requires<[HasENQCMD]>;
def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
- T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+ T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
}
//===----------------------------------------------------------------------===//
@@ -1588,11 +1588,11 @@ let SchedRW = [WriteSystem] in {
let Uses = [EAX, EDX] in
def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins),
"invlpgb", []>,
- PS, Requires<[Not64BitMode]>;
+ TB, Requires<[Not64BitMode]>;
let Uses = [RAX, EDX] in
def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins),
"invlpgb", []>,
- PS, Requires<[In64BitMode]>;
+ TB, Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1602,7 +1602,7 @@ let SchedRW = [WriteSystem] in {
let SchedRW = [WriteSystem] in {
def TLBSYNC : I<0x01, MRM_FF, (outs), (ins),
"tlbsync", []>,
- PS, Requires<[]>;
+ TB, Requires<[]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1610,14 +1610,14 @@ let SchedRW = [WriteSystem] in {
//
let Uses = [EAX], SchedRW = [WriteSystem] in
def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
- Requires<[HasHRESET]>, TAXS;
+ Requires<[HasHRESET]>, TA, XS;
//===----------------------------------------------------------------------===//
// SERIALIZE Instruction
//
let SchedRW = [WriteSystem] in
def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize",
- [(int_x86_serialize)]>, PS,
+ [(int_x86_serialize)]>, TB,
Requires<[HasSERIALIZE]>;
//===----------------------------------------------------------------------===//
@@ -1625,9 +1625,9 @@ let SchedRW = [WriteSystem] in
//
let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in {
def XSUSLDTRK : I<0x01, MRM_E8, (outs), (ins), "xsusldtrk",
- [(int_x86_xsusldtrk)]>, XD;
+ [(int_x86_xsusldtrk)]>, TB, XD;
def XRESLDTRK : I<0x01, MRM_E9, (outs), (ins), "xresldtrk",
- [(int_x86_xresldtrk)]>, XD;
+ [(int_x86_xresldtrk)]>, TB, XD;
}
//===----------------------------------------------------------------------===//
@@ -1635,18 +1635,18 @@ let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in {
//
let Predicates = [HasUINTR, In64BitMode], SchedRW = [WriteSystem] in {
def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret",
- []>, XS;
+ []>, TB, XS;
def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui",
- [(int_x86_clui)]>, XS;
+ [(int_x86_clui)]>, TB, XS;
def STUI : I<0x01, MRM_EF, (outs), (ins), "stui",
- [(int_x86_stui)]>, XS;
+ [(int_x86_stui)]>, TB, XS;
def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg",
- [(int_x86_senduipi GR64:$arg)]>, XS;
+ [(int_x86_senduipi GR64:$arg)]>, TB, XS;
let Defs = [EFLAGS] in
def TESTUI : I<0x01, MRM_ED, (outs), (ins), "testui",
- [(set EFLAGS, (X86testui))]>, XS;
+ [(set EFLAGS, (X86testui))]>, TB, XS;
}
//===----------------------------------------------------------------------===//
@@ -1663,21 +1663,38 @@ let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in {
// CMPCCXADD Instructions
//
let isCodeGenOnly = 1, ForceDisassemble = 1, mayLoad = 1, mayStore = 1,
- Predicates = [HasCMPCCXADD, In64BitMode], Defs = [EFLAGS],
- Constraints = "$dstsrc1 = $dst" in {
+ Defs = [EFLAGS], Constraints = "$dstsrc1 = $dst" in {
+let Predicates = [HasCMPCCXADD, NoEGPR, In64BitMode] in {
def CMPCCXADDmr32 : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst),
(ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond),
"cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
[(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2,
GR32:$dstsrc1, GR32:$src3, timm:$cond))]>,
- VEX_4V, T8PD, Sched<[WriteXCHG]>;
+ VEX, VVVV, T8, PD, Sched<[WriteXCHG]>;
def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst),
(ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond),
"cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
[(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2,
GR64:$dstsrc1, GR64:$src3, timm:$cond))]>,
- VEX_4V, REX_W, T8PD, Sched<[WriteXCHG]>;
+ VEX, VVVV, REX_W, T8, PD, Sched<[WriteXCHG]>;
+}
+
+let Predicates = [HasCMPCCXADD, HasEGPR, In64BitMode] in {
+def CMPCCXADDmr32_EVEX : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst),
+ (ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond),
+ "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ [(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2,
+ GR32:$dstsrc1, GR32:$src3, timm:$cond))]>,
+ EVEX, VVVV, NoCD8, T8, PD, Sched<[WriteXCHG]>;
+
+def CMPCCXADDmr64_EVEX : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst),
+ (ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond),
+ "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2,
+ GR64:$dstsrc1, GR64:$src3, timm:$cond))]>,
+ EVEX, VVVV, NoCD8, REX_W, T8, PD, Sched<[WriteXCHG]>;
+}
}
//===----------------------------------------------------------------------===//
@@ -1686,12 +1703,12 @@ def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst),
let Predicates = [HasCLFLUSHOPT], SchedRW = [WriteLoad] in
def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD;
+ "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, TB, PD;
let Predicates = [HasCLWB], SchedRW = [WriteLoad] in
def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src",
- [(int_x86_clwb addr:$src)]>, PD;
+ [(int_x86_clwb addr:$src)]>, TB, PD;
let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in
def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src",
- [(int_x86_cldemote addr:$src)]>, PS;
+ [(int_x86_cldemote addr:$src)]>, TB;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td
index dc0e267a83e3..bc17b00f3573 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -39,7 +39,7 @@ multiclass RAOINT_BASE<string OpcodeStr> {
Sched<[WriteALURMW]>, REX_W;
}
-defm AADD : RAOINT_BASE<"add">, T8PS;
-defm AAND : RAOINT_BASE<"and">, T8PD;
-defm AOR : RAOINT_BASE<"or" >, T8XD;
-defm AXOR : RAOINT_BASE<"xor">, T8XS;
+defm AADD : RAOINT_BASE<"add">, T8;
+defm AAND : RAOINT_BASE<"and">, T8, PD;
+defm AOR : RAOINT_BASE<"or" >, T8, XD;
+defm AXOR : RAOINT_BASE<"xor">, T8, XS;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
index 6439f717accb..747f5aa86653 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
@@ -17,13 +17,13 @@
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
- "encls", []>, PS;
+ "encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
- "enclu", []>, PS;
+ "enclu", []>, TB;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
- "enclv", []>, PS;
+ "enclv", []>, TB;
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td
index ab13fa43c92d..05ed6585db6d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSNP.td
@@ -17,31 +17,31 @@
let SchedRW = [WriteSystem] in {
// F3 0F 01 FF
let Uses = [RAX], Defs = [EAX, EFLAGS] in
-def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, XS,
+def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, TB, XS,
Requires<[In64BitMode]>;
// F2 0F 01 FF
let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in
def PVALIDATE64: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
- XD, Requires<[In64BitMode]>;
+ TB, XD, Requires<[In64BitMode]>;
let Uses = [EAX, ECX, EDX], Defs = [EAX, EFLAGS] in
def PVALIDATE32: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
- XD, Requires<[Not64BitMode]>;
+ TB, XD, Requires<[Not64BitMode]>;
// F2 0F 01 FE
let Uses = [RAX, RCX], Defs = [EAX, EFLAGS] in
-def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, XD,
+def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, TB, XD,
Requires<[In64BitMode]>;
// F3 0F 01 FE
let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in
-def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, XS,
+def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, TB, XS,
Requires<[In64BitMode]>;
// F3 0F 01 FD
let Uses = [RAX, RDX], Defs = [RAX, RCX, RDX, EFLAGS] in
-def RMPQUERY: I<0x01, MRM_FD, (outs), (ins), "rmpquery", []>, XS,
+def RMPQUERY: I<0x01, MRM_FD, (outs), (ins), "rmpquery", []>, TB, XS,
Requires<[In64BitMode]>;
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index cf57fe562ed5..df1f0b5b4ca7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -215,7 +215,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
let Predicates = [UseAVX, OptForSize] in
defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
- VEX_4V, VEX_LIG, WIG;
+ VEX, VVVV, VEX_LIG, WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -268,15 +268,15 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
- SSEPackedSingle, UseSSE1>, XS;
+ SSEPackedSingle, UseSSE1>, TB, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
- SSEPackedDouble, UseSSE2>, XD;
+ SSEPackedDouble, UseSSE2>, TB, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
- SSEPackedSingle>, XS;
+ SSEPackedSingle>, TB, XS;
defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
- SSEPackedDouble>, XD;
+ SSEPackedDouble>, TB, XD;
}
// Patterns
@@ -352,46 +352,46 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS, VEX, WIG;
+ TB, VEX, WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD, VEX, WIG;
+ TB, PD, VEX, WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS, VEX, WIG;
+ TB, VEX, WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD, VEX, WIG;
+ TB, PD, VEX, WIG;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
- PS, VEX, VEX_L, WIG;
+ TB, VEX, VEX_L, WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
- PD, VEX, VEX_L, WIG;
+ TB, PD, VEX, VEX_L, WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
- PS, VEX, VEX_L, WIG;
+ TB, VEX, VEX_L, WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
- PD, VEX, VEX_L, WIG;
+ TB, PD, VEX, VEX_L, WIG;
}
let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS;
+ TB;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS;
+ TB;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD;
+ TB, PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD;
+ TB, PD;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -666,7 +666,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [], SSEPackedSingle>, PS,
+ [], SSEPackedSingle>, TB,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
@@ -674,7 +674,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- SSEPackedDouble>, PD,
+ SSEPackedDouble>, TB, PD,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@@ -683,7 +683,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
@@ -823,14 +823,14 @@ let Predicates = [UseAVX] in {
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
- VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
+ VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG;
let isCommutable = 1 in
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
- VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
+ VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG;
}
let Constraints = "$src1 = $dst" in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -903,36 +903,36 @@ let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPExceptio
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG;
+ TB, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, REX_W, VEX_LIG;
+ TB, XS, VEX, REX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG;
+ TB, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, REX_W, VEX_LIG;
+ TB, XD, VEX, REX_W, VEX_LIG;
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG;
+ TB, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, REX_W, VEX_LIG;
+ TB, XS, VEX, REX_W, VEX_LIG;
defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG;
+ TB, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, REX_W, VEX_LIG;
+ TB, XD, VEX, REX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
@@ -941,16 +941,16 @@ defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
// where appropriate to do so.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
- WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV,
VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
- WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV,
REX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
- WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV,
VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
- WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV,
REX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
@@ -983,42 +983,42 @@ let Predicates = [UseAVX] in {
let isCodeGenOnly = 1 in {
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC;
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC;
defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC;
defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
- WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
"cvtsi2ss", "cvtsi2ss{q}",
- WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, REX_W, SIMD_EXC;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
"cvtsi2sd", "cvtsi2sd{l}",
- WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
"cvtsi2sd", "cvtsi2sd{q}",
- WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
let Predicates = [UseSSE1] in {
@@ -1074,46 +1074,46 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, VEX, REX_W, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD, VEX, REX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
- SSEPackedDouble>, XD;
+ SSEPackedDouble>, TB, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
- SSEPackedDouble>, XD, REX_W;
+ SSEPackedDouble>, TB, XD, REX_W;
}
let Predicates = [UseAVX] in {
defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
- XS, VEX_4V, VEX_LIG, SIMD_EXC;
+ TB, XS, VEX, VVVV, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
- XS, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
+ TB, XS, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
- XD, VEX_4V, VEX_LIG;
+ TB, XD, VEX, VVVV, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
- XD, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
+ TB, XD, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
- XS, SIMD_EXC;
+ TB, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
- XS, REX_W, SIMD_EXC;
+ TB, XS, REX_W, SIMD_EXC;
defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
- XD;
+ TB, XD;
defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
- XD, REX_W, SIMD_EXC;
+ TB, XD, REX_W, SIMD_EXC;
}
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1150,34 +1150,34 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG, REX_W;
+ TB, XS, VEX, VEX_LIG, REX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedDouble>, TB, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG, REX_W;
+ TB, XD, VEX, VEX_LIG, REX_W;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
- XS, REX_W;
+ TB, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD;
+ WriteCvtSD2I, SSEPackedDouble>, TB, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
- XD, REX_W;
+ TB, XD, REX_W;
}
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1217,32 +1217,32 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, REX_W, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, VEX, REX_W, VEX_LIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;
+ WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
- PS, VEX, Requires<[HasAVX, NoVLX]>, WIG;
+ TB, VEX, Requires<[HasAVX, NoVLX]>, WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
- PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG;
+ TB, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
- PS, Requires<[UseSSE2]>;
+ TB, Requires<[UseSSE2]>;
}
// AVX aliases
@@ -1289,13 +1289,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG, WIG,
+ VEX, VVVV, VEX_LIG, WIG,
Sched<[WriteCvtSD2SS]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XD, VEX_4V, VEX_LIG, WIG,
+ TB, XD, VEX, VVVV, VEX_LIG, WIG,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}
@@ -1311,7 +1311,7 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
- XD, Requires<[UseSSE2, OptForSize]>,
+ TB, XD, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}
@@ -1321,14 +1321,14 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
- XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
+ TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
- XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
+ TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
@@ -1336,13 +1336,13 @@ def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
- XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
+ TB, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
- XD, Requires<[UseSSE2]>,
+ TB, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
}
@@ -1353,13 +1353,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, WIG,
+ TB, XS, VEX, VVVV, VEX_LIG, WIG,
Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, WIG,
+ TB, XS, VEX, VVVV, VEX_LIG, WIG,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0
@@ -1373,11 +1373,11 @@ let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (any_fpextend FR32:$src))]>,
- XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
+ TB, XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
- XS, Requires<[UseSSE2, OptForSize]>,
+ TB, XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC;
} // isCodeGenOnly = 1
@@ -1386,25 +1386,25 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, WIG,
+ []>, TB, XS, VEX, VVVV, VEX_LIG, WIG,
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, WIG, Requires<[HasAVX]>,
+ []>, TB, XS, VEX, VVVV, VEX_LIG, WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- []>, XS, Requires<[UseSSE2]>,
+ []>, TB, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- []>, XS, Requires<[UseSSE2]>,
+ []>, TB, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // hasSideEffects = 0
@@ -1699,30 +1699,30 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
- PS, VEX, Sched<[WriteCvtPS2PD]>, WIG;
+ TB, VEX, Sched<[WriteCvtPS2PD]>, WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG;
+ TB, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG;
+ TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG;
+ TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG;
}
let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
- PS, Sched<[WriteCvtPS2PD]>;
+ TB, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, Sched<[WriteCvtPS2PD.Folded]>;
+ TB, Sched<[WriteCvtPS2PD.Folded]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -1860,22 +1860,22 @@ let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
- XS, VEX_4V, VEX_LIG, WIG;
+ TB, XS, VEX, VVVV, VEX_LIG, WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
- XD, VEX_4V, VEX_LIG, WIG;
+ TB, XD, VEX, VVVV, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, TB, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, TB, XD;
}
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
@@ -1919,44 +1919,44 @@ let mayLoad = 1 in
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
+ "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
+ "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
+ "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
+ "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
+ sse_load_f32, "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
+ sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
+ sse_load_f32, "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
+ sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS;
+ "ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD;
+ "ucomisd", SSEPackedDouble>, TB, PD;
defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS;
+ "comiss", SSEPackedSingle>, TB;
defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD;
+ "comisd", SSEPackedDouble>, TB, PD;
let isCodeGenOnly = 1 in {
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS;
+ sse_load_f32, "ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD;
+ sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD;
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS;
+ sse_load_f32, "comiss", SSEPackedSingle>, TB;
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD;
+ sse_load_f64, "comisd", SSEPackedDouble>, TB, PD;
}
} // Defs = [EFLAGS]
@@ -1979,23 +1979,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, WIG;
+ SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, TB, VEX, VVVV, WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, WIG;
+ SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, TB, PD, VEX, VVVV, WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, WIG;
+ SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, TB, VEX, VVVV, VEX_L, WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, WIG;
+ SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, TB, PD, VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
+ SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
+ SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, TB, PD;
}
def CommutableCMPCC : PatLeaf<(timm), [{
@@ -2076,27 +2076,27 @@ let Predicates = [HasAVX, NoVLX] in {
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
- PS, VEX_4V, WIG;
+ TB, VEX, VVVV, WIG;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
- PS, VEX_4V, VEX_L, WIG;
+ TB, VEX, VVVV, VEX_L, WIG;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
- PD, VEX_4V, WIG;
+ TB, PD, VEX, VVVV, WIG;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
- PD, VEX_4V, VEX_L, WIG;
+ TB, PD, VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
+ memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
+ memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD;
}
//===----------------------------------------------------------------------===//
@@ -2126,44 +2126,44 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, WIG;
+ SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD, VEX, VVVV, WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, WIG;
+ SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD, VEX, VVVV, WIG;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
+ SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
+ SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
+ SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
+ SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
+ SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD;
defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+ SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD;
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
@@ -2208,13 +2208,13 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS, VEX, WIG;
+ SSEPackedSingle>, TB, VEX, WIG;
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, WIG;
+ SSEPackedDouble>, TB, PD, VEX, WIG;
defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_L, WIG;
+ SSEPackedSingle>, TB, VEX, VEX_L, WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_L, WIG;
+ SSEPackedDouble>, TB, PD, VEX, VEX_L, WIG;
// Also support integer VTs to avoid a int->fp bitcast in the DAG.
def : Pat<(X86movmsk (v4i32 VR128:$src)),
@@ -2228,9 +2228,9 @@ let Predicates = [HasAVX] in {
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS;
+ SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD;
+ SSEPackedDouble>, TB, PD;
let Predicates = [UseSSE2] in {
// Also support integer VTs to avoid a int->fp bitcast in the DAG.
@@ -2276,7 +2276,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, load, i128mem, sched.XMM,
- IsCommutable, 0>, VEX_4V, WIG;
+ IsCommutable, 0>, VEX, VVVV, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2285,7 +2285,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, load, i256mem, sched.YMM,
- IsCommutable, 0>, VEX_4V, VEX_L, WIG;
+ IsCommutable, 0>, VEX, VVVV, VEX_L, WIG;
}
// These are ordered here for pattern ordering requirements with the fp versions
@@ -2312,29 +2312,29 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
- [], [], 0>, PS, VEX_4V, VEX_L, WIG;
+ [], [], 0>, TB, VEX, VVVV, VEX_L, WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
- [], [], 0>, PD, VEX_4V, VEX_L, WIG;
+ [], [], 0>, TB, PD, VEX, VVVV, VEX_L, WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
- [], [], 0>, PS, VEX_4V, WIG;
+ [], [], 0>, TB, VEX, VVVV, WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
- [], [], 0>, PD, VEX_4V, WIG;
+ [], [], 0>, TB, PD, VEX, VVVV, WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
- [], []>, PS;
+ [], []>, TB;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
- [], []>, PD;
+ [], []>, TB, PD;
}
}
@@ -2636,26 +2636,26 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, WIG;
+ SSEPackedSingle, sched.PS.XMM, 0>, TB, VEX, VVVV, WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, WIG;
+ SSEPackedDouble, sched.PD.XMM, 0>, TB, PD, VEX, VVVV, WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, WIG;
+ SSEPackedSingle, sched.PS.YMM, 0>, TB, VEX, VVVV, VEX_L, WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, WIG;
+ SSEPackedDouble, sched.PD.YMM, 0>, TB, PD, VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
v4f32, f128mem, memopv4f32, SSEPackedSingle,
- sched.PS.XMM>, PS;
+ sched.PS.XMM>, TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble,
- sched.PD.XMM>, PD;
+ sched.PD.XMM>, TB, PD;
}
}
}
@@ -2665,18 +2665,18 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperat
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
- XS, VEX_4V, VEX_LIG, WIG;
+ TB, XS, VEX, VVVV, VEX_LIG, WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
- XD, VEX_4V, VEX_LIG, WIG;
+ TB, XD, VEX, VVVV, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle,
- sched.PS.Scl>, XS;
+ sched.PS.Scl>, TB, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble,
- sched.PD.Scl>, XD;
+ sched.PD.Scl>, TB, XD;
}
}
}
@@ -2687,18 +2687,18 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, WIG;
+ SSEPackedSingle, sched.PS.Scl, 0>, TB, XS, VEX, VVVV, VEX_LIG, WIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, WIG;
+ SSEPackedDouble, sched.PD.Scl, 0>, TB, XD, VEX, VVVV, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.PS.Scl>, XS;
+ SSEPackedSingle, sched.PS.Scl>, TB, XS;
defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.PD.Scl>, XD;
+ SSEPackedDouble, sched.PD.Scl>, TB, XD;
}
}
}
@@ -3016,29 +3016,29 @@ let Predicates = [HasAVX, NoVLX] in {
multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
- UseSSE1>, XS;
+ UseSSE1>, TB, XS;
defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
AVXTarget>,
- XS, VEX_4V, VEX_LIG, WIG;
+ TB, XS, VEX, VVVV, VEX_LIG, WIG;
}
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
- ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
+ ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, TB, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
- XS, VEX_4V, VEX_LIG, WIG;
+ TB, XS, VEX, VVVV, VEX_LIG, WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
- sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
+ sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, TB, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
- XD, VEX_4V, VEX_LIG, WIG;
+ TB, XD, VEX, VVVV, VEX_LIG, WIG;
}
// Square root.
@@ -3165,11 +3165,11 @@ let SchedRW = [WriteStoreNT] in {
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
- PS, Requires<[HasSSE2]>;
+ TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti{q}\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
- PS, Requires<[HasSSE2]>;
+ TB, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]
let Predicates = [HasAVX, NoVLX] in {
@@ -3226,14 +3226,14 @@ let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- PS, Requires<[HasCLFLUSH]>;
+ TB, Requires<[HasCLFLUSH]>;
}
let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
- "pause", [(int_x86_sse2_pause)]>, OBXS;
+ "pause", [(int_x86_sse2_pause)]>, XS;
}
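// A minimal illustrative sketch of the encoding note above (assumptions: a
// standalone .td processed with llvm-tblgen; the Sketch* names below are
// stand-ins, not the real X86 format classes). The XS prefix models the F3
// byte, so opcode 0x90 with F3 ("rep; nop") decodes as pause on SSE2 parts
// and as a plain nop on older ones:
class SketchInst<bits<8> opc, bit f3> {
  bits<8> Opcode = opc;
  bit PrefixF3 = f3;       // 0x90 alone is nop; F3 0x90 is pause
}
def SketchNOP   : SketchInst<0x90, 0>;
def SketchPAUSE : SketchInst<0x90, 1>;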
let SchedRW = [WriteFence] in {
@@ -3241,11 +3241,11 @@ let SchedRW = [WriteFence] in {
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- PS, Requires<[HasSSE1]>;
+ TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
- PS, Requires<[HasSSE2]>;
+ TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
- PS, Requires<[HasMFence]>;
+ TB, Requires<[HasMFence]>;
} // SchedRW
def : Pat<(X86MFence), (MFENCE)>;
@@ -3266,11 +3266,11 @@ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
- PS, Sched<[WriteLDMXCSR]>;
+ TB, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
- PS, Sched<[WriteSTMXCSR]>;
+ TB, Sched<[WriteSTMXCSR]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3327,11 +3327,11 @@ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv2i64 addr:$src))]>,
Sched<[SchedWriteVecMoveLS.XMM.RM]>,
- XS, VEX, WIG;
+ TB, XS, VEX, WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RM]>,
- XS, VEX, VEX_L, WIG;
+ TB, XS, VEX, VEX_L, WIG;
}
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
@@ -3347,10 +3347,10 @@ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(store (v2i64 VR128:$src), addr:$dst)]>,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, TB, XS, VEX, WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>, TB, XS, VEX, VEX_L, WIG;
}
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
@@ -3360,7 +3360,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
- XS, Requires<[UseSSE2]>;
+ TB, XS, Requires<[UseSSE2]>;
}
// For Disassembler
@@ -3370,7 +3370,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
- XS, Requires<[UseSSE2]>;
+ TB, XS, Requires<[UseSSE2]>;
}
} // SchedRW
@@ -3382,7 +3382,7 @@ def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
- XS, Requires<[UseSSE2]>;
+ TB, XS, Requires<[UseSSE2]>;
}
let mayStore = 1, hasSideEffects = 0,
@@ -3393,7 +3393,7 @@ def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
- XS, Requires<[UseSSE2]>;
+ TB, XS, Requires<[UseSSE2]>;
}
} // ExeDomain = SSEPackedInt
@@ -3537,12 +3537,12 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
load, i128mem, SchedWriteVecIMul.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
VR256, load, i256mem, SchedWriteVecIMul.YMM,
- 0>, VEX_4V, VEX_L, WIG;
+ 0>, VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
memop, i128mem, SchedWriteVecIMul.XMM>;
@@ -3550,11 +3550,11 @@ defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
load, i128mem, SchedWritePSADBW.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
load, i256mem, SchedWritePSADBW.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
memop, i128mem, SchedWritePSADBW.XMM>;
@@ -3604,11 +3604,11 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
- DstVT128, SrcVT, load, 0>, VEX_4V, WIG;
+ DstVT128, SrcVT, load, 0>, VEX, VVVV, WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
- DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
+ DstVT256, SrcVT, load, 0>, VEX, VVVV, VEX_L,
WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
@@ -3631,11 +3631,11 @@ multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR128, v16i8, sched.XMM, 0>, VEX_4V, WIG;
+ VR128, v16i8, sched.XMM, 0>, VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
VR256, v32i8, sched.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
sched.XMM>;
@@ -3757,11 +3757,11 @@ let Predicates = [UseSSE2] in {
} // ExeDomain = SSEPackedInt
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
- SchedWriteShuffle, NoVLX>, PD;
+ SchedWriteShuffle, NoVLX>, TB, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
- SchedWriteShuffle, NoVLX_Or_NoBWI>, XS;
+ SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
- SchedWriteShuffle, NoVLX_Or_NoBWI>, XD;
+ SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XD;
//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
@@ -3821,33 +3821,33 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3892,61 +3892,61 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -4004,7 +4004,7 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
timm:$src2))]>,
- PD, VEX, WIG, Sched<[WriteVecExtract]>;
+ TB, PD, VEX, WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4014,10 +4014,10 @@ def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
// Insert
let Predicates = [HasAVX, NoBWI] in
-defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, WIG;
+defm VPINSRW : sse2_pinsrw<0>, TB, PD, VEX, VVVV, WIG;
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
-defm PINSRW : sse2_pinsrw, PD;
+defm PINSRW : sse2_pinsrw, TB, PD;
} // ExeDomain = SSEPackedInt
@@ -4306,13 +4306,13 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, TB, XS,
VEX, Requires<[UseAVX]>, WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+ TB, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // ExeDomain, SchedRW
//===---------------------------------------------------------------------===//
@@ -4369,11 +4369,11 @@ let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
- XS, VEX, Requires<[UseAVX]>, WIG;
+ TB, XS, VEX, Requires<[UseAVX]>, WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
- XS, Requires<[UseSSE2]>;
+ TB, XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW
let Predicates = [UseAVX] in {
@@ -4563,27 +4563,27 @@ let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
- XD, VEX_4V, WIG;
+ TB, XD, VEX, VVVV, WIG;
defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
- XD, VEX_4V, VEX_L, WIG;
+ TB, XD, VEX, VVVV, VEX_L, WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
- PD, VEX_4V, WIG;
+ TB, PD, VEX, VVVV, WIG;
defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
- PD, VEX_4V, VEX_L, WIG;
+ TB, PD, VEX, VVVV, VEX_L, WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
- SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
+ SchedWriteFAddSizes.PS.XMM, memopv4f32>, TB, XD;
let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
- SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
+ SchedWriteFAddSizes.PD.XMM, memopv2f64>, TB, PD;
}
//===---------------------------------------------------------------------===//
@@ -4635,23 +4635,23 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
+ X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
+ X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
+ X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
+ X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
+ X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG;
defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
+ X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG;
defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
+ X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG;
defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
+ X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG;
}
}
@@ -4806,45 +4806,45 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
VR128, load, i128mem,
- SchedWriteVarShuffle.XMM, 0>, VEX_4V, WIG;
+ SchedWriteVarShuffle.XMM, 0>, VEX, VVVV, WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
+ SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
+ SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG;
}
}
@@ -4852,42 +4852,42 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
VR256, load, i256mem,
- SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWriteVarShuffle.YMM, 0>, VEX, VVVV, VEX_L, WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
v32i8, VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG;
}
}
@@ -4956,10 +4956,10 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
- SchedWriteShuffle.XMM, 0>, VEX_4V, WIG;
+ SchedWriteShuffle.XMM, 0>, VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
- SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
+ SchedWriteShuffle.YMM, 0>, VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
SchedWriteShuffle.XMM>;
@@ -5367,7 +5367,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoBWI] in {
- defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, WIG;
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX, VVVV, WIG;
def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
(VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src2, sub_8bit), timm:$src3)>;
@@ -5398,7 +5398,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoDQI] in
- defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX, VVVV;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -5424,7 +5424,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoDQI] in
- defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, REX_W;
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX, VVVV, REX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -5459,7 +5459,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}
@@ -5638,9 +5638,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
- VEX_4V, VEX_LIG, WIG, SIMD_EXC;
+ VEX, VVVV, VEX_LIG, WIG, SIMD_EXC;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
- VEX_4V, VEX_LIG, WIG, SIMD_EXC;
+ VEX, VVVV, VEX_LIG, WIG, SIMD_EXC;
}
let Predicates = [UseAVX] in {
@@ -5760,33 +5760,33 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, OpSize16, XS;
+ Sched<[WritePOPCNT]>, OpSize16, TB, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
(implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, OpSize16, XS;
+ Sched<[WritePOPCNT.Folded]>, OpSize16, TB, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, OpSize32, XS;
+ Sched<[WritePOPCNT]>, OpSize32, TB, XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
(implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, OpSize32, XS;
+ Sched<[WritePOPCNT.Folded]>, OpSize32, TB, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
- Sched<[WritePOPCNT]>, XS;
+ Sched<[WritePOPCNT]>, TB, XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
(implicit EFLAGS)]>,
- Sched<[WritePOPCNT.Folded]>, XS;
+ Sched<[WritePOPCNT.Folded]>, TB, XS;
}
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
@@ -5842,65 +5842,65 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
load, i128mem, SchedWriteVecIMul.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
load, i256mem, SchedWriteVecIMul.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -5927,20 +5927,20 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
load, i128mem, SchedWritePMULLD.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
load, i256mem, SchedWritePMULLD.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
@@ -6088,22 +6088,22 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, load, i128mem, 0,
- SchedWriteMPSAD.XMM>, VEX_4V, WIG;
+ SchedWriteMPSAD.XMM>, VEX, VVVV, WIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, load, f128mem, 0,
- SchedWriteDPPS.XMM>, VEX_4V, WIG;
+ SchedWriteDPPS.XMM>, VEX, VVVV, WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, load, f128mem, 0,
- SchedWriteDPPD.XMM>, VEX_4V, WIG;
+ SchedWriteDPPD.XMM>, VEX, VVVV, WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, load, i256mem, 0,
- SchedWriteDPPS.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWriteDPPS.YMM>, VEX, VVVV, VEX_L, WIG;
}
}
@@ -6111,7 +6111,7 @@ let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
VR256, load, i256mem, 0,
- SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, WIG;
+ SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG;
}
}
@@ -6170,30 +6170,30 @@ let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
VR128, load, f128mem, 0, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, load, f256mem, 0, SSEPackedSingle,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, load, f128mem, 0, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, load, f256mem, 0, SSEPackedDouble,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, load, i128mem, 0, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
VR256, load, i256mem, 0, SSEPackedInt,
SchedWriteBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
}
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
@@ -6290,7 +6290,7 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
- SSEPackedInt>, TAPD, VEX_4V,
+ SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched]>;
def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
@@ -6299,7 +6299,7 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src3, (mem_frag addr:$src2),
- RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
+ RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -6564,12 +6564,12 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, WIG;
+ VEX, VVVV, WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, WIG;
+ VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -6706,37 +6706,37 @@ let Constraints = "$src1 = $dst" in {
// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
- X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
- def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(UsesXMM0,
- !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
- [!if(UsesXMM0,
- (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
- (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
- T8PS, Sched<[sched]>;
-
- def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(UsesXMM0,
- !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
- [!if(UsesXMM0,
- (set VR128:$dst, (IntId VR128:$src1,
- (memop addr:$src2), XMM0)),
- (set VR128:$dst, (IntId VR128:$src1,
- (memop addr:$src2))))]>, T8PS,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ X86FoldableSchedWrite sched, string Suffix = "", bit UsesXMM0 = 0> {
+ def rr#Suffix : I<Opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
+ T8, Sched<[sched]>;
+
+ def rm#Suffix : I<Opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1,
+ (memop addr:$src2), XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1,
+ (memop addr:$src2))))]>, T8,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
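// A minimal sketch of the name pasting used by SHAI_binop above (assumptions:
// a standalone .td processed with llvm-tblgen; the Demo* names are
// illustrative). "def rr#Suffix" pastes the template argument into the record
// name and defm prepends its own prefix, so a single multiclass can emit both
// the legacy records and the "_EVEX"-suffixed ones:
class DemoInst<string form> { string Form = form; }
multiclass DemoBinop<string Suffix = ""> {
  def rr#Suffix : DemoInst<"reg, reg">;
  def rm#Suffix : DemoInst<"reg, mem">;
}
defm DEMOA : DemoBinop;           // emits DEMOArr and DEMOArm
defm DEMOB : DemoBinop<"_EVEX">;  // emits DEMOBrr_EVEX and DEMOBrm_EVEX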
-let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
+let Constraints = "$src1 = $dst", Predicates = [HasSHA, NoEGPR] in {
def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
- (i8 timm:$src3)))]>, TAPS,
+ (i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM]>;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
@@ -6744,7 +6744,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
(memop addr:$src2),
- (i8 timm:$src3)))]>, TAPS,
+ (i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold]>;
@@ -6757,7 +6757,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
let Uses=[XMM0] in
defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
- SchedWriteVecIMul.XMM, 1>;
+ SchedWriteVecIMul.XMM, "", 1>;
defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
SchedWriteVecIMul.XMM>;
@@ -6765,11 +6765,47 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
SchedWriteVecIMul.XMM>;
}
-// Aliases with explicit %xmm0
-def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
- (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
-def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
- (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
+let Constraints = "$src1 = $dst", Predicates = [HasSHA, HasEGPR, In64BitMode] in {
+ def SHA1RNDS4rri_EVEX: Ii8<0xD4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
+ (i8 timm:$src3)))]>,
+ EVEX, NoCD8, T_MAP4, Sched<[SchedWriteVecIMul.XMM]>;
+ def SHA1RNDS4rmi_EVEX: Ii8<0xD4, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1,
+ (memop addr:$src2),
+ (i8 timm:$src3)))]>,
+ EVEX, NoCD8, T_MAP4,
+ Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
+
+ defm SHA1NEXTE : SHAI_binop<0xD8, "sha1nexte", int_x86_sha1nexte,
+ SchedWriteVecIMul.XMM, "_EVEX">,
+ EVEX, NoCD8, T_MAP4;
+ defm SHA1MSG1 : SHAI_binop<0xD9, "sha1msg1", int_x86_sha1msg1,
+ SchedWriteVecIMul.XMM, "_EVEX">,
+ EVEX, NoCD8, T_MAP4;
+ defm SHA1MSG2 : SHAI_binop<0xDA, "sha1msg2", int_x86_sha1msg2,
+ SchedWriteVecIMul.XMM, "_EVEX">,
+ EVEX, NoCD8, T_MAP4;
+
+ let Uses=[XMM0] in
+ defm SHA256RNDS2 : SHAI_binop<0xDB, "sha256rnds2", int_x86_sha256rnds2,
+ SchedWriteVecIMul.XMM, "_EVEX", 1>,
+ EVEX, NoCD8, T_MAP4;
+
+ defm SHA256MSG1 : SHAI_binop<0xDC, "sha256msg1", int_x86_sha256msg1,
+ SchedWriteVecIMul.XMM, "_EVEX">,
+ EVEX, NoCD8, T_MAP4;
+ defm SHA256MSG2 : SHAI_binop<0xDD, "sha256msg2", int_x86_sha256msg2,
+ SchedWriteVecIMul.XMM, "_EVEX">,
+ EVEX, NoCD8, T_MAP4;
+}
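// A minimal sketch of the predicate split above (assumptions: a standalone
// .td processed with llvm-tblgen; the Demo* names are illustrative). The
// legacy defs are gated on NoEGPR and the EVEX defs on HasEGPR, so the two
// encodings of one mnemonic never become selectable at the same time:
class DemoPredicate<string n> { string Name = n; }
def DemoNoEGPR  : DemoPredicate<"NoEGPR">;
def DemoHasEGPR : DemoPredicate<"HasEGPR">;
class DemoEncoding<string asm, list<DemoPredicate> preds> {
  string AsmString = asm;
  list<DemoPredicate> Predicates = preds;
}
def DemoSHA1RNDS4      : DemoEncoding<"sha1rnds4", [DemoNoEGPR]>;
def DemoSHA1RNDS4_EVEX : DemoEncoding<"sha1rnds4", [DemoHasEGPR]>;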
//===----------------------------------------------------------------------===//
// AES-NI Instructions
@@ -6796,28 +6832,28 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, load>, VEX_4V, WIG;
+ int_x86_aesni_aesenc, load>, VEX, VVVV, WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, load>, VEX_4V, WIG;
+ int_x86_aesni_aesenclast, load>, VEX, VVVV, WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, load>, VEX_4V, WIG;
+ int_x86_aesni_aesdec, load>, VEX, VVVV, WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, load>, VEX_4V, WIG;
+ int_x86_aesni_aesdeclast, load>, VEX, VVVV, WIG;
}
let Predicates = [NoVLX, HasVAES] in {
defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
int_x86_aesni_aesenc_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, WIG;
+ i256mem>, VEX, VVVV, VEX_L, WIG;
defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
int_x86_aesni_aesenclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, WIG;
+ i256mem>, VEX, VVVV, VEX_L, WIG;
defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
int_x86_aesni_aesdec_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, WIG;
+ i256mem>, VEX, VVVV, VEX_L, WIG;
defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
int_x86_aesni_aesdeclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, WIG;
+ i256mem>, VEX, VVVV, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6958,11 +6994,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
- int_x86_pclmulqdq>, VEX_4V, WIG;
+ int_x86_pclmulqdq>, VEX, VVVV, WIG;
let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
- int_x86_pclmulqdq_256>, VEX_4V, VEX_L, WIG;
+ int_x86_pclmulqdq_256>, VEX, VVVV, VEX_L, WIG;
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
X86MemOperand MemOp, string Hi, string Lo> {
@@ -6999,26 +7035,26 @@ def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
[(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
timm:$idx))]>,
- PD, Sched<[SchedWriteVecALU.XMM]>;
+ TB, PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
"extrq\t{$mask, $src|$src, $mask}",
[(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
VR128:$mask))]>,
- PD, Sched<[SchedWriteVecALU.XMM]>;
+ TB, PD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
[(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
timm:$len, timm:$idx))]>,
- XD, Sched<[SchedWriteVecALU.XMM]>;
+ TB, XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
"insertq\t{$mask, $src|$src, $mask}",
[(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
VR128:$mask))]>,
- XD, Sched<[SchedWriteVecALU.XMM]>;
+ TB, XD, Sched<[SchedWriteVecALU.XMM]>;
}
} // ExeDomain = SSEPackedInt
@@ -7026,10 +7062,10 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
- "movntss\t{$src, $dst|$dst, $src}", []>, XS;
+ "movntss\t{$src, $dst|$dst, $src}", []>, TB, XS;
def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
+ "movntsd\t{$src, $dst|$dst, $src}", []>, TB, XD;
} // SchedRW
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
@@ -7133,11 +7169,11 @@ let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
+ VEX, VVVV, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+ VEX, VVVV, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7176,12 +7212,12 @@ let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256]>, VEX, VVVV, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;
}
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
@@ -7279,22 +7315,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[schedX.RM]>;
+ VEX, VVVV, Sched<[schedX.RM]>;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[schedY.RM]>;
+ VEX, VVVV, VEX_L, Sched<[schedY.RM]>;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[schedX.MR]>;
+ VEX, VVVV, Sched<[schedX.MR]>;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[schedY.MR]>;
+ VEX, VVVV, VEX_L, Sched<[schedY.MR]>;
}
let ExeDomain = SSEPackedSingle in
@@ -7325,14 +7361,14 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
VR128:$src2, VR128:$src3)))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
(loadv4i32 addr:$src3))))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold,
SchedWriteVecIMul.XMM.ReadAfterFold]>;
@@ -7342,14 +7378,14 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
VR256:$src2, VR256:$src3)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
(loadv8i32 addr:$src3))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
SchedWriteVecIMul.YMM.ReadAfterFold,
SchedWriteVecIMul.YMM.ReadAfterFold]>;
}
@@ -7388,13 +7424,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
+ [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX, VVVV,
Sched<[varsched]>;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
- (i_vt (load addr:$src2)))))]>, VEX_4V,
+ (i_vt (load addr:$src2)))))]>, VEX, VVVV,
Sched<[varsched.Folded, sched.ReadAfterFold]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
@@ -7438,12 +7474,12 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
// Zero All YMM registers
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
- [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
+ [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L,
Requires<[HasAVX]>, WIG;
// Zero Upper bits of YMM registers
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
- [(int_x86_avx_vzeroupper)]>, PS, VEX,
+ [(int_x86_avx_vzeroupper)]>, TB, VEX,
Requires<[HasAVX]>, WIG;
} // Defs
} // SchedRW
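
The PS to TB change on VZEROALL/VZEROUPPER above is a convenient place to sanity-check what these adjectives encode: TB selects the 0x0F opcode map, and with no prefix adjective the VEX pp field stays 00. A minimal C++ check (helper names are illustrative, not LLVM's emitter API) reproduces the well-known byte patterns:

#include <cstdint>
#include <cstdio>

// Payload byte of a 2-byte VEX prefix (escape 0xC5):
// bit 7 = inverted R, bits 6:3 = inverted vvvv, bit 2 = L, bits 1:0 = pp.
static uint8_t vex2Payload(bool r, uint8_t vvvv, bool l, uint8_t pp) {
  return (uint8_t)((!r << 7) | ((~vvvv & 0xF) << 3) | (l << 2) | (pp & 3));
}

int main() {
  // vvvv is unused here, so it encodes as 1111 (complement of 0); pp = 00.
  printf("vzeroall:   c5 %02x 77\n", vex2Payload(false, 0, true, 0));  // c5 fc 77
  printf("vzeroupper: c5 %02x 77\n", vex2Payload(false, 0, false, 0)); // c5 f8 77
}
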
@@ -7457,11 +7493,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
- T8PD, VEX, Sched<[sched]>;
+ T8, PD, VEX, Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- []>, T8PD, VEX, Sched<[sched.Folded]>;
+ []>, T8, PD, VEX, Sched<[sched.Folded]>;
}
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
@@ -7470,12 +7506,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
- TAPD, VEX, Sched<[RR]>;
+ TA, PD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TAPD, VEX, Sched<[MR]>;
+ TA, PD, VEX, Sched<[MR]>;
}
let Predicates = [HasF16C, NoVLX] in {
@@ -7522,14 +7558,14 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
- Sched<[sched]>, VEX_4V;
+ Sched<[sched]>, VEX, VVVV;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX, VVVV;
// Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
@@ -7779,7 +7815,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
- Sched<[Sched]>, VEX_4V, VEX_L;
+ Sched<[Sched]>, VEX, VVVV, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, memOp:$src2),
!strconcat(OpcodeStr,
@@ -7787,7 +7823,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr,
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
(load addr:$src2))))]>,
- Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VVVV, VEX_L;
}
}
@@ -7830,11 +7866,11 @@ let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;
let Predicates = [HasAVX2] in {
defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
@@ -7852,12 +7888,12 @@ let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;
}
let Predicates = [HasAVX2, NoVLX] in {
@@ -7903,22 +7939,22 @@ multiclass avx2_pmovmask<string OpcodeStr,
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[schedX.RM]>;
+ VEX, VVVV, Sched<[schedX.RM]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[schedY.RM]>;
+ VEX, VVVV, VEX_L, Sched<[schedY.RM]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[schedX.MR]>;
+ VEX, VVVV, Sched<[schedX.MR]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[schedY.MR]>;
+ VEX, VVVV, VEX_L, Sched<[schedY.MR]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
@@ -7976,28 +8012,28 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM]>;
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
(vt128 (load addr:$src2)))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
+ VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM.Folded,
SchedWriteVarVecShift.XMM.ReadAfterFold]>;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
(vt256 (load addr:$src2)))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}
@@ -8073,12 +8109,12 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
let isCommutable = 1 in
def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
- Sched<[sched]>, T8PD;
+ Sched<[sched]>, T8;
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
(MemOpFrag addr:$src2))))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, T8;
}
}
@@ -8110,10 +8146,10 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
load, i128mem, SchedWriteVecIMul.XMM>,
- VEX_4V, REX_W;
+ VEX, VVVV, REX_W;
defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
load, i256mem, SchedWriteVecIMul.YMM>,
- VEX_4V, VEX_L, REX_W;
+ VEX, VVVV, VEX_L, REX_W;
}
}
@@ -8124,16 +8160,16 @@ defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
i128mem, SchedWriteVecALU.XMM, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
- i128mem, SchedWriteVecALU.XMM>, VEX_4V;
+ i128mem, SchedWriteVecALU.XMM>, VEX, VVVV;
defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
- i256mem, SchedWriteVecALU.YMM>, VEX_4V, VEX_L;
+ i256mem, SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
- X86GF2P8affineinvqb>, TAPD;
+ X86GF2P8affineinvqb>, TA, PD;
defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
- X86GF2P8affineqb>, TAPD;
+ X86GF2P8affineqb>, TA, PD;
}
// AVX-IFMA
@@ -8147,28 +8183,28 @@ multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
VR128:$src3, VR128:$src1)))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
}
def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
(loadv2i64 addr:$src3), VR128:$src1)))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
let isCommutable = 1 in {
def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
VR256:$src3, VR256:$src1)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
(loadv4i64 addr:$src3), VR256:$src1)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}
defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, REX_W, ExplicitVEXPrefix;
@@ -8186,52 +8222,52 @@ multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
- VEX_4V, Sched<[Sched]>;
+ VEX, VVVV, Sched<[Sched]>;
def rm : I<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, X86memop:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
(MemOpFrag addr:$src3))))]>,
- VEX_4V, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
+ VEX, VVVV, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
}
let Predicates = [HasAVXVNNIINT8] in {
defm VPDPBSSD : avx_dotprod_rm<0x50,"vpdpbssd", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM,
- 1>, T8XD;
+ 1>, T8, XD;
defm VPDPBSSDY : avx_dotprod_rm<0x50,"vpdpbssd", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM,
- 1>, VEX_L, T8XD;
+ 1>, VEX_L, T8, XD;
defm VPDPBUUD : avx_dotprod_rm<0x50,"vpdpbuud", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM,
- 1>, T8PS;
+ 1>, T8;
defm VPDPBUUDY : avx_dotprod_rm<0x50,"vpdpbuud", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM,
- 1>, VEX_L, T8PS;
+ 1>, VEX_L, T8;
defm VPDPBSSDS : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM,
- 1>, T8XD;
+ 1>, T8, XD;
defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM,
- 1>, VEX_L, T8XD;
+ 1>, VEX_L, T8, XD;
defm VPDPBUUDS : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM,
- 1>, T8PS;
+ 1>, T8;
defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM,
- 1>, VEX_L, T8PS;
+ 1>, VEX_L, T8;
defm VPDPBSUD : avx_dotprod_rm<0x50,"vpdpbsud", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbsud, SchedWriteVecIMul.XMM,
- 0>, T8XS;
+ 0>, T8, XS;
defm VPDPBSUDY : avx_dotprod_rm<0x50,"vpdpbsud", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbsud, SchedWriteVecIMul.YMM,
- 0>, VEX_L, T8XS;
+ 0>, VEX_L, T8, XS;
defm VPDPBSUDS : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM,
- 0>, T8XS;
+ 0>, T8, XS;
defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
- 0>, VEX_L, T8XS;
+ 0>, VEX_L, T8, XS;
}
// AVX-NE-CONVERT
@@ -8270,18 +8306,18 @@ multiclass VCVTNEPS2BF16_BASE {
let Predicates = [HasAVXNECONVERT] in {
defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem,
- f16mem>, T8XS;
+ f16mem>, T8, XS;
defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>,
- T8PD;
+ T8, PD;
defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem,
- f256mem>, T8XS;
+ f256mem>, T8, XS;
defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem,
- f256mem>, T8PD;
+ f256mem>, T8, PD;
defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem,
- f256mem>, T8XD;
+ f256mem>, T8, XD;
defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
- f256mem>, T8PS;
- defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
+ f256mem>, T8;
+ defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8, XS, ExplicitVEXPrefix;
def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
(VCVTNEPS2BF16Yrr VR256:$src)>;
@@ -8301,19 +8337,19 @@ def VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst),
"vsha512msg1\t{$src2, $dst|$dst, $src2}",
[(set VR256:$dst,
(int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L,
- VEX, T8XD, Sched<[WriteVecIMul]>;
+ VEX, T8, XD, Sched<[WriteVecIMul]>;
def VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
"vsha512msg2\t{$src2, $dst|$dst, $src2}",
[(set VR256:$dst,
(int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L,
- VEX, T8XD, Sched<[WriteVecIMul]>;
+ VEX, T8, XD, Sched<[WriteVecIMul]>;
def VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR128:$src3),
"vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR256:$dst,
(int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>,
- VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>;
+ VEX_L, VEX, VVVV, T8, XD, Sched<[WriteVecIMul]>;
}
// FIXME: Is there a better scheduler class for SM3 than WriteVecIMul?
@@ -8325,14 +8361,14 @@ let Predicates = [HasSM3], Constraints = "$src1 = $dst" in {
[(set VR128:$dst,
(!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
VR128:$src2, VR128:$src3))]>,
- Sched<[WriteVecIMul]>, VEX_4V;
+ Sched<[WriteVecIMul]>, VEX, VVVV;
def rm : I<0xda, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
VR128:$src2, (loadv4i32 addr:$src3)))]>,
- Sched<[WriteVecIMul]>, VEX_4V;
+ Sched<[WriteVecIMul]>, VEX, VVVV;
}
multiclass VSM3RNDS2_Base {
@@ -8353,9 +8389,9 @@ let Predicates = [HasSM3], Constraints = "$src1 = $dst" in {
}
}
-defm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8PS;
-defm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8PD;
-defm VSM3RNDS2 : VSM3RNDS2_Base, VEX_4V, TAPD;
+defm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8;
+defm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8, PD;
+defm VSM3RNDS2 : VSM3RNDS2_Base, VEX, VVVV, TA, PD;
// FIXME: Is there a better scheduler class for SM4 than WriteVecIMul?
let Predicates = [HasSM4] in {
@@ -8376,10 +8412,10 @@ let Predicates = [HasSM4] in {
}
}
-defm VSM4KEY4 : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8XS, VEX_4V;
-defm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8XS, VEX_L, VEX_4V;
-defm VSM4RNDS4 : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8XD, VEX_4V;
-defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8XD, VEX_L, VEX_4V;
+defm VSM4KEY4 : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8, XS, VEX, VVVV;
+defm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8, XS, VEX_L, VEX, VVVV;
+defm VSM4RNDS4 : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8, XD, VEX, VVVV;
+defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8, XD, VEX_L, VEX, VVVV;
let Predicates = [HasAVXVNNIINT16], Constraints = "$src1 = $dst" in
multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
@@ -8390,7 +8426,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
[(set VR128:$dst,
(v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
VR128:$src1, VR128:$src2, VR128:$src3)))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
@@ -8398,7 +8434,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
[(set VR128:$dst,
(v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3))))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
let isCommutable = IsCommutable in
def Yrr : I<opc, MRMSrcReg, (outs VR256:$dst),
@@ -8407,7 +8443,7 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
[(set VR256:$dst,
(v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
VR256:$src1, VR256:$src2, VR256:$src3)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
def Yrm : I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
@@ -8415,12 +8451,12 @@ multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
[(set VR256:$dst,
(v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
VR256:$src1, VR256:$src2, (loadv8i32 addr:$src3))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+ VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}
-defm VPDPWSUD : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8XS;
-defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8XS;
-defm VPDPWUSD : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8PD;
-defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8PD;
-defm VPDPWUUD : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8PS;
-defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8PS;
+defm VPDPWSUD : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8, XS;
+defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8, XS;
+defm VPDPWUSD : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8, PD;
+defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8, PD;
+defm VPDPWUUD : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8;
+defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8;
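
Every hunk in this file follows the same mechanical rewrite: fused prefix classes are split into one adjective per encoding field, so the opcode map and the mandatory prefix become independent knobs. A rough C++ summary of the field values involved, assuming the standard VEX/EVEX field assignments (the enum names are mine, not LLVM's):

#include <cstdint>

// Mandatory-prefix field ("pp" in VEX/EVEX): one adjective each.
enum class MandatoryPrefix : uint8_t {
  None = 0b00, // PS, or no prefix adjective at all
  P66  = 0b01, // PD
  PF3  = 0b10, // XS
  PF2  = 0b11, // XD
};

// Opcode-map field ("mmm"/"m-mmmm"): a second, independent adjective.
enum class OpcodeMap : uint8_t {
  Map0F   = 0b001, // TB
  Map0F38 = 0b010, // T8
  Map0F3A = 0b011, // TA
  Map4    = 0b100, // T_MAP4 (promoted legacy instructions)
  Map5    = 0b101, // T_MAP5
  Map6    = 0b110, // T_MAP6
  Map7    = 0b111, // T_MAP7
};

// Typical spellings after the split, as applied throughout this patch:
//   T8PD -> T8, PD    TAXD -> TA, XD    T8PS -> T8    PS -> TB
//   VEX_4V -> VEX, VVVV   EVEX_4V -> EVEX, VVVV   XOP_4V -> XOP, VVVV
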
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 48bf23f8cbf7..d13e3b7af69a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -829,12 +829,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0 in {
def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- TAXD, VEX, Sched<[WriteShift]>;
+ TA, XD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- TAXD, VEX, Sched<[WriteShiftLd]>;
+ TA, XD, VEX, Sched<[WriteShiftLd]>;
}
}
@@ -860,23 +860,23 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2, NoEGPR] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W;
- defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
- defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, REX_W;
- defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD;
- defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W;
- defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD;
- defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8, XS;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8, XS, REX_W;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8, XD;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8, XD, REX_W;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, PD;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, PD, REX_W;
}
let Predicates = [HasBMI2, HasEGPR] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX;
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX;
- defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX;
- defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX;
- defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX;
- defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX;
- defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX;
- defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8, XS, EVEX;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8, XS, REX_W, EVEX;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8, XD, EVEX;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8, XD, REX_W, EVEX;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8, PD, EVEX;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
let Predicates = [HasBMI2] in {
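
SARX/SHRX/SHLX above differ only in the pp field (XS, XD, PD) while sharing map 0F38 (T8); RORX sits in map 0F3A (TA) with XD. That independence is visible in the 3-byte VEX layout, sketched here in C++ (byte names are illustrative):

#include <cstdint>

// 3-byte VEX: escape 0xC4, then two payload bytes.
// Byte 1: inverted R/X/B plus the 5-bit opcode map (0F=1, 0F38=2, 0F3A=3).
static uint8_t vex3Byte1(bool r, bool x, bool b, uint8_t map) {
  return (uint8_t)((!r << 7) | (!x << 6) | (!b << 5) | (map & 0x1F));
}

// Byte 2: W, inverted vvvv (the VVVV operand), L, and the 2-bit pp field.
// e.g. SARX32 = map 2 with pp 0b10 (F3); SHLX32 = map 2 with pp 0b01 (66).
static uint8_t vex3Byte2(bool w, uint8_t vvvv, bool l, uint8_t pp) {
  return (uint8_t)((w << 7) | ((~vvvv & 0xF) << 3) | (l << 2) | (pp & 3));
}
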
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
index cbb5d4ed5bbd..efb58c6102dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
@@ -426,31 +426,31 @@ let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Uses = [EAX, ECX, EDX] in
-def WRMSRNS : I<0x01, MRM_C6, (outs), (ins), "wrmsrns", []>, PS;
+def WRMSRNS : I<0x01, MRM_C6, (outs), (ins), "wrmsrns", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
let Defs = [RAX, EFLAGS], Uses = [RBX, RCX], Predicates = [In64BitMode] in
-def PBNDKB : I<0x01, MRM_C7, (outs), (ins), "pbndkb", []>, PS;
+def PBNDKB : I<0x01, MRM_C7, (outs), (ins), "pbndkb", []>, TB;
let Uses = [RSI, RDI, RCX], Predicates = [In64BitMode] in {
-def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, XS;
-def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, XD;
+def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, TB, XS;
+def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, TB, XD;
}
let Predicates = [HasUSERMSR], mayLoad = 1 in {
def URDMSRrr : I<0xf8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"urdmsr\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8XD;
+ [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8, XD;
def URDMSRri : Ii32<0xf8, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm),
"urdmsr\t{$imm, $dst|$dst, $imm}",
- [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7XD, VEX;
+ [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7, XD, VEX;
}
let Predicates = [HasUSERMSR], mayStore = 1 in {
def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
"uwrmsr\t{$src1, $src2|$src2, $src1}",
- [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8XS;
+ [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS;
def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm),
"uwrmsr\t{$src, $imm|$imm, $src}",
- [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7XS, VEX;
+ [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX;
}
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
@@ -481,12 +481,12 @@ let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, PS;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB, PS;
// wbnoinvd is like wbinvd, except without invalidation
// encoding: like wbinvd + an 0xF3 prefix
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
- [(int_x86_wbnoinvd)]>, XS,
+ [(int_x86_wbnoinvd)]>, TB, XS,
Requires<[HasWBNOINVD]>;
} // SchedRW
@@ -497,57 +497,74 @@ let SchedRW = [WriteSystem] in {
let Uses = [SSP] in {
let Defs = [SSP] in {
def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
- [(int_x86_incsspd GR32:$src)]>, XS;
+ [(int_x86_incsspd GR32:$src)]>, TB, XS;
def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
- [(int_x86_incsspq GR64:$src)]>, XS;
+ [(int_x86_incsspq GR64:$src)]>, TB, XS;
} // Defs SSP
let Constraints = "$src = $dst" in {
def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"rdsspd\t$dst",
- [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
+ [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, TB, XS;
def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
"rdsspq\t$dst",
- [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
+ [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, TB, XS;
}
let Defs = [SSP] in {
def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
- [(int_x86_saveprevssp)]>, XS;
+ [(int_x86_saveprevssp)]>, TB, XS;
def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
"rstorssp\t$src",
- [(int_x86_rstorssp addr:$src)]>, XS;
+ [(int_x86_rstorssp addr:$src)]>, TB, XS;
} // Defs SSP
} // Uses SSP
+let Predicates = [NoEGPR] in {
def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrssd\t{$src, $dst|$dst, $src}",
- [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
+ [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8;
def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrssq\t{$src, $dst|$dst, $src}",
- [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
+ [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8;
def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrussd\t{$src, $dst|$dst, $src}",
- [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
+ [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8, PD;
def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrussq\t{$src, $dst|$dst, $src}",
- [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;
+ [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8, PD;
+}
+
+let Predicates = [HasEGPR, In64BitMode] in {
+ def WRSSD_EVEX : I<0x66, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "wrssd\t{$src, $dst|$dst, $src}",
+ [(int_x86_wrssd GR32:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4;
+ def WRSSQ_EVEX : RI<0x66, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "wrssq\t{$src, $dst|$dst, $src}",
+ [(int_x86_wrssq GR64:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4;
+ def WRUSSD_EVEX : I<0x65, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "wrussd\t{$src, $dst|$dst, $src}",
+ [(int_x86_wrussd GR32:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4, PD;
+ def WRUSSQ_EVEX : RI<0x65, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "wrussq\t{$src, $dst|$dst, $src}",
+ [(int_x86_wrussq GR64:$src, addr:$dst)]>, EVEX, NoCD8, T_MAP4, PD;
+}
let Defs = [SSP] in {
let Uses = [SSP] in {
def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
- [(int_x86_setssbsy)]>, XS;
+ [(int_x86_setssbsy)]>, TB, XS;
} // Uses SSP
def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
"clrssbsy\t$src",
- [(int_x86_clrssbsy addr:$src)]>, XS;
+ [(int_x86_clrssbsy addr:$src)]>, TB, XS;
} // Defs SSP
} // SchedRW
let SchedRW = [WriteSystem] in {
- def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
- def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
+ def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, TB, XS;
+ def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, TB, XS;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -557,51 +574,51 @@ let SchedRW = [WriteSystem] in {
// on Windows without needing to enable the xsave feature to be compatible with
// MSVC.
let Defs = [EDX, EAX], Uses = [ECX] in
-def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
+def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
- [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
+ [(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave\t$dst",
- [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
+ [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave64\t$dst",
- [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
+ [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor\t$dst",
- [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
+ [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor64\t$dst",
- [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
+ [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt\t$dst",
- [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
+ [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt64\t$dst",
- [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
+ [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
- [(int_x86_xsavec addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC]>;
+ [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
- [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC, In64BitMode]>;
+ [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
- [(int_x86_xsaves addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>;
+ [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
- [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
+ [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
- [(int_x86_xrstors addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>;
+ [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
- [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES, In64BitMode]>;
+ [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
@@ -634,10 +651,10 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru",
- [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, PS;
+ [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, TB;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru",
- [(X86wrpkru EAX, EDX, ECX)]>, PS;
+ [(X86wrpkru EAX, EDX, ECX)]>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -645,28 +662,28 @@ let Uses = [EAX, ECX, EDX] in
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
"rdfsbase{l}\t$dst",
- [(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
+ [(set GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
"rdfsbase{q}\t$dst",
- [(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
+ [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
"rdgsbase{l}\t$dst",
- [(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
+ [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
"rdgsbase{q}\t$dst",
- [(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
+ [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS;
def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
"wrfsbase{l}\t$src",
- [(int_x86_wrfsbase_32 GR32:$src)]>, XS;
+ [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS;
def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
"wrfsbase{q}\t$src",
- [(int_x86_wrfsbase_64 GR64:$src)]>, XS;
+ [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS;
def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
"wrgsbase{l}\t$src",
- [(int_x86_wrgsbase_32 GR32:$src)]>, XS;
+ [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS;
def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
"wrgsbase{q}\t$src",
- [(int_x86_wrgsbase_64 GR64:$src)]>, XS;
+ [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS;
}
//===----------------------------------------------------------------------===//
@@ -674,15 +691,15 @@ let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}",
- [(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
+ [(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8, PD,
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
+ "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[In64BitMode, HasINVPCID]>;
def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode, HasINVPCID]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
@@ -701,15 +718,15 @@ let Predicates = [In64BitMode, HasINVPCID] in {
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
- def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, PS;
- def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, PS;
+ def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
+ def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
- def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, PS;
+ def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW
@@ -730,9 +747,9 @@ def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
- "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
+ "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, TB, XS,
Requires<[Not64BitMode, HasRDPID]>;
-def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
+def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, TB, XS,
Requires<[In64BitMode, HasRDPID]>;
} // SchedRW
@@ -748,17 +765,17 @@ let Predicates = [In64BitMode, HasRDPID] in {
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
- "ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
+ "ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, TB, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
- "ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
+ "ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, TB, XS,
Requires<[In64BitMode, HasPTWRITE]>;
def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
- "ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
+ "ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, TB, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
- "ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
+ "ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, TB, XS,
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
@@ -767,7 +784,7 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
let SchedRW = [WriteSystem] in {
let Uses = [ECX], Defs = [EAX, EDX] in
- def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS,
+ def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, TB,
Requires<[HasRDPRU]>;
}
@@ -786,6 +803,6 @@ let Uses = [ECX], Defs = [EAX, EDX] in
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
- def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, PS,
+ def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
Requires<[HasPCONFIG]>;
} // SchedRW
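
The new WRSS*_EVEX definitions show the other half of the pattern: under HasEGPR the legacy CET stores are re-encoded in EVEX map 4 (T_MAP4) so they can reach the APX extended registers, with NoCD8 disabling compressed-displacement scaling. A hedged sketch of the selection the predicates express (the function name is mine):

static bool useEvexMap4Form(bool targetHasEGPR) {
  // The TableGen predicates above partition on the target feature alone:
  // NoEGPR selects the legacy T8 encoding, HasEGPR the EVEX map-4 one,
  // since R16-R31 are not encodable in the legacy form.
  return targetHasEGPR;
}
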
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td
index ed514038a12e..09200f0c1a9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTBM.td
@@ -46,11 +46,11 @@ multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
let hasSideEffects = 0 in {
def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
- XOP_4V, XOP9, Sched<[Sched]>;
+ XOP, VVVV, XOP9, Sched<[Sched]>;
let mayLoad = 1 in
def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
- XOP_4V, XOP9, Sched<[Sched.Folded]>;
+ XOP, VVVV, XOP9, Sched<[Sched.Folded]>;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td
index 8d7cd6082095..fe01677b2ea1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTDX.td
@@ -17,23 +17,17 @@
// 64-bit only instructions
let SchedRW = [WriteSystem], Predicates = [In64BitMode] in {
// SEAMCALL - Call to SEAM VMX-root Operation Module
-def SEAMCALL : I<0x01, MRM_CF, (outs), (ins),
- "seamcall", []>, PD;
+def SEAMCALL : I<0x01, MRM_CF, (outs), (ins), "seamcall", []>, TB, PD;
// SEAMRET - Return to Legacy VMX-root Operation
-def SEAMRET : I<0x01, MRM_CD, (outs), (ins),
- "seamret", []>, PD;
+def SEAMRET : I<0x01, MRM_CD, (outs), (ins), "seamret", []>, TB, PD;
// SEAMOPS - SEAM Operations
-def SEAMOPS : I<0x01, MRM_CE, (outs), (ins),
- "seamops", []>, PD;
-
+def SEAMOPS : I<0x01, MRM_CE, (outs), (ins), "seamops", []>, TB, PD;
} // SchedRW
// common instructions
let SchedRW = [WriteSystem] in {
// TDCALL - Call SEAM Module Functions
-def TDCALL : I<0x01, MRM_CC, (outs), (ins),
- "tdcall", []>, PD;
-
+def TDCALL : I<0x01, MRM_CC, (outs), (ins), "tdcall", []>, TB, PD;
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
index 7671eb4676ee..57604b682d54 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
@@ -37,11 +37,11 @@ def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
}
def XEND : I<0x01, MRM_D5, (outs), (ins),
- "xend", [(int_x86_xend)]>, PS, Requires<[HasRTM]>;
+ "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
- "xtest", [(set EFLAGS, (X86xtest))]>, PS, Requires<[HasRTM]>;
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
index 2f056f2ead62..9499753143d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
@@ -27,50 +27,26 @@ class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
-class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
-class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
-class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
+class T_MAP4 { Map OpMap = T_MAP4; }
+class T_MAP5 { Map OpMap = T_MAP5; }
+class T_MAP6 { Map OpMap = T_MAP6; }
+class T_MAP7 { Map OpMap = T_MAP7; }
+class XOP8 { Map OpMap = XOP8; }
+class XOP9 { Map OpMap = XOP9; }
+class XOPA { Map OpMap = XOPA; }
class ThreeDNow { Map OpMap = ThreeDNow; }
-class T_MAP4 { Map OpMap = T_MAP4; }
-class T_MAP4PS : T_MAP4 { Prefix OpPrefix = PS; } // none
-class T_MAP4PD : T_MAP4 { Prefix OpPrefix = PD; } // 0x66
-class T_MAP4XS : T_MAP4 { Prefix OpPrefix = XS; } // 0xF3
-class T_MAP4XD : T_MAP4 { Prefix OpPrefix = XD; } // 0xF2
-class T_MAP5 { Map OpMap = T_MAP5; }
-class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none
-class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66
-class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3
-class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2
-class T_MAP6 { Map OpMap = T_MAP6; }
-class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; }
-class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; }
-class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; }
-class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; }
-class T_MAP7 { Map OpMap = T_MAP7; }
-class T_MAP7XS : T_MAP7 { Prefix OpPrefix = XS; } // 0xF3
-class T_MAP7XD : T_MAP7 { Prefix OpPrefix = XD; } // 0xF2
-class OBXS { Prefix OpPrefix = XS; }
-class PS : TB { Prefix OpPrefix = PS; }
-class PD : TB { Prefix OpPrefix = PD; }
-class XD : TB { Prefix OpPrefix = XD; }
-class XS : TB { Prefix OpPrefix = XS; }
-class T8PS : T8 { Prefix OpPrefix = PS; }
-class T8PD : T8 { Prefix OpPrefix = PD; }
-class T8XD : T8 { Prefix OpPrefix = XD; }
-class T8XS : T8 { Prefix OpPrefix = XS; }
-class TAPS : TA { Prefix OpPrefix = PS; }
-class TAPD : TA { Prefix OpPrefix = PD; }
-class TAXD : TA { Prefix OpPrefix = XD; }
-class TAXS : TA { Prefix OpPrefix = XS; }
+class PS { Prefix OpPrefix = PS; }
+class PD { Prefix OpPrefix = PD; }
+class XD { Prefix OpPrefix = XD; }
+class XS { Prefix OpPrefix = XS; }
class VEX { Encoding OpEnc = EncVEX; }
class WIG { bit IgnoresW = 1; }
// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX.
class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
-class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
+class VVVV { bit hasVEX_4V = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
-class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
@@ -86,29 +62,28 @@ class EVEX_CD8<int esize, CD8VForm form> {
int CD8_EltSize = !srl(esize, 3);
bits<3> CD8_Form = form.Value;
}
-class EVEX_NoCD8 : EVEX { bits<7> CD8_Scale = 0; }
+class NoCD8 { bits<7> CD8_Scale = 0; }
class XOP { Encoding OpEnc = EncXOP; }
-class XOP_4V : XOP { bit hasVEX_4V = 1; }
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
-class AVX512BIi8Base : PD {
+class AVX512BIi8Base : TB, PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
-class AVX512XSIi8Base : XS {
+class AVX512XSIi8Base : TB, XS {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
-class AVX512XDIi8Base : XD {
+class AVX512XDIi8Base : TB, XD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
-class AVX512PSIi8Base : PS {
+class AVX512PSIi8Base : TB {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
-class AVX512PDIi8Base : PD {
+class AVX512PDIi8Base : TB, PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
@@ -116,6 +91,14 @@ class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
class ExplicitREX2Prefix { ExplicitOpPrefix explicitOpPrefix = ExplicitREX2; }
class ExplicitVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitVEX; }
class ExplicitEVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitEVEX; }
+class DefEFLAGS { list<Register> Defs = [EFLAGS]; }
+class UseEFLAGS { list<Register> Uses = [EFLAGS]; }
+class DisassembleOnly {
+ // The disassembler should know about this, but not the asmparser.
+ bit isCodeGenOnly = 1;
+ bit ForceDisassemble = 1;
+}
+
// SchedModel info for instruction that loads one value and gets the second
// (and possibly third) value from a register.
@@ -139,8 +122,7 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
Operand immoperand, SDPatternOperator immoperator,
SDPatternOperator immnosuoperator, Operand imm8operand,
SDPatternOperator imm8operator, SDPatternOperator imm8nosuoperator,
- bit hasOddOpcode, OperandSize opSize,
- bit hasREX_W> {
+ bit hasEvenOpcode, bit hasREX_W> {
/// VT - This is the value type itself.
ValueType VT = vt;
@@ -189,15 +171,10 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
SDPatternOperator Imm8NoSuOperator = imm8nosuoperator;
- /// HasOddOpcode - This bit is true if the instruction should have an odd (as
- /// opposed to even) opcode. Operations on i8 are usually even, operations on
- /// other datatypes are odd.
- bit HasOddOpcode = hasOddOpcode;
-
- /// OpSize - Selects whether the instruction needs a 0x66 prefix based on
- /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this
- /// to Opsize16. i32 sets this to OpSize32.
- OperandSize OpSize = opSize;
+ /// HasEvenOpcode - This bit is true if the instruction should have an even (as
+ /// opposed to odd) opcode. Operations on i8 are even, operations on
+ /// other datatypes are usually odd.
+ bit HasEvenOpcode = hasEvenOpcode;
/// HasREX_W - This bit is set to true if the instruction should have
/// the 0x40 REX prefix. This is set for i64 types.
@@ -208,16 +185,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm,
imm_su, imm, i8imm, invalid_node, invalid_node,
- 0, OpSizeFixed, 0>;
+ 1, 0>;
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm,
imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8,
- 1, OpSize16, 0>;
+ 0, 0>;
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, Imm32, i32imm,
imm_su, imm, i32i8imm, i32immSExt8_su, i32immSExt8,
- 1, OpSize32, 0>;
+ 0, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm,
- i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su,
- i64immSExt8, 1, OpSizeFixed, 1>;
+ i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su,
+ i64immSExt8, 0, 1>;
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
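
The HasOddOpcode to HasEvenOpcode inversion above matches the x86 ALU opcode convention: the byte-sized form takes the even opcode and the wider forms take the odd one. A one-line illustration (hypothetical helper, not LLVM code):

#include <cstdint>

// ADD r/m8,r8 = 0x00 vs ADD r/m32,r32 = 0x01; OR = 0x08/0x09, and so on.
static uint8_t aluOpcode(uint8_t evenBase, bool isByteOp) {
  return isByteOp ? evenBase : (uint8_t)(evenBase | 1);
}
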
@@ -585,26 +562,26 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
+ : I<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
- Requires<[HasAVX]>;
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+ TB, Requires<[HasAVX]>;
// SSE2 Instruction Templates:
//
@@ -626,49 +603,49 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
+ : I<o, F, outs, ins, asm, pattern>, TB, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
+ : I<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XD,
Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
- PD, Requires<[HasAVX]>;
+ TB, PD, Requires<[HasAVX]>;
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, TB, PD,
Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
+ : I<o, F, outs, ins, asm, pattern>, TB, PD, Requires<[UseSSE2]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, XD, Requires<[HasMMX, HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, XS, Requires<[HasMMX, HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -678,15 +655,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD,
Requires<[UseSSE3]>;
@@ -703,19 +680,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasMMX, HasSSSE3]>;
// SSE4.1 Instruction Templates:
@@ -725,11 +702,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
@@ -737,13 +714,13 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[UseSSE42]>;
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[UseSSE42]>;
// CRC32I - SSE 4.2 CRC32 instructions.
@@ -751,42 +728,42 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// controlled by the SSE42 flag.
class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
+ : I<o, F, outs, ins, asm, pattern>, T8, XD, Requires<[HasCRC32]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
-// AVX8I - AVX instructions with T8PD prefix.
-// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
+// AVX8I - AVX instructions with T8, PD prefix.
+// AVXAIi8 - AVX instructions with TA, PD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[HasAVX]>;
// AVX2 Instruction Templates:
// Instructions introduced in AVX2 (no SSE equivalent forms)
//
-// AVX28I - AVX2 instructions with T8PD prefix.
-// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
+// AVX28I - AVX2 instructions with T8, PD prefix.
+// AVX2AIi8 - AVX2 instructions with TA, PD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[HasAVX2]>;
// AVX-512 Instruction Templates:
// Instructions introduced in AVX-512 (no SSE equivalent forms)
//
-// AVX5128I - AVX-512 instructions with T8PD prefix.
-// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
+// AVX5128I - AVX-512 instructions with T8, PD prefix.
+// AVX512AIi8 - AVX-512 instructions with TA, PD prefix and ImmT = Imm8.
// AVX512PDI - AVX-512 instructions with PD, double packed.
// AVX512PSI - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
@@ -796,39 +773,39 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[HasAVX512]>;
-class AVX5128IBase : T8PD {
+class AVX5128IBase : T8, PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, XS,
Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS,
+ : I<o, F, outs, ins, asm, pattern>, TB, XS,
Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, XD,
Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, PD,
Requires<[HasAVX512]>;
-class AVX512BIBase : PD {
+class AVX512BIBase : TB, PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TB, PD,
Requires<[HasAVX512]>;
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[HasAVX512]>;
-class AVX512AIi8Base : TAPD {
+class AVX512AIi8Base : TA, PD {
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -837,11 +814,11 @@ class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, PD,
Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
@@ -851,8 +828,8 @@ class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- EVEX_4V, Requires<[HasAVX512]>;
+ : I<o, F, outs, ins, asm, pattern>, T8, PD,
+ EVEX, VVVV, Requires<[HasAVX512]>;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
@@ -864,46 +841,46 @@ class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, PD,
Requires<[NoAVX, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[NoAVX, HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
+ : I<o, F, outs, ins, asm, pattern>, T8, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
+ : I<o, F, outs, ins, asm, pattern>, T8, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
+ : I<o, F, outs, ins, asm, pattern>, T8, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA, NoAVX512]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TA, PD,
+ VEX, VVVV, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -925,8 +902,8 @@ class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- VEX_4V, Requires<[HasXOP]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
+ VEX, VVVV, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
@@ -959,57 +936,36 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMX Instruction templates
//
-
// MMXI - MMX instructions with TB prefix.
-// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode.
-// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
-// MMX2I - MMX / SSE2 instructions with PD prefix.
-// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
+// MMXRI - MMX instructions with TB prefix and REX.W.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
-// MMXID - MMX instructions with XD prefix.
-// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
-class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
-class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, REX_W,
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W,
Requires<[HasMMX,In64BitMode]>;
-class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
-class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
-class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
/// ITy - This instruction base class takes the type info for the instruction.
/// Using this, it:
/// 1. Concatenates together the instruction mnemonic with the appropriate
/// suffix letter, a tab, and the arguments.
-/// 2. Infers whether the instruction should have a 0x66 prefix byte.
-/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
-/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// 2. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 3. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
string mnemonic, string args, list<dag> pattern>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
- opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
+ opcode{3}, opcode{2}, opcode{1},
+ !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins,
!strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
- // Infer instruction prefixes from type info.
- let OpSize = typeinfo.OpSize;
+ let hasSideEffects = 0;
let hasREX_W = typeinfo.HasREX_W;
}
+
+defvar binop_args = "{$src2, $src1|$src1, $src2}";
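The ITy change above replaces HasOddOpcode with HasEvenOpcode: byte-sized operations keep the even base opcode, while the i16/i32/i64 forms carry the opcode's low bit as written. A minimal standalone C++ sketch of the underlying x86 encoding rule (illustrative names, not LLVM API):

#include <cstdint>

// For size-suffixed ALU ops, the 8-bit form uses the even opcode and the
// wider forms use the odd one, e.g. ADD r/m8,r8 = 0x00, ADD r/m32,r32 = 0x01.
static uint8_t encodeOpcode(uint8_t BaseOpcode, bool IsByteOp) {
  return IsByteOp ? uint8_t(BaseOpcode & ~1u) : uint8_t(BaseOpcode | 1u);
}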
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
index 5289819119ce..7cc468fe15ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
@@ -17,33 +17,33 @@
let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[In64BitMode]>;
def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>,
- EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[In64BitMode]>;
def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmclear\t$vmcs", []>, PD;
+ "vmclear\t$vmcs", []>, TB, PD;
// 0F 01 D4
-def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, PS;
+def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
@@ -51,35 +51,35 @@ def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmptrld\t$vmcs", []>, PS;
+ "vmptrld\t$vmcs", []>, TB;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
- "vmptrst\t$vmcs", []>, PS;
+ "vmptrst\t$vmcs", []>, TB;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>;
} // mayLoad
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t$vmxon", []>, XS;
+ "vmxon\t$vmxon", []>, TB, XS;
} // SchedRW
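Throughout these hunks the fused prefix classes (T8PD, TAPD, T8XS, ...) are split into an opcode-map class (TB = 0F, T8 = 0F 38, TA = 0F 3A) plus a mandatory-prefix class (PD = 66, XS = F3, XD = F2). A hedged lookup sketch of that mapping, with encodings taken from the comments above:

#include <cstdio>

int main() {
  struct Row { const char *Mods, *Escape, *Prefix; } Rows[] = {
      {"TB", "0F", "none"},      // e.g. VMCALL:  0F 01 C1
      {"TB, PD", "0F", "66"},    // e.g. VMCLEAR: 66 0F C7 /6
      {"TB, XS", "0F", "F3"},    // e.g. VMXON:   F3 0F C7 /6
      {"T8, PD", "0F 38", "66"}, // e.g. INVEPT:  66 0F 38 80
      {"TA, PD", "0F 3A", "66"},
  };
  for (const Row &R : Rows)
    std::printf("%-7s -> escape %-5s prefix %s\n", R.Mods, R.Escape, R.Prefix);
}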
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
index a62bb2e855c9..1504d77bfb86 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
@@ -105,7 +105,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)))))]>,
- XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -119,7 +119,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
- XOP_4V, REX_W, Sched<[sched]>;
+ XOP, VVVV, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
@@ -173,7 +173,7 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP, VVVV,
Sched<[sched]>;
def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
@@ -181,7 +181,7 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (load addr:$src2),
- VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ VR128:$src3))]>, XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -252,7 +252,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
timm:$cc)))]>,
- XOP_4V, Sched<[sched]>;
+ XOP, VVVV, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$cc),
!strconcat("vpcom", Suffix,
@@ -261,7 +261,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)),
timm:$cc)))]>,
- XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def : Pat<(OpNode (load addr:$src2),
@@ -288,7 +288,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched]>;
+ XOP, VVVV, Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
@@ -296,7 +296,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (load addr:$src3)))))]>,
- XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
+ XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -304,7 +304,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
@@ -316,7 +316,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, REX_W, Sched<[sched]>;
+ []>, XOP, VVVV, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
@@ -333,7 +333,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
- (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
+ (X86andnp RC:$src3, RC:$src2))))]>, XOP, VVVV,
Sched<[sched]>;
// FIXME: We can't write a pattern for this in tablegen.
let hasSideEffects = 0, mayLoad = 1 in
@@ -342,14 +342,14 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
+ XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
- XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
@@ -361,7 +361,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, REX_W, Sched<[sched]>;
+ []>, XOP, VVVV, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
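The XOP_4V to XOP, VVVV rename mirrors the VEX_4V to VEX, VVVV split: VVVV now only states that the extra register operand travels in the vvvv field, stored one's-complemented in bits [6:3] of the payload byte preceding the opcode. A small sketch of that bit packing (not LLVM API):

#include <cstdint>

static uint8_t packVvvv(uint8_t PayloadByte, uint8_t Reg /* 0..15 */) {
  uint8_t Inverted = uint8_t(~Reg) & 0xF; // vvvv is stored inverted
  return uint8_t((PayloadByte & ~0x78) | (Inverted << 3)); // bits 6:3
}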
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
index 82667b8cdbdb..c0fa9aa70324 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -68,7 +68,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() {
- return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent,
+ /*BranchOnly=*/true);
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
index d63f1ca1695b..07f535685e8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -69,11 +69,11 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
unsigned char
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+ CodeModel::Model CM = TM.getCodeModel();
// Tagged globals have non-zero upper bits, which makes direct references
- // require a 64-bit immediate. On the small code model this causes relocation
- // errors, so we go through the GOT instead.
- if (AllowTaggedGlobals && TM.getCodeModel() == CodeModel::Small && GV &&
- !isa<Function>(GV))
+ // require a 64-bit immediate. With the small/medium code models this causes
+ // relocation errors, so we go through the GOT instead.
+ if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(GV))
return X86II::MO_GOTPCREL_NORELAX;
// If we're not PIC, it's not very interesting.
@@ -83,7 +83,6 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
if (is64Bit()) {
// 64-bit ELF PIC local references may use GOTOFF relocations.
if (isTargetELF()) {
- CodeModel::Model CM = TM.getCodeModel();
assert(CM != CodeModel::Tiny &&
"Tiny codesize model not supported on X86");
// In the large code model, all text is far from any global data, so we
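A minimal sketch of the classification rule this hunk generalizes from the small code model to everything but large (simplified inputs, illustrative names):

enum class CodeModel { Small, Medium, Large };

// Tagged non-function globals go through the GOT whenever a direct
// reference would need a 64-bit immediate to carry the tag bits.
static bool useGotForTaggedGlobal(bool AllowTaggedGlobals, bool IsFunction,
                                  CodeModel CM) {
  return AllowTaggedGlobals && !IsFunction && CM != CodeModel::Large;
}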
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index 10b80cad4347..6d3a59d532fd 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -134,13 +134,13 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
}
// Parse a branch protection specification, which has the form
-// standard | none | [bti,pac-ret[+b-key,+leaf]*]
+// standard | none | [bti,pac-ret[+b-key,+leaf,+pc]*]
// Returns true on success, with individual elements of the specification
// returned in `PBP`. Returns false on error, with `Err` containing
// the erroneous part of the spec.
bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
StringRef &Err) {
- PBP = {"none", "a_key", false};
+ PBP = {"none", "a_key", false, false};
if (Spec == "none")
return true; // defaults are ok
@@ -166,6 +166,8 @@ bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
PBP.Scope = "all";
else if (PACOpt == "b-key")
PBP.Key = "b_key";
+ else if (PACOpt == "pc")
+ PBP.BranchProtectionPAuthLR = true;
else
break;
}
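A hedged, self-contained sketch of the extended grammar, simplified so every qualifier is accepted anywhere in the '+'-separated list (the real parser only allows the qualifiers after pac-ret):

#include <sstream>
#include <string>

struct ParsedBP {
  std::string Scope = "none", Key = "a_key";
  bool BTI = false, PAuthLR = false;
};

static bool parseBP(const std::string &Spec, ParsedBP &P) {
  if (Spec == "none")
    return true;
  if (Spec == "standard") {
    P.Scope = "non-leaf";
    P.BTI = true;
    return true;
  }
  std::stringstream SS(Spec);
  std::string Tok;
  while (std::getline(SS, Tok, '+')) {
    if (Tok == "bti") P.BTI = true;
    else if (Tok == "pac-ret") P.Scope = "non-leaf";
    else if (Tok == "leaf") P.Scope = "all";
    else if (Tok == "b-key") P.Key = "b_key";
    else if (Tok == "pc") P.PAuthLR = true; // the new +pc qualifier
    else return false;
  }
  return true;
}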
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
index e61fcb248fae..11c5000acc07 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
@@ -1131,37 +1131,59 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 23:
CPU = "znver1";
*Type = X86::AMDFAM17H;
- if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
+ if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
+ (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
+ (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
+ (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
+ (Model >= 0xa0 && Model <= 0xaf)) {
+ // Family 17h Models 30h-3Fh (Starship) Zen 2
+ // Family 17h Models 47h (Cardinal) Zen 2
+ // Family 17h Models 60h-67h (Renoir) Zen 2
+ // Family 17h Models 68h-6Fh (Lucienne) Zen 2
+ // Family 17h Models 70h-7Fh (Matisse) Zen 2
+ // Family 17h Models 84h-87h (ProjectX) Zen 2
+ // Family 17h Models 90h-97h (VanGogh) Zen 2
+ // Family 17h Models 98h-9Fh (Mero) Zen 2
+ // Family 17h Models A0h-AFh (Mendocino) Zen 2
CPU = "znver2";
*Subtype = X86::AMDFAM17H_ZNVER2;
- break; // 30h-3fh, 71h: Zen2
+ break;
}
- if (Model <= 0x0f) {
+ if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
+ // Family 17h Models 10h-1Fh (Raven1) Zen
+ // Family 17h Models 10h-1Fh (Picasso) Zen+
+ // Family 17h Models 20h-2Fh (Raven2 x86) Zen
*Subtype = X86::AMDFAM17H_ZNVER1;
- break; // 00h-0Fh: Zen1
+ break;
}
break;
case 25:
CPU = "znver3";
*Type = X86::AMDFAM19H;
- if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
- // Family 19h Models 00h-0Fh - Zen3
- // Family 19h Models 20h-2Fh - Zen3
- // Family 19h Models 30h-3Fh - Zen3
- // Family 19h Models 40h-4Fh - Zen3+
- // Family 19h Models 50h-5Fh - Zen3+
+ if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
+ (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
+ (Model >= 0x50 && Model <= 0x5f)) {
+ // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
+ // Family 19h Models 20h-2Fh (Vermeer) Zen 3
+ // Family 19h Models 30h-3Fh (Badami) Zen 3
+ // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
+ // Family 19h Models 50h-5Fh (Cezanne) Zen 3
*Subtype = X86::AMDFAM19H_ZNVER3;
break;
}
- if ((Model >= 0x10 && Model <= 0x1f) ||
- (Model >= 0x60 && Model <= 0x74) ||
- (Model >= 0x78 && Model <= 0x7b) ||
- (Model >= 0xA0 && Model <= 0xAf)) {
+ if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
+ (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
+ (Model >= 0xa0 && Model <= 0xaf)) {
+ // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
+ // Family 19h Models 60h-6Fh (Raphael) Zen 4
+ // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
+ // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
+ // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
CPU = "znver4";
*Subtype = X86::AMDFAM19H_ZNVER4;
break; // "znver4"
}
- break; // family 19h
+ break;
default:
break; // Unknown AMD CPU.
}
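For context on the model-range tables above: family and model come from CPUID leaf 1 EAX, with the extended fields folded in. A standard (non-LLVM) decoding sketch:

#include <cstdint>

static void decodeSignature(uint32_t EAX, unsigned &Family, unsigned &Model) {
  Family = (EAX >> 8) & 0xF;
  Model = (EAX >> 4) & 0xF;
  if (Family == 0x6 || Family == 0xF) {
    if (Family == 0xF)
      Family += (EAX >> 20) & 0xFF;    // extended family, bits 20-27
    Model += ((EAX >> 16) & 0xF) << 4; // extended model, bits 16-19
  }
}

With this, an EAX signature of the 0x00A60F12 kind decodes to family 25 (19h) and model 0x61, which the ranges above classify as Raphael, i.e. znver4.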
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
index ac04dab04897..d475650c2d18 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
@@ -819,8 +819,6 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
}
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
- if (T.isOSDarwin())
- return Triple::MachO;
switch (T.getArch()) {
case Triple::UnknownArch:
case Triple::aarch64:
@@ -829,12 +827,13 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::thumb:
case Triple::x86:
case Triple::x86_64:
- if (T.isOSWindows())
+ switch (T.getOS()) {
+ case Triple::Win32:
+ case Triple::UEFI:
return Triple::COFF;
- else if (T.isUEFI())
- return Triple::COFF;
- return Triple::ELF;
-
+ default:
+ return T.isOSDarwin() ? Triple::MachO : Triple::ELF;
+ }
case Triple::aarch64_be:
case Triple::amdgcn:
case Triple::amdil64:
@@ -887,6 +886,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::ppc:
if (T.isOSAIX())
return Triple::XCOFF;
+ if (T.isOSDarwin())
+ return Triple::MachO;
return Triple::ELF;
case Triple::systemz:
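A hedged usage sketch of the defaults this switch now encodes on x86-64 (requires LLVM's TargetParser library):

#include "llvm/TargetParser/Triple.h"
#include <cassert>

int main() {
  using llvm::Triple;
  assert(Triple("x86_64-unknown-uefi").getObjectFormat() == Triple::COFF);
  assert(Triple("x86_64-pc-windows-msvc").getObjectFormat() == Triple::COFF);
  assert(Triple("x86_64-apple-darwin").getObjectFormat() == Triple::MachO);
  assert(Triple("x86_64-unknown-linux-gnu").getObjectFormat() == Triple::ELF);
}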
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp b/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp
new file mode 100644
index 000000000000..40b57b5e40ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TextAPI/BinaryReader/DylibReader.cpp
@@ -0,0 +1,429 @@
+//===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Implements the TAPI Reader for Mach-O dynamic libraries.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/DylibReader.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/TextAPI/RecordsSlice.h"
+#include "llvm/TextAPI/TextAPIError.h"
+#include <iomanip>
+#include <set>
+#include <sstream>
+#include <string>
+#include <tuple>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::MachO;
+using namespace llvm::MachO::DylibReader;
+
+using TripleVec = std::vector<Triple>;
+static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
+ auto I = partition_point(Container, [=](const Triple &CT) {
+ return std::forward_as_tuple(CT.getArch(), CT.getOS(),
+ CT.getEnvironment()) <
+ std::forward_as_tuple(T.getArch(), T.getOS(), T.getEnvironment());
+ });
+
+ if (I != Container.end() && *I == T)
+ return I;
+ return Container.emplace(I, T);
+}
+
+static TripleVec constructTriples(MachOObjectFile *Obj,
+ const Architecture ArchT) {
+ auto getOSVersionStr = [](uint32_t V) {
+ PackedVersion OSVersion(V);
+ std::string Vers;
+ raw_string_ostream VStream(Vers);
+ VStream << OSVersion;
+ return VStream.str();
+ };
+ auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) {
+ auto Vers = Obj->getVersionMinLoadCommand(cmd);
+ return getOSVersionStr(Vers.version);
+ };
+
+ TripleVec Triples;
+ bool IsIntel = ArchitectureSet(ArchT).hasX86();
+ auto Arch = getArchitectureName(ArchT);
+
+ for (const auto &cmd : Obj->load_commands()) {
+ std::string OSVersion;
+ switch (cmd.C.cmd) {
+ case MachO::LC_VERSION_MIN_MACOSX:
+ OSVersion = getOSVersion(cmd);
+ emplace(Triples, {Arch, "apple", "macos" + OSVersion});
+ break;
+ case MachO::LC_VERSION_MIN_IPHONEOS:
+ OSVersion = getOSVersion(cmd);
+ if (IsIntel)
+ emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
+ else
+ emplace(Triples, {Arch, "apple", "ios" + OSVersion});
+ break;
+ case MachO::LC_VERSION_MIN_TVOS:
+ OSVersion = getOSVersion(cmd);
+ if (IsIntel)
+ emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
+ else
+ emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
+ break;
+ case MachO::LC_VERSION_MIN_WATCHOS:
+ OSVersion = getOSVersion(cmd);
+ if (IsIntel)
+ emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
+ else
+ emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
+ break;
+ case MachO::LC_BUILD_VERSION: {
+ OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(cmd).minos);
+ switch (Obj->getBuildVersionLoadCommand(cmd).platform) {
+ case MachO::PLATFORM_MACOS:
+ emplace(Triples, {Arch, "apple", "macos" + OSVersion});
+ break;
+ case MachO::PLATFORM_IOS:
+ emplace(Triples, {Arch, "apple", "ios" + OSVersion});
+ break;
+ case MachO::PLATFORM_TVOS:
+ emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
+ break;
+ case MachO::PLATFORM_WATCHOS:
+ emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
+ break;
+ case MachO::PLATFORM_BRIDGEOS:
+ emplace(Triples, {Arch, "apple", "bridgeos" + OSVersion});
+ break;
+ case MachO::PLATFORM_MACCATALYST:
+ emplace(Triples, {Arch, "apple", "ios" + OSVersion, "macabi"});
+ break;
+ case MachO::PLATFORM_IOSSIMULATOR:
+ emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
+ break;
+ case MachO::PLATFORM_TVOSSIMULATOR:
+ emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
+ break;
+ case MachO::PLATFORM_WATCHOSSIMULATOR:
+ emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
+ break;
+ case MachO::PLATFORM_DRIVERKIT:
+ emplace(Triples, {Arch, "apple", "driverkit" + OSVersion});
+ break;
+ default:
+ break; // Skip any others.
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Record unknown platform for older binaries that don't enforce platform
+ // load commands.
+ if (Triples.empty())
+ emplace(Triples, {Arch, "apple", "unknown"});
+
+ return Triples;
+}
+
+static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
+ auto H = Obj->getHeader();
+ auto &BA = Slice.getBinaryAttrs();
+
+ switch (H.filetype) {
+ default:
+ llvm_unreachable("unsupported binary type");
+ case MachO::MH_DYLIB:
+ BA.File = FileType::MachO_DynamicLibrary;
+ break;
+ case MachO::MH_DYLIB_STUB:
+ BA.File = FileType::MachO_DynamicLibrary_Stub;
+ break;
+ case MachO::MH_BUNDLE:
+ BA.File = FileType::MachO_Bundle;
+ break;
+ }
+
+ if (H.flags & MachO::MH_TWOLEVEL)
+ BA.TwoLevelNamespace = true;
+ if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
+ BA.AppExtensionSafe = true;
+
+ for (const auto &LCI : Obj->load_commands()) {
+ switch (LCI.C.cmd) {
+ case MachO::LC_ID_DYLIB: {
+ auto DLLC = Obj->getDylibIDLoadCommand(LCI);
+ BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name);
+ BA.CurrentVersion = DLLC.dylib.current_version;
+ BA.CompatVersion = DLLC.dylib.compatibility_version;
+ break;
+ }
+ case MachO::LC_REEXPORT_DYLIB: {
+ auto DLLC = Obj->getDylibIDLoadCommand(LCI);
+ BA.RexportedLibraries.emplace_back(
+ Slice.copyString(LCI.Ptr + DLLC.dylib.name));
+ break;
+ }
+ case MachO::LC_SUB_FRAMEWORK: {
+ auto SFC = Obj->getSubFrameworkCommand(LCI);
+ BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella);
+ break;
+ }
+ case MachO::LC_SUB_CLIENT: {
+ auto SCLC = Obj->getSubClientCommand(LCI);
+ BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client));
+ break;
+ }
+ case MachO::LC_UUID: {
+ auto UUIDLC = Obj->getUuidCommand(LCI);
+ std::stringstream Stream;
+ for (unsigned I = 0; I < 16; ++I) {
+ if (I == 4 || I == 6 || I == 8 || I == 10)
+ Stream << '-';
+ Stream << std::setfill('0') << std::setw(2) << std::uppercase
+ << std::hex << static_cast<int>(UUIDLC.uuid[I]);
+ }
+ BA.UUID = Slice.copyString(Stream.str());
+ break;
+ }
+ case MachO::LC_RPATH: {
+ auto RPLC = Obj->getRpathCommand(LCI);
+ BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path));
+ break;
+ }
+ case MachO::LC_SEGMENT_SPLIT_INFO: {
+ auto SSILC = Obj->getLinkeditDataLoadCommand(LCI);
+ if (SSILC.datasize == 0)
+ BA.OSLibNotForSharedCache = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (auto &Sect : Obj->sections()) {
+ auto SectName = Sect.getName();
+ if (!SectName)
+ return SectName.takeError();
+ if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
+ continue;
+
+ auto Content = Sect.getContents();
+ if (!Content)
+ return Content.takeError();
+
+ if ((Content->size() >= 8) && (Content->front() == 0)) {
+ uint32_t Flags;
+ if (Obj->isLittleEndian()) {
+ auto *p =
+ reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4);
+ Flags = *p;
+ } else {
+ auto *p =
+ reinterpret_cast<const support::ubig32_t *>(Content->data() + 4);
+ Flags = *p;
+ }
+ BA.SwiftABI = (Flags >> 8) & 0xFF;
+ }
+ }
+ return Error::success();
+}
+
+static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
+ const ParseOption &Opt) {
+
+ auto parseExport = [](const auto ExportFlags,
+ auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> {
+ SymbolFlags Flags = SymbolFlags::None;
+ switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
+ case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
+ if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
+ Flags |= SymbolFlags::WeakDefined;
+ break;
+ case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
+ Flags |= SymbolFlags::ThreadLocalValue;
+ break;
+ }
+
+ RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
+ ? RecordLinkage::Rexported
+ : RecordLinkage::Exported;
+ return {Flags, Linkage};
+ };
+
+ Error Err = Error::success();
+
+ StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
+ // Collect symbols from export trie first. Sometimes, there are more exports
+ // in the trie than in n-list due to stripping. This is common for swift
+ // mangled symbols.
+ for (auto &Sym : Obj->exports(Err)) {
+ auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address());
+ Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage);
+ Exports[Sym.name()] = {Flags, Linkage};
+ }
+
+ for (const auto &Sym : Obj->symbols()) {
+ auto FlagsOrErr = Sym.getFlags();
+ if (!FlagsOrErr)
+ return FlagsOrErr.takeError();
+ auto Flags = *FlagsOrErr;
+
+ auto NameOrErr = Sym.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ auto Name = *NameOrErr;
+
+ RecordLinkage Linkage = RecordLinkage::Unknown;
+ SymbolFlags RecordFlags = SymbolFlags::None;
+
+ if (Opt.Undefineds && (Flags & SymbolRef::SF_Undefined)) {
+ Linkage = RecordLinkage::Undefined;
+ if (Flags & SymbolRef::SF_Weak)
+ RecordFlags |= SymbolFlags::WeakReferenced;
+ } else if (Flags & SymbolRef::SF_Exported) {
+ auto Exp = Exports.find(Name);
+ // This should never be possible when binaries are produced with Apple
+ // linkers. However it is possible to craft dylibs where the export trie
+ // is either malformed or has conflicting symbols compared to n_list.
+ if (Exp != Exports.end())
+ std::tie(RecordFlags, Linkage) = Exp->second;
+ else
+ Linkage = RecordLinkage::Exported;
+ } else if (Flags & SymbolRef::SF_Hidden) {
+ Linkage = RecordLinkage::Internal;
+ } else
+ continue;
+
+ auto TypeOrErr = Sym.getType();
+ if (!TypeOrErr)
+ return TypeOrErr.takeError();
+ auto Type = *TypeOrErr;
+
+ GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
+ ? GlobalRecord::Kind::Function
+ : GlobalRecord::Kind::Variable;
+
+ if (GV == GlobalRecord::Kind::Function)
+ RecordFlags |= SymbolFlags::Text;
+ else
+ RecordFlags |= SymbolFlags::Data;
+
+ Slice.addRecord(Name, RecordFlags, GV, Linkage);
+ }
+ return Err;
+}
+
+static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
+ const ParseOption &Opt, const Architecture Arch) {
+ if (Arch == AK_unknown)
+ return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
+
+ if (Opt.MachOHeader)
+ if (auto Err = readMachOHeader(Obj, Slice))
+ return Err;
+
+ if (Opt.SymbolTable)
+ if (auto Err = readSymbols(Obj, Slice, Opt))
+ return Err;
+
+ return Error::success();
+}
+
+Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
+ const ParseOption &Opt) {
+ Records Results;
+
+ auto BinOrErr = createBinary(Buffer);
+ if (!BinOrErr)
+ return BinOrErr.takeError();
+
+ Binary &Bin = *BinOrErr.get();
+ if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) {
+ const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype,
+ Obj->getHeader().cpusubtype);
+ if (!Opt.Archs.has(Arch))
+ return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);
+
+ auto Triples = constructTriples(Obj, Arch);
+ for (const auto &T : Triples) {
+ if (mapToPlatformType(T) == PLATFORM_UNKNOWN)
+ return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
+ Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
+ if (auto Err = load(Obj, *Results.back(), Opt, Arch))
+ return std::move(Err);
+ Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
+ }
+ return Results;
+ }
+
+ // Only expect MachO universal binaries at this point.
+ assert(isa<MachOUniversalBinary>(&Bin) &&
+ "Expected a MachO universal binary.");
+ auto *UB = cast<MachOUniversalBinary>(&Bin);
+
+ for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
+ // Skip architecture if not requested.
+ auto Arch =
+ getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType());
+ if (!Opt.Archs.has(Arch))
+ continue;
+
+ // Skip unknown architectures.
+ if (Arch == AK_unknown)
+ continue;
+
+ // This can fail if the object is an archive.
+ auto ObjOrErr = OI->getAsObjectFile();
+
+ // Skip the archive and consume the error.
+ if (!ObjOrErr) {
+ consumeError(ObjOrErr.takeError());
+ continue;
+ }
+
+ auto &Obj = *ObjOrErr.get();
+ switch (Obj.getHeader().filetype) {
+ default:
+ break;
+ case MachO::MH_BUNDLE:
+ case MachO::MH_DYLIB:
+ case MachO::MH_DYLIB_STUB:
+ for (const auto &T : constructTriples(&Obj, Arch)) {
+ Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
+ if (auto Err = load(&Obj, *Results.back(), Opt, Arch))
+ return std::move(Err);
+ }
+ break;
+ }
+ }
+
+ if (Results.empty())
+ return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults);
+ return Results;
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+DylibReader::get(MemoryBufferRef Buffer) {
+ ParseOption Options;
+ auto SlicesOrErr = readFile(Buffer, Options);
+ if (!SlicesOrErr)
+ return SlicesOrErr.takeError();
+
+ return convertToInterfaceFile(*SlicesOrErr);
+}
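A hedged usage sketch for the new reader, loading a dylib from disk and converting it into a TAPI interface file (error handling abbreviated; readDylibAsInterface is an illustrative name):

#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TextAPI/DylibReader.h"

llvm::Expected<std::unique_ptr<llvm::MachO::InterfaceFile>>
readDylibAsInterface(llvm::StringRef Path) {
  auto BufOrErr = llvm::MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return llvm::errorCodeToError(BufOrErr.getError());
  // DylibReader::get applies its default ParseOption internally.
  return llvm::MachO::DylibReader::get((*BufOrErr)->getMemBufferRef());
}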
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp b/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp
new file mode 100644
index 000000000000..cee04e644755
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TextAPI/RecordVisitor.cpp
@@ -0,0 +1,65 @@
+//===- RecordVisitor.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Implements the TAPI Record Visitor.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/RecordVisitor.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+RecordVisitor::~RecordVisitor() {}
+void RecordVisitor::visitObjCInterface(const ObjCInterfaceRecord &) {}
+void RecordVisitor::visitObjCCategory(const ObjCCategoryRecord &) {}
+
+static bool shouldSkipRecord(const Record &R, const bool RecordUndefs) {
+ if (R.isExported())
+ return false;
+
+  // Skip non-exported symbols unless handling flat-namespace libraries.
+ return !(RecordUndefs && R.isUndefined());
+}
+
+void SymbolConverter::visitGlobal(const GlobalRecord &GR) {
+ auto [SymName, SymKind] = parseSymbol(GR.getName(), GR.getFlags());
+ if (shouldSkipRecord(GR, RecordUndefs))
+ return;
+ Symbols->addGlobal(SymKind, SymName, GR.getFlags(), Targ);
+}
+
+void SymbolConverter::addIVars(const ArrayRef<ObjCIVarRecord *> IVars,
+ StringRef ContainerName) {
+ for (auto *IV : IVars) {
+ if (shouldSkipRecord(*IV, RecordUndefs))
+ continue;
+ std::string Name =
+ ObjCIVarRecord::createScopedName(ContainerName, IV->getName());
+ Symbols->addGlobal(SymbolKind::ObjectiveCInstanceVariable, Name,
+ IV->getFlags(), Targ);
+ }
+}
+
+void SymbolConverter::visitObjCInterface(const ObjCInterfaceRecord &ObjCR) {
+ if (!shouldSkipRecord(ObjCR, RecordUndefs)) {
+ Symbols->addGlobal(SymbolKind::ObjectiveCClass, ObjCR.getName(),
+ ObjCR.getFlags(), Targ);
+ if (ObjCR.hasExceptionAttribute())
+ Symbols->addGlobal(SymbolKind::ObjectiveCClassEHType, ObjCR.getName(),
+ ObjCR.getFlags(), Targ);
+ }
+
+ addIVars(ObjCR.getObjCIVars(), ObjCR.getName());
+ for (const auto *Cat : ObjCR.getObjCCategories())
+ addIVars(Cat->getObjCIVars(), ObjCR.getName());
+}
+
+void SymbolConverter::visitObjCCategory(const ObjCCategoryRecord &Cat) {
+ addIVars(Cat.getObjCIVars(), Cat.getName());
+}
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp
index a220b255aea3..7ceffc7c9284 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/TextAPI/RecordsSlice.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/TextAPI/Record.h"
#include "llvm/TextAPI/Symbol.h"
#include <utility>
@@ -142,8 +143,10 @@ GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage,
if (Result.second)
Result.first->second =
std::make_unique<GlobalRecord>(Name, Linkage, Flags, GV);
- else
+ else {
updateLinkage(Result.first->second.get(), Linkage);
+ updateFlags(Result.first->second.get(), Flags);
+ }
return Result.first->second.get();
}
@@ -164,6 +167,19 @@ ObjCInterfaceRecord *RecordsSlice::addObjCInterface(StringRef Name,
return Result.first->second.get();
}
+SymbolFlags Record::mergeFlags(SymbolFlags Flags, RecordLinkage Linkage) {
+ // Add Linkage properties into Flags.
+ switch (Linkage) {
+ case RecordLinkage::Rexported:
+ Flags |= SymbolFlags::Rexported;
+ return Flags;
+ case RecordLinkage::Undefined:
+ Flags |= SymbolFlags::Undefined;
+ return Flags;
+ default:
+ return Flags;
+ }
+}
bool ObjCInterfaceRecord::addObjCCategory(ObjCCategoryRecord *Record) {
auto Result = Categories.insert({Name, Record});
@@ -188,11 +204,26 @@ ObjCCategoryRecord *RecordsSlice::addObjCCategory(StringRef ClassToExtend,
return Result.first->second.get();
}
+std::vector<ObjCIVarRecord *> ObjCContainerRecord::getObjCIVars() const {
+ std::vector<ObjCIVarRecord *> Records;
+ llvm::for_each(IVars,
+ [&](auto &Record) { Records.push_back(Record.second.get()); });
+ return Records;
+}
+
+std::vector<ObjCCategoryRecord *>
+ObjCInterfaceRecord::getObjCCategories() const {
+ std::vector<ObjCCategoryRecord *> Records;
+ llvm::for_each(Categories,
+ [&](auto &Record) { Records.push_back(Record.second); });
+ return Records;
+}
+
ObjCIVarRecord *ObjCContainerRecord::addObjCIVar(StringRef IVar,
RecordLinkage Linkage) {
auto Result = IVars.insert({IVar, nullptr});
if (Result.second)
- Result.first->second = std::make_unique<ObjCIVarRecord>(Name, Linkage);
+ Result.first->second = std::make_unique<ObjCIVarRecord>(IVar, Linkage);
return Result.first->second.get();
}
@@ -222,3 +253,88 @@ RecordsSlice::BinaryAttrs &RecordsSlice::getBinaryAttrs() {
BA = std::make_unique<BinaryAttrs>();
return *BA;
}
+
+void RecordsSlice::visit(RecordVisitor &V) const {
+ for (auto &G : Globals)
+ V.visitGlobal(*G.second);
+ for (auto &C : Classes)
+ V.visitObjCInterface(*C.second);
+ for (auto &Cat : Categories)
+ V.visitObjCCategory(*Cat.second);
+}
+
+static std::unique_ptr<InterfaceFile>
+createInterfaceFile(const Records &Slices, StringRef InstallName) {
+  // Pick up symbols first.
+ auto Symbols = std::make_unique<SymbolSet>();
+ for (auto &S : Slices) {
+ if (S->empty())
+ continue;
+ auto &BA = S->getBinaryAttrs();
+ if (BA.InstallName != InstallName)
+ continue;
+
+ SymbolConverter Converter(Symbols.get(), S->getTarget(),
+ !BA.TwoLevelNamespace);
+ S->visit(Converter);
+ }
+
+ auto File = std::make_unique<InterfaceFile>(std::move(Symbols));
+ File->setInstallName(InstallName);
+ // Assign other attributes.
+ for (auto &S : Slices) {
+ if (S->empty())
+ continue;
+ auto &BA = S->getBinaryAttrs();
+ if (BA.InstallName != InstallName)
+ continue;
+ const Target &Targ = S->getTarget();
+ File->addTarget(Targ);
+ if (File->getFileType() == FileType::Invalid)
+ File->setFileType(BA.File);
+ if (BA.AppExtensionSafe && !File->isApplicationExtensionSafe())
+ File->setApplicationExtensionSafe();
+ if (BA.TwoLevelNamespace && !File->isTwoLevelNamespace())
+ File->setTwoLevelNamespace();
+ if (BA.OSLibNotForSharedCache && !File->isOSLibNotForSharedCache())
+ File->setOSLibNotForSharedCache();
+ if (File->getCurrentVersion().empty())
+ File->setCurrentVersion(BA.CurrentVersion);
+ if (File->getCompatibilityVersion().empty())
+ File->setCompatibilityVersion(BA.CompatVersion);
+ if (File->getSwiftABIVersion() == 0)
+ File->setSwiftABIVersion(BA.SwiftABI);
+ if (File->getPath().empty())
+ File->setPath(BA.Path);
+ if (!BA.ParentUmbrella.empty())
+ File->addParentUmbrella(Targ, BA.ParentUmbrella);
+ for (const auto &Client : BA.AllowableClients)
+ File->addAllowableClient(Client, Targ);
+ for (const auto &Lib : BA.RexportedLibraries)
+ File->addReexportedLibrary(Lib, Targ);
+ }
+
+ return File;
+}
+
+std::unique_ptr<InterfaceFile>
+llvm::MachO::convertToInterfaceFile(const Records &Slices) {
+ std::unique_ptr<InterfaceFile> File;
+ if (Slices.empty())
+ return File;
+
+ SetVector<StringRef> InstallNames;
+ for (auto &S : Slices) {
+ auto Name = S->getBinaryAttrs().InstallName;
+ if (Name.empty())
+ continue;
+ InstallNames.insert(Name);
+ }
+
+ File = createInterfaceFile(Slices, *InstallNames.begin());
+ for (auto it = std::next(InstallNames.begin()); it != InstallNames.end();
+ ++it)
+ File->addDocument(createInterfaceFile(Slices, *it));
+
+ return File;
+}
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp b/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp
new file mode 100644
index 000000000000..6d85083e0b54
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TextAPI/Utils.cpp
@@ -0,0 +1,40 @@
+//===- Utils.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements utility functions for TextAPI Darwin operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/Utils.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+void llvm::MachO::replace_extension(SmallVectorImpl<char> &Path,
+ const Twine &Extension) {
+ StringRef P(Path.begin(), Path.size());
+ auto ParentPath = sys::path::parent_path(P);
+ auto Filename = sys::path::filename(P);
+
+ if (!ParentPath.ends_with(Filename.str() + ".framework")) {
+ sys::path::replace_extension(Path, Extension);
+ return;
+ }
+  // Framework dylibs do not have a file extension; in those cases the new
+  // extension is appended, e.g. given Path: "Foo.framework/Foo" and Extension:
+  // "tbd", the result is "Foo.framework/Foo.tbd".
+ SmallString<8> Storage;
+ StringRef Ext = Extension.toStringRef(Storage);
+
+ // Append '.' if needed.
+ if (!Ext.empty() && Ext[0] != '.')
+ Path.push_back('.');
+
+ // Append extension.
+ Path.append(Ext.begin(), Ext.end());
+}
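Usage sketch for the helper above, exercising both branches the comment describes:

#include "llvm/ADT/SmallString.h"
#include "llvm/TextAPI/Utils.h"

int main() {
  llvm::SmallString<64> Lib("/usr/lib/libSystem.dylib");
  llvm::MachO::replace_extension(Lib, "tbd"); // -> /usr/lib/libSystem.tbd

  llvm::SmallString<64> Fwk("Foo.framework/Foo");
  llvm::MachO::replace_extension(Fwk, "tbd"); // -> Foo.framework/Foo.tbd
}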
diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index 781bc9a058e1..834903857a88 100644
--- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -171,7 +171,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
if (!Def) {
llvm::errs() << "error parsing definition\n"
- << errorToErrorCode(Def.takeError()).message();
+ << errorToErrorCode(Def.takeError()).message() << "\n";
return 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 8e1f782f7cd8..b2618e35b085 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -291,42 +291,15 @@ static const Value *getPointerOperand(const Instruction *I,
}
/// Helper function to create a pointer based on \p Ptr, and advanced by \p
-/// Offset bytes. To aid later analysis the method tries to build
-/// getelement pointer instructions that traverse the natural type of \p Ptr if
-/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
-/// through a cast to i8*.
-///
-/// TODO: This could probably live somewhere more prominantly if it doesn't
-/// already exist.
-static Value *constructPointer(Type *PtrElemTy, Value *Ptr, int64_t Offset,
- IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
- assert(Offset >= 0 && "Negative offset not supported yet!");
+/// Offset bytes.
+static Value *constructPointer(Value *Ptr, int64_t Offset,
+ IRBuilder<NoFolder> &IRB) {
LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
<< "-bytes\n");
- if (Offset) {
- Type *Ty = PtrElemTy;
- APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
- SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset);
-
- SmallVector<Value *, 4> ValIndices;
- std::string GEPName = Ptr->getName().str();
- for (const APInt &Index : IntIndices) {
- ValIndices.push_back(IRB.getInt(Index));
- GEPName += "." + std::to_string(Index.getZExtValue());
- }
-
- // Create a GEP for the indices collected above.
- Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName);
-
- // If an offset is left we use byte-wise adjustment.
- if (IntOffset != 0) {
- Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
- GEPName + ".b" + Twine(IntOffset.getZExtValue()));
- }
- }
-
- LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
+ if (Offset)
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(Offset),
+ Ptr->getName() + ".b" + Twine(Offset));
return Ptr;
}
@@ -7487,16 +7460,15 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
- Value *Ptr = constructPointer(
- PrivType, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+ Value *Ptr =
+ constructPointer(&Base, PrivStructLayout->getElementOffset(u), IRB);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
Type *PointeeTy = PrivArrayType->getElementType();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
- Value *Ptr =
- constructPointer(PrivType, &Base, u * PointeeTySize, IRB, DL);
+ Value *Ptr = constructPointer(&Base, u * PointeeTySize, IRB);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else {
@@ -7521,8 +7493,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
Type *PointeeTy = PrivStructType->getElementType(u);
- Value *Ptr = constructPointer(
- PrivType, Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+ Value *Ptr =
+ constructPointer(Base, PrivStructLayout->getElementOffset(u), IRB);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
@@ -7531,8 +7503,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Type *PointeeTy = PrivArrayType->getElementType();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
- Value *Ptr =
- constructPointer(PrivType, Base, u * PointeeTySize, IRB, DL);
+ Value *Ptr = constructPointer(Base, u * PointeeTySize, IRB);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b2665161c090..4176d561363f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2053,6 +2053,9 @@ private:
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
<< " functions, result: " << Changed << ".\n");
+ if (Changed == ChangeStatus::CHANGED)
+ OMPInfoCache.invalidateAnalyses();
+
return Changed == ChangeStatus::CHANGED;
}
@@ -3763,7 +3766,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
ConstantInt *ExecModeC =
KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedExecModeC = ConstantInt::get(
- ExecModeC->getType(),
+ ExecModeC->getIntegerType(),
ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
@@ -3792,7 +3795,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
ConstantInt *MayUseNestedParallelismC =
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
- MayUseNestedParallelismC->getType(), NestedParallelism);
+ MayUseNestedParallelismC->getIntegerType(), NestedParallelism);
setMayUseNestedParallelismOfKernelEnvironment(
AssumedMayUseNestedParallelismC);
@@ -3801,7 +3804,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
KernelEnvC);
ConstantInt *AssumedUseGenericStateMachineC =
- ConstantInt::get(UseGenericStateMachineC->getType(), false);
+ ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false);
setUseGenericStateMachineOfKernelEnvironment(
AssumedUseGenericStateMachineC);
}
@@ -4280,8 +4283,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
// kernel is executed in.
assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
"Initially non-SPMD kernel has SPMD exec mode!");
- setExecModeOfKernelEnvironment(ConstantInt::get(
- ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
+ setExecModeOfKernelEnvironment(
+ ConstantInt::get(ExecModeC->getIntegerType(),
+ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
++NumOpenMPTargetRegionKernelsSPMD;
@@ -4332,7 +4336,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// If not SPMD mode, indicate we use a custom state machine now.
setUseGenericStateMachineOfKernelEnvironment(
- ConstantInt::get(UseStateMachineC->getType(), false));
+ ConstantInt::get(UseStateMachineC->getIntegerType(), false));
// If we don't actually need a state machine we are done here. This can
// happen if there simply are no parallel regions. In the resulting kernel
@@ -4658,7 +4662,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
AA.KernelEnvC);
ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
- MayUseNestedParallelismC->getType(), AA.NestedParallelism);
+ MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism);
AA.setMayUseNestedParallelismOfKernelEnvironment(
NewMayUseNestedParallelismC);
}
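Every hunk in this file makes the same mechanical swap: ConstantInt::getIntegerType() returns IntegerType* directly, while getType() returns the more general Type*. A plausible motivation, not stated in this diff, is preparing for ConstantInt to carry vector types, after which getType() could no longer stand in where an integer type is required. The idiom, as a minimal sketch:

#include "llvm/IR/Constants.h"

// Rebuild a constant with a new value at the width of an existing one.
static llvm::ConstantInt *withValue(llvm::ConstantInt *C, uint64_t NewVal) {
  // getIntegerType() avoids a cast<IntegerType>(C->getType()).
  return llvm::ConstantInt::get(C->getIntegerType(), NewVal);
}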
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6c6f0a0eca72..2fd8668d15e2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -794,10 +794,9 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
return R;
auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- auto T = FS->findCallTargetMapAt(CallSite);
Sum = 0;
- if (T)
- for (const auto &T_C : T.get())
+ if (auto T = FS->findCallTargetMapAt(CallSite))
+ for (const auto &T_C : *T)
Sum += T_C.second;
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
if (M->empty())
@@ -1679,7 +1678,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
if (!FS)
continue;
auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- auto T = FS->findCallTargetMapAt(CallSite);
+ ErrorOr<SampleRecord::CallTargetMap> T =
+ FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
if (FunctionSamples::ProfileIsProbeBased) {
@@ -2261,9 +2261,8 @@ void SampleProfileMatcher::countProfileCallsiteMismatches(
// Compute number of samples in the original profile.
uint64_t CallsiteSamples = 0;
- auto CTM = FS.findCallTargetMapAt(Loc);
- if (CTM) {
- for (const auto &I : CTM.get())
+ if (auto CTM = FS.findCallTargetMapAt(Loc)) {
+ for (const auto &I : *CTM)
CallsiteSamples += I.second;
}
const auto *FSMap = FS.findFunctionSamplesMapAt(Loc);
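All three hunks converge on one shape: findCallTargetMapAt returns an ErrorOr, whose explicit operator bool signals success, so initializing it inside the if both performs the check and scopes the binding to the loop. A self-contained sketch of the ErrorOr idiom (the lookup function below is a toy stand-in, not the SampleProfile API):

#include "llvm/Support/ErrorOr.h"
#include <cstdint>
#include <map>

// Toy stand-in: succeed with a small callee -> sample-count map.
static llvm::ErrorOr<std::map<int, uint64_t>> lookup(bool Found) {
  if (!Found)
    return std::errc::invalid_argument; // error path
  return std::map<int, uint64_t>{{1, 10}, {2, 32}};
}

static uint64_t sumSamples() {
  uint64_t Sum = 0;
  if (auto T = lookup(true))     // explicit operator bool: success?
    for (const auto &Entry : *T) // operator*: the contained map
      Sum += Entry.second;
  return Sum;                    // 42
}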
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5e362f4117d0..63b1e0f64a88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3956,35 +3956,50 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
const APInt *LC, *RC;
if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) &&
LHS0->getType() == RHS0->getType() &&
- LHS0->getType()->isIntOrIntVectorTy() &&
- (LHS->hasOneUse() || RHS->hasOneUse())) {
+ LHS0->getType()->isIntOrIntVectorTy()) {
// Convert xor of signbit tests to signbit test of xor'd values:
// (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
// (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
// (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
// (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
bool TrueIfSignedL, TrueIfSignedR;
- if (isSignBitCheck(PredL, *LC, TrueIfSignedL) &&
+ if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
+ isSignBitCheck(PredL, *LC, TrueIfSignedL) &&
isSignBitCheck(PredR, *RC, TrueIfSignedR)) {
Value *XorLR = Builder.CreateXor(LHS0, RHS0);
return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) :
Builder.CreateIsNotNeg(XorLR);
}
- // (X > C) ^ (X < C + 2) --> X != C + 1
- // (X < C + 2) ^ (X > C) --> X != C + 1
- // Considering the correctness of this pattern, we should avoid that C is
- // non-negative and C + 2 is negative, although it will be matched by other
- // patterns.
- const APInt *C1, *C2;
- if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) &&
- PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) ||
- (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) &&
- PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1))))
- if (LHS0 == RHS0 && *C1 + 2 == *C2 &&
- (C1->isNegative() || C2->isNonNegative()))
- return Builder.CreateICmpNE(LHS0,
- ConstantInt::get(LHS0->getType(), *C1 + 1));
+ // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2)
+ // into a single comparison using range-based reasoning.
+ if (LHS0 == RHS0) {
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(PredL, *LC);
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(PredR, *RC);
+ auto CRUnion = CR1.exactUnionWith(CR2);
+ auto CRIntersect = CR1.exactIntersectWith(CR2);
+ if (CRUnion && CRIntersect)
+ if (auto CR = CRUnion->exactIntersectWith(CRIntersect->inverse())) {
+ if (CR->isFullSet())
+ return ConstantInt::getTrue(I.getType());
+ if (CR->isEmptySet())
+ return ConstantInt::getFalse(I.getType());
+
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
+
+ if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) ||
+ (LHS->hasOneUse() && RHS->hasOneUse())) {
+ Value *NewV = LHS0;
+ Type *Ty = LHS0->getType();
+ if (!Offset.isZero())
+ NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
+ return Builder.CreateICmp(NewPred, NewV,
+ ConstantInt::get(Ty, NewC));
+ }
+ }
+ }
}
// Instead of trying to imitate the folds for and/or, decompose this 'xor'
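The deleted special case, (X > C) ^ (X < C + 2) --> X != C + 1, is subsumed by the range reasoning: xor of two comparisons of the same X is true exactly on the symmetric difference (CR1 u CR2) \ (CR1 n CR2). A worked instance with C = 5, assuming both exact set operations succeed (when either result is not expressible as a single range, the fold simply does not fire):

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// (X s> 5) ^ (X s< 7)  -->  X != 6, from ranges alone.
void xorOfICmpsDemo() {
  ConstantRange CR1 =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_SGT, APInt(32, 5));
  ConstantRange CR2 =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_SLT, APInt(32, 7));
  auto U = CR1.exactUnionWith(CR2);              // full set
  auto I = CR1.exactIntersectWith(CR2);          // exactly {6}
  auto CR = U->exactIntersectWith(I->inverse()); // everything except 6
  CmpInst::Predicate Pred;
  APInt RHS, Offset;
  CR->getEquivalentICmp(Pred, RHS, Offset);      // ICMP_NE, RHS = 6, Offset = 0
}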
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1539fa9a3269..3b7fe7fa2266 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -357,9 +357,9 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
- APInt UndefElts(DemandedElts.getBitWidth(), 0);
- if (Value *V =
- SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
+ APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+ PoisonElts))
return replaceOperand(II, 0, V);
return nullptr;
@@ -439,12 +439,12 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
- APInt UndefElts(DemandedElts.getBitWidth(), 0);
- if (Value *V =
- SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
+ APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+ PoisonElts))
return replaceOperand(II, 0, V);
- if (Value *V =
- SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
+ PoisonElts))
return replaceOperand(II, 1, V);
return nullptr;
@@ -1526,9 +1526,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// support.
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
auto VWidth = IIFVTy->getNumElements();
- APInt UndefElts(VWidth, 0);
+ APInt PoisonElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
if (V != II)
return replaceInstUsesWith(*II, V);
return II;
@@ -1539,6 +1539,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *I = foldCommutativeIntrinsicOverSelects(*II))
return I;
+ if (Instruction *I = foldCommutativeIntrinsicOverPhis(*II))
+ return I;
+
if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
return NewCall;
}
@@ -1793,6 +1796,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
+ // Try to fold minmax with constant RHS based on range information
+ const APInt *RHSC;
+ if (match(I1, m_APIntAllowUndef(RHSC))) {
+ ICmpInst::Predicate Pred =
+ ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
+ bool IsSigned = MinMaxIntrinsic::isSigned(IID);
+ ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
+ I0, IsSigned, SQ.getWithInstruction(II));
+ if (!LHS_CR.isFullSet()) {
+ if (LHS_CR.icmp(Pred, *RHSC))
+ return replaceInstUsesWith(*II, I0);
+ if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
+ return replaceInstUsesWith(*II,
+ ConstantInt::get(II->getType(), *RHSC));
+ }
+ }
+
break;
}
case Intrinsic::bitreverse: {
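The new minmax clamp reads off the same machinery: if the known range of I0 already satisfies the non-strict form of the intrinsic's predicate against the constant, the call is the identity; if it satisfies the swapped predicate, the call is the constant. A small worked case, assuming X is known to lie in [10, 20]:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// X in [10, 20]: umax(X, 5) folds to X, and umax(X, 25) folds to 25.
void minMaxRangeDemo() {
  ConstantRange X(APInt(32, 10), APInt(32, 21)); // half-open: [10, 21)
  bool IsX = X.icmp(CmpInst::ICMP_UGE, ConstantRange(APInt(32, 5)));  // true
  bool IsC = X.icmp(CmpInst::ICMP_ULE, ConstantRange(APInt(32, 25))); // true
  (void)IsX;
  (void)IsC;
}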
@@ -4237,3 +4257,22 @@ InstCombinerImpl::foldCommutativeIntrinsicOverSelects(IntrinsicInst &II) {
return nullptr;
}
+
+Instruction *
+InstCombinerImpl::foldCommutativeIntrinsicOverPhis(IntrinsicInst &II) {
+ assert(II.isCommutative() && "Instruction should be commutative");
+
+ PHINode *LHS = dyn_cast<PHINode>(II.getOperand(0));
+ PHINode *RHS = dyn_cast<PHINode>(II.getOperand(1));
+
+ if (!LHS || !RHS)
+ return nullptr;
+
+ if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) {
+ replaceOperand(II, 0, P->first);
+ replaceOperand(II, 1, P->second);
+ return &II;
+ }
+
+ return nullptr;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 1d50fa9b6bf7..9e76a0cf17b1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -278,6 +278,16 @@ private:
IntrinsicInst &Tramp);
Instruction *foldCommutativeIntrinsicOverSelects(IntrinsicInst &II);
+ // Match a pair of Phi Nodes like
+ // phi [a, BB0], [b, BB1] & phi [b, BB0], [a, BB1]
+ // Return the two matched operands.
+ std::optional<std::pair<Value *, Value *>>
+ matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS);
+
+ // Tries to fold (op phi(a, b) phi(b, a)) -> (op a, b)
+ // where op is a commutative intrinsic call.
+ Instruction *foldCommutativeIntrinsicOverPhis(IntrinsicInst &II);
+
Value *simplifyMaskedLoad(IntrinsicInst &II);
Instruction *simplifyMaskedStore(IntrinsicInst &II);
Instruction *simplifyMaskedGather(IntrinsicInst &II);
@@ -492,6 +502,11 @@ public:
/// X % (C0 * C1)
Value *SimplifyAddWithRemainder(BinaryOperator &I);
+ // Tries to fold (Binop phi(a, b) phi(b, a)) -> (Binop a, b)
+ // where Binop is commutative.
+ Value *SimplifyPhiCommutativeBinaryOp(BinaryOperator &I, Value *LHS,
+ Value *RHS);
+
// Binary Op helper for select operations where the expression can be
// efficiently reorganized.
Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
@@ -550,7 +565,7 @@ public:
bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt &UndefElts, unsigned Depth = 0,
+ APInt &PoisonElts, unsigned Depth = 0,
bool AllowMultipleUsers = false) override;
/// Canonicalize the position of binops relative to shufflevector.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index e5566578869d..f0ea3d9fcad5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -350,6 +350,13 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));
+ // (-X * Y) * -X --> (X * Y) * X
+ // (-X << Y) * -X --> (X << Y) * X
+ if (match(Op1, m_Neg(m_Value(X)))) {
+ if (Value *NegOp0 = Negator::Negate(false, /*IsNSW*/ false, Op0, *this))
+ return BinaryOperator::CreateMul(NegOp0, X);
+ }
+
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
{
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2dda46986f0f..20bf00344b14 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2440,9 +2440,9 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
return nullptr;
unsigned NumElts = VecTy->getNumElements();
- APInt UndefElts(NumElts, 0);
+ APInt PoisonElts(NumElts, 0);
APInt AllOnesEltMask(APInt::getAllOnes(NumElts));
- if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, PoisonElts)) {
if (V != &Sel)
return replaceInstUsesWith(Sel, V);
return &Sel;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 846116a929b1..a8a5f9831e15 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1319,8 +1319,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
}
/// The specified value produces a vector with any number of elements.
-/// This method analyzes which elements of the operand are undef or poison and
-/// returns that information in UndefElts.
+/// This method analyzes which elements of the operand are poison and
+/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
@@ -1333,7 +1333,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
/// returned. This returns null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
APInt DemandedElts,
- APInt &UndefElts,
+ APInt &PoisonElts,
unsigned Depth,
bool AllowMultipleUsers) {
// Cannot analyze scalable type. The number of vector elements is not a
@@ -1345,18 +1345,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
APInt EltMask(APInt::getAllOnes(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
- if (match(V, m_Undef())) {
- // If the entire vector is undef or poison, just return this info.
- UndefElts = EltMask;
+ if (match(V, m_Poison())) {
+ // If the entire vector is poison, just return this info.
+ PoisonElts = EltMask;
return nullptr;
}
if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
- UndefElts = EltMask;
+ PoisonElts = EltMask;
return PoisonValue::get(V->getType());
}
- UndefElts = 0;
+ PoisonElts = 0;
if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
@@ -1370,7 +1370,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to poison.
Elts.push_back(Poison);
- UndefElts.setBit(i);
+ PoisonElts.setBit(i);
continue;
}
@@ -1378,8 +1378,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (!Elt) return nullptr;
Elts.push_back(Elt);
- if (isa<UndefValue>(Elt)) // Already undef or poison.
- UndefElts.setBit(i);
+ if (isa<PoisonValue>(Elt)) // Already poison.
+ PoisonElts.setBit(i);
}
// If we changed the constant, return it.
@@ -1400,7 +1400,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// They'll be handled when it's their turn to be visited by
// the main instcombine process.
if (Depth != 0)
- // TODO: Just compute the UndefElts information recursively.
+ // TODO: Just compute the PoisonElts information recursively.
return nullptr;
// Conservatively assume that all elements are needed.
@@ -1422,8 +1422,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
};
- APInt UndefElts2(VWidth, 0);
- APInt UndefElts3(VWidth, 0);
+ APInt PoisonElts2(VWidth, 0);
+ APInt PoisonElts3(VWidth, 0);
switch (I->getOpcode()) {
default: break;
@@ -1449,17 +1449,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (i == 0 ? match(I->getOperand(i), m_Undef())
: match(I->getOperand(i), m_Poison())) {
// If the entire vector is undefined, just return this info.
- UndefElts = EltMask;
+ PoisonElts = EltMask;
return nullptr;
}
if (I->getOperand(i)->getType()->isVectorTy()) {
- APInt UndefEltsOp(VWidth, 0);
- simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+ APInt PoisonEltsOp(VWidth, 0);
+ simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
// gep(x, undef) is not undef, so skip considering idx ops here
// Note that we could propagate poison, but we can't distinguish between
// undef & poison bits ATM
if (i == 0)
- UndefElts |= UndefEltsOp;
+ PoisonElts |= PoisonEltsOp;
}
}
@@ -1472,7 +1472,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
- simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
+ simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
break;
}
@@ -1487,7 +1487,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// was extracted from the same index in another vector with the same type,
// replace this insert with that other vector.
// Note: This is attempted before the call to simplifyAndSetOp because that
- // may change UndefElts to a value that does not match with Vec.
+ // may change PoisonElts to a value that does not match with Vec.
Value *Vec;
if (PreInsertDemandedElts == 0 &&
match(I->getOperand(1),
@@ -1496,7 +1496,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return Vec;
}
- simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
+ simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
// If this is inserting an element that isn't demanded, remove this
// insertelement.
@@ -1506,7 +1506,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
// The inserted element is defined.
- UndefElts.clearBit(IdxNo);
+ PoisonElts.clearBit(IdxNo);
break;
}
case Instruction::ShuffleVector: {
@@ -1520,17 +1520,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// operand.
if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
DemandedElts.isAllOnes()) {
- if (!match(I->getOperand(1), m_Undef())) {
+ if (!isa<PoisonValue>(I->getOperand(1))) {
I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
MadeChange = true;
}
APInt LeftDemanded(OpWidth, 1);
- APInt LHSUndefElts(OpWidth, 0);
- simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
- if (LHSUndefElts[0])
- UndefElts = EltMask;
+ APInt LHSPoisonElts(OpWidth, 0);
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
+ if (LHSPoisonElts[0])
+ PoisonElts = EltMask;
else
- UndefElts.clearAllBits();
+ PoisonElts.clearAllBits();
break;
}
@@ -1549,11 +1549,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
}
- APInt LHSUndefElts(OpWidth, 0);
- simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
+ APInt LHSPoisonElts(OpWidth, 0);
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
- APInt RHSUndefElts(OpWidth, 0);
- simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
+ APInt RHSPoisonElts(OpWidth, 0);
+ simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
// If this shuffle does not change the vector length and the elements
// demanded by this shuffle are an identity mask, then this shuffle is
@@ -1579,7 +1579,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return Shuffle->getOperand(0);
}
- bool NewUndefElts = false;
+ bool NewPoisonElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
unsigned RHSIdx = -1u, RHSValIdx = -1u;
bool LHSUniform = true;
@@ -1587,23 +1587,23 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
- UndefElts.setBit(i);
+ PoisonElts.setBit(i);
} else if (!DemandedElts[i]) {
- NewUndefElts = true;
- UndefElts.setBit(i);
+ NewPoisonElts = true;
+ PoisonElts.setBit(i);
} else if (MaskVal < OpWidth) {
- if (LHSUndefElts[MaskVal]) {
- NewUndefElts = true;
- UndefElts.setBit(i);
+ if (LHSPoisonElts[MaskVal]) {
+ NewPoisonElts = true;
+ PoisonElts.setBit(i);
} else {
LHSIdx = LHSIdx == -1u ? i : OpWidth;
LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
LHSUniform = LHSUniform && (MaskVal == i);
}
} else {
- if (RHSUndefElts[MaskVal - OpWidth]) {
- NewUndefElts = true;
- UndefElts.setBit(i);
+ if (RHSPoisonElts[MaskVal - OpWidth]) {
+ NewPoisonElts = true;
+ PoisonElts.setBit(i);
} else {
RHSIdx = RHSIdx == -1u ? i : OpWidth;
RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
@@ -1646,11 +1646,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return New;
}
}
- if (NewUndefElts) {
+ if (NewPoisonElts) {
// Add additional discovered undefs.
SmallVector<int, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
- if (UndefElts[i])
+ if (PoisonElts[i])
Elts.push_back(PoisonMaskElem);
else
Elts.push_back(Shuffle->getMaskValue(i));
@@ -1665,12 +1665,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// on the current demanded elements.
SelectInst *Sel = cast<SelectInst>(I);
if (Sel->getCondition()->getType()->isVectorTy()) {
- // TODO: We are not doing anything with UndefElts based on this call.
+ // TODO: We are not doing anything with PoisonElts based on this call.
// It is overwritten below based on the other select operands. If an
// element of the select condition is known undef, then we are free to
// choose the output value from either arm of the select. If we know that
// one of those values is undef, then the output can be undef.
- simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
}
// Next, see if we can transform the arms of the select.
@@ -1692,12 +1692,12 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
}
- simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
- simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
+ simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
+ simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
// Output elements are undefined if the element from each arm is undefined.
// TODO: This can be improved. See comment in select condition handling.
- UndefElts = UndefElts2 & UndefElts3;
+ PoisonElts = PoisonElts2 & PoisonElts3;
break;
}
case Instruction::BitCast: {
@@ -1706,7 +1706,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (!VTy) break;
unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
- UndefElts2 = APInt(InVWidth, 0);
+ PoisonElts2 = APInt(InVWidth, 0);
unsigned Ratio;
if (VWidth == InVWidth) {
@@ -1735,25 +1735,25 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
break;
}
- simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
+ simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
if (VWidth == InVWidth) {
- UndefElts = UndefElts2;
+ PoisonElts = PoisonElts2;
} else if ((VWidth % InVWidth) == 0) {
// If the number of elements in the output is a multiple of the number of
// elements in the input then an output element is undef if the
// corresponding input element is undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
- if (UndefElts2[OutIdx / Ratio])
- UndefElts.setBit(OutIdx);
+ if (PoisonElts2[OutIdx / Ratio])
+ PoisonElts.setBit(OutIdx);
} else if ((InVWidth % VWidth) == 0) {
// If the number of elements in the input is a multiple of the number of
// elements in the output then an output element is undef if all of the
// corresponding input elements are undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
- APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
+ APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
if (SubUndef.popcount() == Ratio)
- UndefElts.setBit(OutIdx);
+ PoisonElts.setBit(OutIdx);
}
} else {
llvm_unreachable("Unimp");
@@ -1762,7 +1762,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
case Instruction::FPTrunc:
case Instruction::FPExt:
- simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
break;
case Instruction::Call: {
@@ -1785,18 +1785,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
DemandedPassThrough.clearBit(i);
}
if (II->getIntrinsicID() == Intrinsic::masked_gather)
- simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
- simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+ simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
+ simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);
// Output elements are undefined if the element from both sources are.
// TODO: can strengthen via mask as well.
- UndefElts = UndefElts2 & UndefElts3;
+ PoisonElts = PoisonElts2 & PoisonElts3;
break;
}
default: {
// Handle target specific intrinsics
std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
- *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
simplifyAndSetOp);
if (V)
return *V;
@@ -1859,18 +1859,18 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return ShufBO;
}
- simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
- simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
+ simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
+ simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
// Output elements are undefined if both are undefined. Consider things
// like undef & 0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
+ PoisonElts &= PoisonElts2;
}
- // If we've proven all of the lanes undef, return an undef value.
+ // If we've proven all of the lanes poison, return a poison value.
// TODO: Intersect w/demanded lanes
- if (UndefElts.isAllOnes())
- return UndefValue::get(I->getType());
+ if (PoisonElts.isAllOnes())
+ return PoisonValue::get(I->getType());
return MadeChange ? I : nullptr;
}
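The rename is not cosmetic: the bits now record lanes proven poison, and the final bail-out returns PoisonValue rather than UndefValue. Poison is the stronger fact, which licenses more follow-on folds; a comment-form reminder of the difference:

// undef may independently take any value at each use; poison propagates
// through most operations unconditionally. For example:
//   add i32 undef, 1   ; can be folded to undef at best
//   add i32 poison, 1  ; poison, always
// Reporting a lane as poison therefore justifies rewrites that a mere
// undef lane would not.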
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c8b58c51d4e6..18ab510aae7f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -388,7 +388,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
- const unsigned IndexBW = IndexC->getType()->getBitWidth();
+ const unsigned IndexBW = IndexC->getBitWidth();
if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
return nullptr;
return ConstantInt::get(IndexC->getContext(),
@@ -581,20 +581,20 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// If the input vector has a single use, simplify it based on this use
// property.
if (SrcVec->hasOneUse()) {
- APInt UndefElts(NumElts, 0);
+ APInt PoisonElts(NumElts, 0);
APInt DemandedElts(NumElts, 0);
DemandedElts.setBit(IndexC->getZExtValue());
if (Value *V =
- SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
+ SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
return replaceOperand(EI, 0, V);
} else {
// If the input vector has multiple uses, simplify it based on a union
// of all elements used.
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
if (!DemandedElts.isAllOnes()) {
- APInt UndefElts(NumElts, 0);
+ APInt PoisonElts(NumElts, 0);
if (Value *V = SimplifyDemandedVectorElts(
- SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
+ SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
true /* AllowMultipleUsers */)) {
if (V != SrcVec) {
Worklist.addValue(SrcVec);
@@ -777,10 +777,10 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
- if (match(V, m_Undef())) {
+ if (match(V, m_Poison())) {
Mask.assign(NumElts, -1);
return std::make_pair(
- PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
+ PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
}
if (isa<ConstantAggregateZero>(V)) {
@@ -1633,7 +1633,8 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
// bitcast (inselt undef, ScalarSrc, IdxOp)
Type *ScalarTy = ScalarSrc->getType();
Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
- UndefValue *NewUndef = UndefValue::get(VecTy);
+ Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
+ : UndefValue::get(VecTy);
Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
return new BitCastInst(NewInsElt, IE.getType());
}
@@ -1713,9 +1714,10 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
unsigned VWidth = VecTy->getNumElements();
- APInt UndefElts(VWidth, 0);
+ APInt PoisonElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
+ PoisonElts)) {
if (V != &IE)
return replaceInstUsesWith(IE, V);
return &IE;
@@ -1918,6 +1920,10 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Type *EltTy = V->getType()->getScalarType();
+
+ if (isa<PoisonValue>(V))
+ return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
+
if (match(V, m_Undef()))
return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
@@ -2639,7 +2645,7 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
// Index is updated to the potentially translated insertion lane.
- IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
+ IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
return true;
};
@@ -2769,6 +2775,11 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = simplifyBinOpSplats(SVI))
return I;
+ // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
+ // order to support scalable vectors.
+ if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
+ return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
+
if (isa<ScalableVectorType>(LHS->getType()))
return nullptr;
@@ -2855,9 +2866,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldCastShuffle(SVI, Builder))
return I;
- APInt UndefElts(VWidth, 0);
+ APInt PoisonElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
if (V != &SVI)
return replaceInstUsesWith(SVI, V);
return &SVI;
@@ -3012,10 +3023,11 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
if (LHSShuffle)
- if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
+ if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
+ !match(RHS, m_Poison()))
LHSShuffle = nullptr;
if (RHSShuffle)
- if (!match(RHSShuffle->getOperand(1), m_Undef()))
+ if (!match(RHSShuffle->getOperand(1), m_Poison()))
RHSShuffle = nullptr;
if (!LHSShuffle && !RHSShuffle)
return MadeChange ? &SVI : nullptr;
@@ -3038,7 +3050,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value* newRHS = RHS;
if (LHSShuffle) {
// case 1
- if (match(RHS, m_Undef())) {
+ if (match(RHS, m_Poison())) {
newLHS = LHSOp0;
newRHS = LHSOp1;
}
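The splat canonicalization is placed ahead of the fixed-vector-only bail-out deliberately, so scalable vectors receive it as well: a zero mask reads only lane 0 of the first operand, leaving the second operand unused. In IR terms (illustrative):

// Before:
//   %s = shufflevector <vscale x 4 x i32> %v, <vscale x 4 x i32> %w,
//                      <vscale x 4 x i32> zeroinitializer
// After (the RHS is never read by an all-zero mask):
//   %s = shufflevector <vscale x 4 x i32> %v, <vscale x 4 x i32> poison,
//                      <vscale x 4 x i32> zeroinitializer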
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a7ddadc25de4..7f5a7b666903 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -173,14 +173,14 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
}
std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
- IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
- APInt &UndefElts3,
+ IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
+ APInt &PoisonElts2, APInt &PoisonElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) {
// Handle target specific intrinsics
if (II.getCalledFunction()->isTargetIntrinsic()) {
return TTI.simplifyDemandedVectorEltsIntrinsic(
- *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
SimplifyAndSetOp);
}
return std::nullopt;
@@ -1096,6 +1096,54 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
+std::optional<std::pair<Value *, Value *>>
+InstCombinerImpl::matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
+ if (LHS->getParent() != RHS->getParent())
+ return std::nullopt;
+
+ if (LHS->getNumIncomingValues() < 2)
+ return std::nullopt;
+
+ if (!equal(LHS->blocks(), RHS->blocks()))
+ return std::nullopt;
+
+ Value *L0 = LHS->getIncomingValue(0);
+ Value *R0 = RHS->getIncomingValue(0);
+
+ for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
+ Value *L1 = LHS->getIncomingValue(I);
+ Value *R1 = RHS->getIncomingValue(I);
+
+ if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
+ continue;
+
+ return std::nullopt;
+ }
+
+ return std::optional(std::pair(L0, R0));
+}
+
+Value *InstCombinerImpl::SimplifyPhiCommutativeBinaryOp(BinaryOperator &I,
+ Value *Op0,
+ Value *Op1) {
+ assert(I.isCommutative() && "Instruction should be commutative");
+
+ PHINode *LHS = dyn_cast<PHINode>(Op0);
+ PHINode *RHS = dyn_cast<PHINode>(Op1);
+
+ if (!LHS || !RHS)
+ return nullptr;
+
+ if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) {
+ Value *BI = Builder.CreateBinOp(I.getOpcode(), P->first, P->second);
+ if (auto *BO = dyn_cast<BinaryOperator>(BI))
+ BO->copyIRFlags(&I);
+ return BI;
+ }
+
+ return nullptr;
+}
+
Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
Value *LHS,
Value *RHS) {
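matchSymmetricPhiNodesPair accepts a pair only when both phis sit in the same block, list the same predecessors in the same order, and every edge carries the values {L0, R0} in one order or the other; a commutative operation over such a pair is then edge-independent. The shape being matched, as IR in comments (names illustrative):

// bb2:
//   %p0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
//   %p1 = phi i32 [ %b, %bb0 ], [ %a, %bb1 ]
//   %r  = add i32 %p0, %p1
// On either edge the operand multiset of %r is {%a, %b}; since add is
// commutative, this rewrites to
//   %r  = add i32 %a, %b        ; IR flags copied from the original
// and the phis become dead if they had no other users.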
@@ -1529,6 +1577,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
BO.getParent() != Phi1->getParent())
return nullptr;
+ if (BO.isCommutative()) {
+ if (Value *V = SimplifyPhiCommutativeBinaryOp(BO, Phi0, Phi1))
+ return replaceInstUsesWith(BO, V);
+ }
+
// Fold if there is at least one specific constant value in phi0 or phi1's
// incoming values that comes from the same block and this specific constant
// value can be used to do optimization for specific binary operator.
@@ -1728,8 +1781,8 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// If both arguments of the binary operation are shuffles that use the same
// mask and shuffle within a single vector, move the shuffle after the binop.
- if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
- match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
+ if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
+ match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
V1->getType() == V2->getType() &&
(LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
// Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
@@ -1770,9 +1823,9 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Constant *C;
auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
if (InstVTy &&
- match(&Inst,
- m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
- m_ImmConstant(C))) &&
+ match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
+ m_Mask(Mask))),
+ m_ImmConstant(C))) &&
cast<FixedVectorType>(V1->getType())->getNumElements() <=
InstVTy->getNumElements()) {
assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
@@ -1787,8 +1840,8 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
ArrayRef<int> ShMask = Mask;
unsigned SrcVecNumElts =
cast<FixedVectorType>(V1->getType())->getNumElements();
- UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
- SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
+ PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
+ SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
bool MayChange = true;
unsigned NumElts = InstVTy->getNumElements();
for (unsigned I = 0; I < NumElts; ++I) {
@@ -1801,29 +1854,29 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// 2. The shuffle needs an element of the constant vector that can't
// be mapped to a new constant vector.
// 3. This is a widening shuffle that copies elements of V1 into the
- // extended elements (extending with undef is allowed).
- if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
+ // extended elements (extending with poison is allowed).
+ if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
I >= SrcVecNumElts) {
MayChange = false;
break;
}
NewVecC[ShMask[I]] = CElt;
}
- // If this is a widening shuffle, we must be able to extend with undef
- // elements. If the original binop does not produce an undef in the high
+ // If this is a widening shuffle, we must be able to extend with poison
+ // elements. If the original binop does not produce a poison in the high
// lanes, then this transform is not safe.
- // Similarly for undef lanes due to the shuffle mask, we can only
- // transform binops that preserve undef.
- // TODO: We could shuffle those non-undef constant values into the
- // result by using a constant vector (rather than an undef vector)
+ // Similarly for poison lanes due to the shuffle mask, we can only
+ // transform binops that preserve poison.
+ // TODO: We could shuffle those non-poison constant values into the
+ // result by using a constant vector (rather than a poison vector)
// as operand 1 of the new binop, but that might be too aggressive
// for target-independent shuffle creation.
if (I >= SrcVecNumElts || ShMask[I] < 0) {
- Constant *MaybeUndef =
+ Constant *MaybePoison =
ConstOp1
- ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
- : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
- if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
+ ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
+ : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
+ if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
MayChange = false;
break;
}
@@ -1831,9 +1884,10 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
}
if (MayChange) {
Constant *NewC = ConstantVector::get(NewVecC);
- // It may not be safe to execute a binop on a vector with undef elements
+ // It may not be safe to execute a binop on a vector with poison elements
// because the entire instruction can be folded to undef or create poison
// that did not exist in the original code.
+ // TODO: The shift case should not be necessary.
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
@@ -2241,10 +2295,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// compile-time.
if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
auto VWidth = GEPFVTy->getNumElements();
- APInt UndefElts(VWidth, 0);
+ APInt PoisonElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
- UndefElts)) {
+ PoisonElts)) {
if (V != &GEP)
return replaceInstUsesWith(GEP, V);
return &GEP;
@@ -2462,7 +2516,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Idx2);
}
ConstantInt *C;
- if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd(
+ if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
m_Value(Idx1), m_ConstantInt(C))))))) {
// %add = add nsw i32 %idx1, idx2
// %sidx = sext i32 %add to i64
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6468d07b4f4f..afb0e6cd1548 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2737,7 +2737,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// the shadow memory.
// We cannot just ignore these methods, because they may call other
// instrumented functions.
- if (F.getName().find(" load]") != std::string::npos) {
+ if (F.getName().contains(" load]")) {
FunctionCallee AsanInitFunction =
declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
IRBuilder<> IRB(&F.front(), F.front().begin());
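This hunk and the matching ones in MemProfiler and SanitizerCoverage below are the same modernization: StringRef::contains expresses find() != npos directly. A sketch of the check (the helper name is illustrative):

#include "llvm/ADT/StringRef.h"

// ObjC "+[ClassName load]" methods can run before the sanitizer's module
// constructor, hence the substring-based detection.
static bool isObjCLoadMethod(llvm::StringRef Name) {
  return Name.contains(" load]");
}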
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 539b7441d24b..2236e9cd44c5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -535,7 +535,7 @@ bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
// the shadow memory.
// We cannot just ignore these methods, because they may call other
// instrumented functions.
- if (F.getName().find(" load]") != std::string::npos) {
+ if (F.getName().contains(" load]")) {
FunctionCallee MemProfInitFunction =
declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
IRBuilder<> IRB(&F.front(), F.front().begin());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index fe672a4377a1..ce570bdfd8b8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -603,7 +603,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
if (F.empty())
return;
- if (F.getName().find(".module_ctor") != std::string::npos)
+ if (F.getName().contains(".module_ctor"))
return; // Should not instrument sanitizer init functions.
if (F.getName().starts_with("__sanitizer_"))
return; // Don't instrument __sanitizer_* callbacks.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 1fb9d7fff32f..9e40d94dd73c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -674,8 +674,7 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,
const ConstantCandidate &RHS) {
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
- return LHS.ConstInt->getType()->getBitWidth() <
- RHS.ConstInt->getType()->getBitWidth();
+ return LHS.ConstInt->getBitWidth() < RHS.ConstInt->getBitWidth();
return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());
});
@@ -890,7 +889,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
Type *Ty = ConstInfo.BaseExpr->getType();
Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);
} else {
- IntegerType *Ty = ConstInfo.BaseInt->getType();
+ IntegerType *Ty = ConstInfo.BaseInt->getIntegerType();
Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 18266ba07898..899d7e0a11e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -273,7 +273,16 @@ class ConstraintInfo {
public:
ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs)
- : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) {}
+ : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) {
+ auto &Value2Index = getValue2Index(false);
+ // Add Arg > -1 constraints to the unsigned system for all function arguments.
+ for (Value *Arg : FunctionArgs) {
+ ConstraintTy VarPos(SmallVector<int64_t, 8>(Value2Index.size() + 1, 0),
+ false, false, false);
+ VarPos.Coefficients[Value2Index[Arg]] = -1;
+ UnsignedCS.addVariableRow(VarPos.Coefficients);
+ }
+ }
DenseMap<Value *, unsigned> &getValue2Index(bool Signed) {
return Signed ? SignedCS.getValue2Index() : UnsignedCS.getValue2Index();
@@ -1365,18 +1374,34 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
ReproducerCondStack.pop_back();
}
-/// Check if the first condition for an AND implies the second.
-static bool checkAndSecondOpImpliedByFirst(
+/// Check if one condition of an AND or OR is implied by the other condition
+/// (negated, in the case of OR), and simplify the implied condition if so.
+static bool checkOrAndOpImpliedByOther(
FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
SmallVectorImpl<StackEntry> &DFSInStack) {
CmpInst::Predicate Pred;
Value *A, *B;
- Instruction *And = CB.getContextInst();
- if (!match(And->getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B))))
+ Instruction *JoinOp = CB.getContextInst();
+ CmpInst *CmpToCheck = cast<CmpInst>(CB.getInstructionToSimplify());
+ unsigned OtherOpIdx = JoinOp->getOperand(0) == CmpToCheck ? 1 : 0;
+
+ // Don't try to simplify the first condition of a select by the second, as
+ // this may make the select more poisonous than the original one.
+ // TODO: check if the first operand may be poison.
+ if (OtherOpIdx != 0 && isa<SelectInst>(JoinOp))
return false;
+ if (!match(JoinOp->getOperand(OtherOpIdx),
+ m_ICmp(Pred, m_Value(A), m_Value(B))))
+ return false;
+
+ // For OR, check if the negated condition implies CmpToCheck.
+ bool IsOr = match(JoinOp, m_LogicalOr());
+ if (IsOr)
+ Pred = CmpInst::getInversePredicate(Pred);
+
// Optimistically add fact from first condition.
unsigned OldSize = DFSInStack.size();
Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
@@ -1385,11 +1410,19 @@ static bool checkAndSecondOpImpliedByFirst(
bool Changed = false;
// Check if the second condition can be simplified now.
- ICmpInst *Cmp = cast<ICmpInst>(And->getOperand(1));
- if (auto ImpliedCondition = checkCondition(
- Cmp->getPredicate(), Cmp->getOperand(0), Cmp->getOperand(1), Cmp,
- Info, CB.NumIn, CB.NumOut, CB.getContextInst())) {
- And->setOperand(1, ConstantInt::getBool(And->getType(), *ImpliedCondition));
+ if (auto ImpliedCondition =
+ checkCondition(CmpToCheck->getPredicate(), CmpToCheck->getOperand(0),
+ CmpToCheck->getOperand(1), CmpToCheck, Info, CB.NumIn,
+ CB.NumOut, CB.getContextInst())) {
+ if (IsOr && isa<SelectInst>(JoinOp)) {
+ JoinOp->setOperand(
+ OtherOpIdx == 0 ? 2 : 0,
+ ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition));
+ } else
+ JoinOp->setOperand(
+ 1 - OtherOpIdx,
+ ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition));
+
Changed = true;
}
@@ -1442,6 +1475,17 @@ void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
DFSInStack.emplace_back(NumIn, NumOut, R.IsSigned,
std::move(ValuesToRelease));
+ if (!R.IsSigned) {
+ for (Value *V : NewVariables) {
+ ConstraintTy VarPos(SmallVector<int64_t, 8>(Value2Index.size() + 1, 0),
+ false, false, false);
+ VarPos.Coefficients[Value2Index[V]] = -1;
+ CSToUse.addVariableRow(VarPos.Coefficients);
+ DFSInStack.emplace_back(NumIn, NumOut, R.IsSigned,
+ SmallVector<Value *, 2>());
+ }
+ }
+
if (R.isEq()) {
// Also add the inverted constraint for equality constraints.
for (auto &Coeff : R.Coefficients)
@@ -1609,11 +1653,11 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
bool Simplified = checkAndReplaceCondition(
Cmp, Info, CB.NumIn, CB.NumOut, CB.getContextInst(),
ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove);
- if (!Simplified && match(CB.getContextInst(),
- m_LogicalAnd(m_Value(), m_Specific(Inst)))) {
+ if (!Simplified &&
+ match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
Simplified =
- checkAndSecondOpImpliedByFirst(CB, Info, ReproducerModule.get(),
- ReproducerCondStack, DFSInStack);
+ checkOrAndOpImpliedByOther(CB, Info, ReproducerModule.get(),
+ ReproducerCondStack, DFSInStack);
}
Changed |= Simplified;
}
@@ -1687,7 +1731,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
#ifndef NDEBUG
unsigned SignedEntries =
count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; });
- assert(Info.getCS(false).size() == DFSInStack.size() - SignedEntries &&
+ assert(Info.getCS(false).size() - FunctionArgs.size() ==
+ DFSInStack.size() - SignedEntries &&
"updates to CS and DFSInStack are out of sync");
assert(Info.getCS(true).size() == SignedEntries &&
"updates to CS and DFSInStack are out of sync");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index d2dfc764d042..c44d3748a80d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -935,11 +935,13 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
UDiv->setDebugLoc(SDI->getDebugLoc());
UDiv->setIsExact(SDI->isExact());
- Value *Res = UDiv;
+ auto *Res = UDiv;
// If the operands had two different domains, we need to negate the result.
- if (Ops[0].D != Ops[1].D)
+ if (Ops[0].D != Ops[1].D) {
Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+ Res->setDebugLoc(SDI->getDebugLoc());
+ }
SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index edfeb36f3422..c5bf913cda30 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -521,7 +521,7 @@ struct AllSwitchPaths {
const BasicBlock *PrevBB = Path.back();
for (const BasicBlock *BB : Path) {
- if (StateDef.count(BB) != 0) {
+ if (StateDef.contains(BB)) {
const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
assert(Phi && "Expected a state-defining instr to be a phi node.");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index 5e58af0edc15..e36578f3de7a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -592,7 +592,7 @@ uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
/// Returns true if a value number exists for the specified value.
bool GVNPass::ValueTable::exists(Value *V) const {
- return valueNumbering.count(V) != 0;
+ return valueNumbering.contains(V);
}
/// lookup_or_add - Returns the value number for the specified value, assigning
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index b1add3c42976..eef94636578d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -343,9 +343,8 @@ static bool verifyTripCount(Value *RHS, Loop *L,
// If the RHS of the compare is equal to the backedge taken count we need
// to add one to get the trip count.
if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
- ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
- Value *NewRHS = ConstantInt::get(
- ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+ Value *NewRHS = ConstantInt::get(ConstantRHS->getContext(),
+ ConstantRHS->getValue() + 1);
return setLoopComponents(NewRHS, TripCount, Increment,
IterationInstructions);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 39607464dd00..a58bbe318563 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -7006,7 +7006,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
LLVM_DEBUG(dbgs() << "Old term-cond:\n"
<< *OldTermCond << "\n"
- << "New term-cond:\b" << *NewTermCond << "\n");
+ << "New term-cond:\n" << *NewTermCond << "\n");
BI->setCondition(NewTermCond);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index f14541a1a037..7cfeb019af97 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -200,6 +200,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Count = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
+ UP.MaxUpperBound = UnrollMaxUpperBound;
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
UP.BEInsns = 2;
UP.Partial = false;
@@ -237,6 +238,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
UP.MaxCount = UnrollMaxCount;
+ if (UnrollMaxUpperBound.getNumOccurrences() > 0)
+ UP.MaxUpperBound = UnrollMaxUpperBound;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
@@ -777,7 +780,7 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
return TripCount;
if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
- MaxTripCount <= UnrollMaxUpperBound)
+ MaxTripCount <= UP.MaxUpperBound)
return MaxTripCount;
  // If we didn't return by this point, we should continue to the other priorities.
@@ -952,7 +955,7 @@ bool llvm::computeUnrollCount(
// cost of exact full unrolling. As such, if we have an exact count and
  // found it unprofitable, we'll never choose bounded unrolling.
if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
- MaxTripCount <= UnrollMaxUpperBound) {
+ MaxTripCount <= UP.MaxUpperBound) {
UP.Count = MaxTripCount;
if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
MaxTripCount, UCE, UP)) {
@@ -1026,7 +1029,7 @@ bool llvm::computeUnrollCount(
}
// Don't unroll a small upper bound loop unless user or TTI asked to do so.
- if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+ if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
UP.Count = 0;
return false;
}
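
[Editor's note: the LoopUnrollPass hunks thread the -unroll-max-upperbound flag through UnrollingPreferences as UP.MaxUpperBound instead of reading the cl::opt at each use, so targets and callers can adjust the bound. The getNumOccurrences() check preserves the usual precedence: the flag only overrides when explicitly passed. A condensed sketch of that pattern, assuming LLVM's CommandLine library; the names Prefs and MyMaxUpperBound are hypothetical.]

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<unsigned>
        MyMaxUpperBound("my-unroll-max-upperbound", llvm::cl::init(8));

    struct Prefs { unsigned MaxUpperBound; };

    void seedPrefs(Prefs &UP) {
      UP.MaxUpperBound = MyMaxUpperBound;           // seed with the default
      // ... target hooks may adjust UP here ...
      if (MyMaxUpperBound.getNumOccurrences() > 0)  // flag given on the CLI?
        UP.MaxUpperBound = MyMaxUpperBound;         // explicit flag wins
    }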
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 40b4ea92e1ff..3f02441b74ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2057,7 +2057,7 @@ static void relocationViaAlloca(
for (const auto &Info : Records)
for (auto RematerializedValuePair : Info.RematerializedValues) {
Value *OriginalValue = RematerializedValuePair.second;
- if (AllocaMap.count(OriginalValue) != 0)
+ if (AllocaMap.contains(OriginalValue))
continue;
emitAllocaFor(OriginalValue);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 24da26c9f0f2..656abdb0abbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3285,6 +3285,7 @@ private:
(BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
SliceSize !=
DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
+ !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
!NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
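
[Editor's note: the SROA hunk adds one more reason to take the conservative path — allocated types whose bit width is not a whole number of bytes, where the store size over-allocates and full-width loads/stores would touch padding bits. A small sketch of the property the DataLayout query expresses, under the usual round-up-to-bytes rule.]

    // e.g. i17: 17 bits rounds up to a 3-byte (24-bit) store size -> false.
    //      i32: 32 bits is exactly 4 bytes                        -> true.
    bool typeSizeEqualsStoreSizeBits(unsigned BitWidth) {
      unsigned StoreBits = ((BitWidth + 7) / 8) * 8; // round up to whole bytes
      return StoreBits == BitWidth;
    }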
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
index 735686ddce38..09991f628224 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
@@ -7,14 +7,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/DXILUpgrade.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
+#define DEBUG_TYPE "dxil-upgrade"
+
static bool handleValVerMetadata(Module &M) {
NamedMDNode *ValVer = M.getNamedMetadata("dx.valver");
if (!ValVer)
return false;
+ LLVM_DEBUG({
+ MDNode *N = ValVer->getOperand(0);
+ auto X = mdconst::extract<ConstantInt>(N->getOperand(0))->getZExtValue();
+ auto Y = mdconst::extract<ConstantInt>(N->getOperand(1))->getZExtValue();
+ dbgs() << "DXIL: validation version: " << X << "." << Y << "\n";
+ });
// We don't need the validation version internally, so we drop it.
ValVer->dropAllReferences();
ValVer->eraseFromParent();
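
[Editor's note: the DXILUpgrade change logs the dx.valver pair before dropping it. LLVM_DEBUG with a braced block compiles away entirely in NDEBUG builds, so the metadata reads cost nothing in release; mdconst::extract unwraps a ConstantAsMetadata operand back to its ConstantInt. A trimmed sketch of the same pattern — LLVM headers assumed, pass name hypothetical.]

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Support/Debug.h"
    #define DEBUG_TYPE "my-upgrade"

    void logPair(llvm::MDNode *N) {
      LLVM_DEBUG({  // the whole block vanishes when NDEBUG is defined
        auto X = llvm::mdconst::extract<llvm::ConstantInt>(N->getOperand(0));
        auto Y = llvm::mdconst::extract<llvm::ConstantInt>(N->getOperand(1));
        llvm::dbgs() << X->getZExtValue() << "." << Y->getZExtValue() << "\n";
      });
    }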
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 79ca99d1566c..09e19be0d293 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -405,6 +405,8 @@ int FunctionComparator::cmpConstants(const Constant *L,
case Value::ConstantExprVal: {
const ConstantExpr *LE = cast<ConstantExpr>(L);
const ConstantExpr *RE = cast<ConstantExpr>(R);
+ if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode()))
+ return Res;
unsigned NumOperandsL = LE->getNumOperands();
unsigned NumOperandsR = RE->getNumOperands();
if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
@@ -414,6 +416,29 @@ int FunctionComparator::cmpConstants(const Constant *L,
cast<Constant>(RE->getOperand(i))))
return Res;
}
+ if (LE->isCompare())
+ if (int Res = cmpNumbers(LE->getPredicate(), RE->getPredicate()))
+ return Res;
+ if (auto *GEPL = dyn_cast<GEPOperator>(LE)) {
+ auto *GEPR = cast<GEPOperator>(RE);
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
+ return Res;
+ if (int Res = cmpNumbers(GEPL->isInBounds(), GEPR->isInBounds()))
+ return Res;
+ if (int Res = cmpNumbers(GEPL->getInRangeIndex().value_or(unsigned(-1)),
+ GEPR->getInRangeIndex().value_or(unsigned(-1))))
+ return Res;
+ }
+ if (auto *OBOL = dyn_cast<OverflowingBinaryOperator>(LE)) {
+ auto *OBOR = cast<OverflowingBinaryOperator>(RE);
+ if (int Res =
+ cmpNumbers(OBOL->hasNoUnsignedWrap(), OBOR->hasNoUnsignedWrap()))
+ return Res;
+ if (int Res =
+ cmpNumbers(OBOL->hasNoSignedWrap(), OBOR->hasNoSignedWrap()))
+ return Res;
+ }
return 0;
}
case Value::BlockAddressVal: {
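
[Editor's note: the FunctionComparator additions close a real hole — two constant expressions with identical operand lists previously compared equal even if they differed in opcode, compare predicate, GEP source element type / inbounds / inRange, or nuw/nsw flags, which could let MergeFunctions fold functions that are not equivalent. A toy model of a total order that folds such flags in, standard C++ only.]

    #include <tuple>

    struct ExprKey {
      unsigned Opcode;
      bool NUW, NSW;  // overflow flags must participate in the order
      // ... operands, predicate, GEP data elided ...
    };

    int cmpExpr(const ExprKey &L, const ExprKey &R) {
      auto TL = std::tie(L.Opcode, L.NUW, L.NSW);
      auto TR = std::tie(R.Opcode, R.NUW, R.NSW);
      return TL < TR ? -1 : (TR < TL ? 1 : 0);  // -1/0/1 like cmpNumbers
    }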
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index 1e42d7491676..f94047633022 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -64,7 +64,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
// sure that the return is covered. Otherwise, we can check whether there
// is a way to reach the RI from the start of the lifetime without passing
// through an end.
- if (EndBlocks.count(RI->getParent()) > 0 ||
+ if (EndBlocks.contains(RI->getParent()) ||
!isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) {
++NumCoveredExits;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 89494a7f6497..55e375670cc6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6293,7 +6293,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
}
case BitMapKind: {
// Type of the bitmap (e.g. i59).
- IntegerType *MapTy = BitMap->getType();
+ IntegerType *MapTy = BitMap->getIntegerType();
// Cast Index to the same type as the bitmap.
// Note: The Index is <= the number of elements in the table, so
@@ -6668,7 +6668,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *TableIndex;
ConstantInt *TableIndexOffset;
if (UseSwitchConditionAsTableIndex) {
- TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+ TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
TableIndex = SI->getCondition();
} else {
TableIndexOffset = MinCaseVal;
@@ -6752,7 +6752,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Get the TableIndex'th bit of the bitmask.
// If this bit is 0 (meaning hole) jump to the default destination,
// else continue with table lookup.
- IntegerType *MapTy = TableMask->getType();
+ IntegerType *MapTy = TableMask->getIntegerType();
Value *MaskIndex =
Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
@@ -6975,7 +6975,7 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
// Replace each case with its trailing zeros number.
for (auto &Case : SI->cases()) {
auto *OrigValue = Case.getCaseValue();
- Case.setValue(ConstantInt::get(OrigValue->getType(),
+ Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
OrigValue->getValue().countr_zero()));
}
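
[Editor's note: the SimplifyCFG hunks switch ConstantInt type queries from getType() to getIntegerType(), which hands back the type already as IntegerType*, sparing a cast at sites that need integer-specific API. A one-line sketch, assuming LLVM headers.]

    #include "llvm/IR/Constants.h"

    unsigned bitWidthOf(const llvm::ConstantInt *CI) {
      // Avoids cast<IntegerType>(CI->getType()) at the call site.
      return CI->getIntegerType()->getBitWidth();
    }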
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 722ed03db3de..42e7c4006b42 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -27,6 +27,7 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "indvars"
@@ -786,8 +787,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
/// otherwise.
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
Instruction *IVOperand) {
- using namespace llvm::PatternMatch;
-
if (BO->getOpcode() == Instruction::Shl) {
bool Changed = false;
ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
@@ -1763,7 +1762,7 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
};
// Our raison d'etre! Eliminate sign and zero extension.
- if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+ if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
(isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
Value *NewDef = DU.WideDef;
if (DU.NarrowUse->getType() != WideType) {
@@ -2011,8 +2010,6 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
/// by looking at dominating conditions inside of the loop
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
Instruction *NarrowUser) {
- using namespace llvm::PatternMatch;
-
Value *NarrowDefLHS;
const APInt *NarrowDefRHS;
if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
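
[Editor's note: in WidenIV, the isa<SExtInst> test becomes a match against m_SExtLike, so widening by sign extension also fires for extensions that merely behave like sext — per the matcher's definition, e.g. a zext of a value known non-negative. A minimal usage sketch, assuming LLVM's PatternMatch header; the helper name is hypothetical.]

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    static bool isSExtLike(Value *V) {
      // True for a plain sext, and for sext-equivalent forms such as
      // zext nneg, which m_SExtLike also accepts.
      return match(V, m_SExtLike(m_Value()));
    }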
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9d799124074c..32913b3f5569 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3760,40 +3760,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
OrdersType CurrentOrder(NumScalars, NumScalars);
SmallVector<int> Positions;
SmallBitVector UsedPositions(NumScalars);
- DenseMap<const TreeEntry *, unsigned> UsedEntries;
- DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos;
- for (Value *V : TE.Scalars) {
- if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
- continue;
- const auto *LocalSTE = getTreeEntry(V);
- if (!LocalSTE)
- continue;
- unsigned Lane =
- std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V));
- if (Lane >= NumScalars)
- continue;
- ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond();
- ValueToEntryPos.try_emplace(V, LocalSTE, Lane);
- }
- if (UsedEntries.empty())
- return std::nullopt;
- const TreeEntry &BestSTE =
- *std::max_element(UsedEntries.begin(), UsedEntries.end(),
- [](const std::pair<const TreeEntry *, unsigned> &P1,
- const std::pair<const TreeEntry *, unsigned> &P2) {
- return P1.second < P2.second;
- })
- ->first;
- UsedEntries.erase(&BestSTE);
- const TreeEntry *SecondBestSTE = nullptr;
- if (!UsedEntries.empty())
- SecondBestSTE =
- std::max_element(UsedEntries.begin(), UsedEntries.end(),
- [](const std::pair<const TreeEntry *, unsigned> &P1,
- const std::pair<const TreeEntry *, unsigned> &P2) {
- return P1.second < P2.second;
- })
- ->first;
+ const TreeEntry *STE = nullptr;
  // Try to find all gathered scalars that get vectorized in another
  // vectorized node. Here we can have only a single tree vector node to
  // correctly identify the order of the gathered scalars.
@@ -3801,46 +3768,53 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
Value *V = TE.Scalars[I];
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
continue;
- const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V);
- if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE))
- continue;
- if (CurrentOrder[Lane] != NumScalars) {
- if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() ||
- BestSTE.Scalars[CurrentOrder[Lane]] == V) &&
- (Lane != I || LocalSTE == SecondBestSTE))
- continue;
- UsedPositions.reset(CurrentOrder[Lane]);
+ if (const auto *LocalSTE = getTreeEntry(V)) {
+ if (!STE)
+ STE = LocalSTE;
+ else if (STE != LocalSTE)
+ // Take the order only from the single vector node.
+ return std::nullopt;
+ unsigned Lane =
+ std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
+ if (Lane >= NumScalars)
+ return std::nullopt;
+ if (CurrentOrder[Lane] != NumScalars) {
+ if (Lane != I)
+ continue;
+ UsedPositions.reset(CurrentOrder[Lane]);
+ }
+ // The partial identity (where only some elements of the gather node are
+ // in the identity order) is good.
+ CurrentOrder[Lane] = I;
+ UsedPositions.set(I);
}
- // The partial identity (where only some elements of the gather node are
- // in the identity order) is good.
- CurrentOrder[Lane] = I;
- UsedPositions.set(I);
}
// Need to keep the order if we have a vector entry and at least 2 scalars or
// the vectorized entry has just 2 scalars.
- if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1)
- return std::nullopt;
- auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) {
- for (unsigned I = 0; I < NumScalars; ++I)
- if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
- return false;
- return true;
- };
- if (IsIdentityOrder(CurrentOrder))
- return OrdersType();
- auto *It = CurrentOrder.begin();
- for (unsigned I = 0; I < NumScalars;) {
- if (UsedPositions.test(I)) {
- ++I;
- continue;
- }
- if (*It == NumScalars) {
- *It = I;
- ++I;
+ if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
+ auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
+ for (unsigned I = 0; I < NumScalars; ++I)
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
+ return false;
+ return true;
+ };
+ if (IsIdentityOrder(CurrentOrder))
+ return OrdersType();
+ auto *It = CurrentOrder.begin();
+ for (unsigned I = 0; I < NumScalars;) {
+ if (UsedPositions.test(I)) {
+ ++I;
+ continue;
+ }
+ if (*It == NumScalars) {
+ *It = I;
+ ++I;
+ }
+ ++It;
}
- ++It;
+ return std::move(CurrentOrder);
}
- return std::move(CurrentOrder);
+ return std::nullopt;
}
namespace {
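
[Editor's note: in findReusedOrderedScalars above, the tail loop fills in the order — lanes that landed in the identity position are marked in UsedPositions, and every slot still holding the sentinel value NumScalars receives the next unused position in ascending order. A standalone restatement of that fill-in, plain C++.]

    #include <vector>

    void fillOrder(std::vector<unsigned> &Order, unsigned NumScalars,
                   const std::vector<bool> &UsedPositions) {
      auto It = Order.begin();
      for (unsigned I = 0; I < NumScalars;) {
        if (UsedPositions[I]) { ++I; continue; } // position I already claimed
        if (*It == NumScalars) { *It = I; ++I; } // sentinel takes next free slot
        ++It;
      }
    }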
@@ -6469,7 +6443,7 @@ bool BoUpSLP::areAllUsersVectorized(
Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const {
return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) ||
all_of(I->users(), [this](User *U) {
- return ScalarToTreeEntry.count(U) > 0 ||
+ return ScalarToTreeEntry.contains(U) ||
isVectorLikeInstWithConstOps(U) ||
(isa<ExtractElementInst>(U) && MustGather.contains(U));
});
@@ -11498,7 +11472,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
- propagateIRFlags(V, E->Scalars, VL0);
+ propagateIRFlags(V, E->Scalars, VL0, !MinBWs.contains(E));
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
@@ -15730,6 +15704,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
assert(isValidElementType(V->getType()) &&
isValidElementType(V2->getType()) &&
"Expected valid element types only.");
+ if (V == V2)
+ return IsCompatibility;
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
if (CI1->getOperand(0)->getType()->getTypeID() <
@@ -15754,6 +15730,8 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
auto *Op1 = CI1->getOperand(CI1Preds ? I : E - I - 1);
auto *Op2 = CI2->getOperand(CI2Preds ? I : E - I - 1);
+ if (Op1 == Op2)
+ continue;
if (Op1->getValueID() < Op2->getValueID())
return !IsCompatibility;
if (Op1->getValueID() > Op2->getValueID())
@@ -15780,7 +15758,10 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
InstructionsState S = getSameOpcode({I1, I2}, TLI);
if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle()))
continue;
- return !IsCompatibility && I1->getOpcode() < I2->getOpcode();
+ if (IsCompatibility)
+ return false;
+ if (I1->getOpcode() != I2->getOpcode())
+ return I1->getOpcode() < I2->getOpcode();
}
}
return IsCompatibility;
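
[Editor's note: compareCmp serves two callers — as a compatibility test (IsCompatibility == true) and as a strict-weak-ordering comparator for sorting. The added V == V2 and Op1 == Op2 fast paths keep self-comparison consistent in both modes, and the final opcode check now only orders instructions whose opcodes actually differ. A toy model of the dual-mode shape, standard C++ only.]

    // The same walk answers either "compatible?" or "sorts before?".
    bool compareLike(int L, int R, bool IsCompatibility) {
      if (L == R)
        return IsCompatibility;  // identical: compatible, but not "less than"
      if (IsCompatibility)
        return false;            // any difference breaks compatibility
      return L < R;              // ordering branch
    }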