diff options
Diffstat (limited to 'lib/Target')
326 files changed, 5474 insertions, 1525 deletions
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 981fd22c213c..5ce57926cc03 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -35,11 +35,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 30e2b2310456..544f67433fd5 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-dead-defs" diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 33fec74998d6..160107cd7e2b 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 9ac7ecb9cdb4..e8fcf1a0e9b7 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2827,7 +2827,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { return false; EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); - if (SrcVT == MVT::f128) + if (SrcVT == MVT::f128 || SrcVT == MVT::f16) return false; unsigned Opc; @@ -2854,6 +2854,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; + // Let regular ISEL handle FP16 + if (DestVT == MVT::f16) + return false; + assert((DestVT == MVT::f32 || DestVT == MVT::f64) && "Unexpected value type."); diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index b18fb30eb2d4..8c2c0a564c30 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2566,7 +2566,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { // pstatefield for the MSR (immediate) instruction, we also require that an // immediate value has been provided as an argument, we know that this is // the case as it has been ensured by semantic checking. - auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());; + auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); if (PMapper) { assert (isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression."); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f798010906cc..059556a560c0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" @@ -22,9 +22,9 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -51,10 +51,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index faf39be9b41e..eea012382150 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 6e6daf812295..01196817f311 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetOpcodes.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 976498aa70d6..9243eb91cc1a 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -16,10 +16,10 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 038162c6f54a..fe4ef4b40ece 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -17,8 +17,8 @@ #define DEBUG_TYPE "aarch64-pbqp" -#include "AArch64.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 789270c2a34b..9b3899e0681c 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -15,13 +15,13 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 12a2e9a867f0..4bc2c060a068 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AArch64TargetObjectFile.h" #include "AArch64TargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 43569af04347..a4328682b93c 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -9,8 +9,8 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 449d732a8d44..e841fb894519 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ebf05ae303dd..43a6fa9ce089 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -11,8 +11,9 @@ #include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,7 +23,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 10e7241da709..f7dda92fb551 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -15,11 +15,11 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 271263507ae1..031aa8b81e35 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -30,7 +31,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 3d296ba4806b..19b2576f6895 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index b50e8d1d659e..6ab2b9ef0459 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping, Implies >; +def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, + [FeatureSouthernIslands, + FeatureFastFMAF32, + HalfRate64Ops, + FeatureLDSBankCount32]>; + +def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, + [FeatureSouthernIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, + [FeatureSeaIslands, + FeatureLDSBankCount16]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, @@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, FeatureLDSBankCount16, FeatureXNACK]>; -def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>; +def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; + +def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 3c99f48e818a..faa424eb0a64 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -10,15 +10,15 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 91b3649f5c39..3c788fa1dcea 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0959014812d8..83ad1a5c6ee3 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,25 +17,25 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUTargetMachine.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "SIDefines.h" -#include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index e5adeeb465e1..0a58ce06704d 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H -#include "AMDKernelCodeT.h" #include "AMDGPU.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include <cstddef> diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index e67ae092fdda..515cc07dd449 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -18,8 +18,8 @@ #include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" -#include "SIRegisterInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index d923cb117c12..b312dbc8d14d 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -25,13 +25,13 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 19fce064783d..251c2f9bb25a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUSubtarget.h" #include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 723e8a7b54e2..5586b513b5fc 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -21,6 +21,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,7 +31,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" -#include "SIInstrInfo.h" using namespace llvm; static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 12caa5118342..41cc7d7093ec 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -17,8 +17,8 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #include "AMDGPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c87102e55dfb..ef845f44d365 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "AMDGPU.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 267f4807a788..b889788c3426 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,10 +14,10 @@ #include "AMDGPULegalizerInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); + setAction({G_ICMP, S1}, Legal); + setAction({G_ICMP, 1, S32}, Legal); + setAction({G_LOAD, P1}, Legal); setAction({G_LOAD, P2}, Legal); setAction({G_LOAD, S32}, Legal); setAction({G_LOAD, 1, P1}, Legal); setAction({G_LOAD, 1, P2}, Legal); + setAction({G_SELECT, S32}, Legal); + setAction({G_SELECT, 1, S1}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index f1ef6281c90f..63dd0d726d91 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -38,7 +38,6 @@ using namespace llvm; #include "AMDGPUGenMCPseudoLowering.inc" - AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 6d2785ba1c60..2071b6f157cd 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 8bfeb67ad4ec..99bb61b21db0 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 07f92918a43f..625c9b77e2de 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -33,11 +33,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: { LoadInst *LI = cast<LoadInst>(Inst); - return !LI->isVolatile(); + // Currently only handle the case where the Pointer Operand is a GEP so check for that case. + return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile(); } case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; case Instruction::Store: { - // Must be the stored pointer operand, not a stored value. + // Must be the stored pointer operand, not a stored value, plus + // since it should be canonical form, the User should be a GEP. StoreInst *SI = cast<StoreInst>(Inst); - return (SI->getPointerOperand() == User) && !SI->isVolatile(); + return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile(); } default: return false; @@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. + // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these + // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getElementType()->isVectorTy() || + AllocaTy->getElementType()->isArrayTy() || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2) { DEBUG(dbgs() << " Cannot convert type to vector\n"); @@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(0); + Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); @@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(1); + StoreInst *SI = cast<StoreInst>(Inst); + Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, - Inst->getOperand(0), + SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp new file mode 100644 index 000000000000..36d88f52910d --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -0,0 +1,353 @@ +//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// + +#ifdef AMDGPU_REG_ASM_NAMES + +static const char *const VGPR32RegNames[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", + "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", + "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", + "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", + "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", + "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", + "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", + "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", + "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", + "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", + "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", + "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", + "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", + "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", + "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", + "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", + "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", + "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", + "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", + "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", + "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", + "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", + "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 7c198a1b8a3f..201fdc1974c6 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -36,7 +36,6 @@ protected: #define GET_TARGET_REGBANK_CLASS #include "AMDGPUGenRegisterBank.inc" - }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const SIRegisterInfo *TRI; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index b2867fcc49f9..ff58aa5741a1 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" - // Forced to be here by one .inc const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( const MachineFunction *MF) const { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index ed9cbb994fad..5f4f20316a6b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,12 +16,12 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "R600ISelLowering.h" #include "R600FrameLowering.h" -#include "SIInstrInfo.h" -#include "SIISelLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" #include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" @@ -57,9 +57,12 @@ public: enum { ISAVersion0_0_0, + ISAVersion6_0_0, + ISAVersion6_0_1, ISAVersion7_0_0, ISAVersion7_0_1, ISAVersion7_0_2, + ISAVersion7_0_3, ISAVersion8_0_0, ISAVersion8_0_1, ISAVersion8_0_2, @@ -67,7 +70,9 @@ public: ISAVersion8_0_4, ISAVersion8_1_0, ISAVersion9_0_0, - ISAVersion9_0_1 + ISAVersion9_0_1, + ISAVersion9_0_2, + ISAVersion9_0_3 }; enum TrapHandlerAbi { @@ -787,7 +792,7 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 404598ff4738..b644eba536fa 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -28,26 +28,26 @@ #include "GCNSchedStrategy.h" #include "R600MachineScheduler.h" #include "SIMachineScheduler.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/AlwaysInliner.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Vectorize.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Vectorize.h" #include <memory> using namespace llvm; @@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); - addPass(createSIShrinkInstructionsPass()); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); addPass(&MachineLICMID); @@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); } + addPass(createSIShrinkInstructionsPass()); } bool GCNPassConfig::addILPOpts() { diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index c96761c0b04e..6c1885e67fcb 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index beafebc1284a..dee3d2856701 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cc68c971b249..16e3b7b4ebee 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -11,18 +11,19 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" -#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -40,12 +41,11 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -814,14 +814,8 @@ private: bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; @@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); } - // To improve diagnostics, do not skip delimiters on errors - if (!Failed) { - if (getLexer().isNot(AsmToken::RParen)) { - return true; - } - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { - const AsmToken NextToken = getLexer().peekTok(); - if (NextToken.is(AsmToken::Identifier)) { - Parser.Lex(); - } + if (Failed) { + Error(ValLoc, "too large value for " + CntName); + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); } } - return Failed; + return false; } OperandMatchResultTy diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index cafce0164fa9..e30844f082cd 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9b3cde7c4df6..88c92b9582fd 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,21 +20,20 @@ #include "AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 0ff405a71e9b..5fa3cf1a223f 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -20,8 +20,8 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include <cstdint> #include <algorithm> +#include <cstdint> #include <memory> namespace llvm { diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 80fc4ac9d2a3..cd9e7fb04f16 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 523eea41897e..b84640230eee 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUInstPrinter.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f3266fe82955..0a9c2b94c1ee 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -8,8 +8,8 @@ /// \file //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h deleted file mode 100644 index 816e8c744b27..000000000000 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h +++ /dev/null @@ -1,422 +0,0 @@ -//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief AMDGPU Code Object Metadata definitions and in-memory -/// representations. -/// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H - -#include <cstdint> -#include <string> -#include <system_error> -#include <vector> - -namespace llvm { -namespace AMDGPU { - -//===----------------------------------------------------------------------===// -// Code Object Metadata. -//===----------------------------------------------------------------------===// -namespace CodeObject { - -/// \brief Code object metadata major version. -constexpr uint32_t MetadataVersionMajor = 1; -/// \brief Code object metadata minor version. -constexpr uint32_t MetadataVersionMinor = 0; - -/// \brief Code object metadata beginning assembler directive. -constexpr char MetadataAssemblerDirectiveBegin[] = - ".amdgpu_code_object_metadata"; -/// \brief Code object metadata ending assembler directive. -constexpr char MetadataAssemblerDirectiveEnd[] = - ".end_amdgpu_code_object_metadata"; - -/// \brief Access qualifiers. -enum class AccessQualifier : uint8_t { - Default = 0, - ReadOnly = 1, - WriteOnly = 2, - ReadWrite = 3, - Unknown = 0xff -}; - -/// \brief Address space qualifiers. -enum class AddressSpaceQualifier : uint8_t { - Private = 0, - Global = 1, - Constant = 2, - Local = 3, - Generic = 4, - Region = 5, - Unknown = 0xff -}; - -/// \brief Value kinds. -enum class ValueKind : uint8_t { - ByValue = 0, - GlobalBuffer = 1, - DynamicSharedPointer = 2, - Sampler = 3, - Image = 4, - Pipe = 5, - Queue = 6, - HiddenGlobalOffsetX = 7, - HiddenGlobalOffsetY = 8, - HiddenGlobalOffsetZ = 9, - HiddenNone = 10, - HiddenPrintfBuffer = 11, - HiddenDefaultQueue = 12, - HiddenCompletionAction = 13, - Unknown = 0xff -}; - -/// \brief Value types. -enum class ValueType : uint8_t { - Struct = 0, - I8 = 1, - U8 = 2, - I16 = 3, - U16 = 4, - F16 = 5, - I32 = 6, - U32 = 7, - F32 = 8, - I64 = 9, - U64 = 10, - F64 = 11, - Unknown = 0xff -}; - -//===----------------------------------------------------------------------===// -// Kernel Metadata. -//===----------------------------------------------------------------------===// -namespace Kernel { - -//===----------------------------------------------------------------------===// -// Kernel Attributes Metadata. -//===----------------------------------------------------------------------===// -namespace Attrs { - -namespace Key { -/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize. -constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize"; -/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint. -constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint"; -/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint. -constexpr char VecTypeHint[] = "VecTypeHint"; -} // end namespace Key - -/// \brief In-memory representation of kernel attributes metadata. -struct Metadata final { - /// \brief 'reqd_work_group_size' attribute. Optional. - std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>(); - /// \brief 'work_group_size_hint' attribute. Optional. - std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>(); - /// \brief 'vec_type_hint' attribute. Optional. - std::string mVecTypeHint = std::string(); - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel attributes metadata is empty, false otherwise. - bool empty() const { - return mReqdWorkGroupSize.empty() && - mWorkGroupSizeHint.empty() && - mVecTypeHint.empty(); - } - - /// \returns True if kernel attributes metadata is not empty, false otherwise. - bool notEmpty() const { - return !empty(); - } -}; - -} // end namespace Attrs - -//===----------------------------------------------------------------------===// -// Kernel Argument Metadata. -//===----------------------------------------------------------------------===// -namespace Arg { - -namespace Key { -/// \brief Key for Kernel::Arg::Metadata::mSize. -constexpr char Size[] = "Size"; -/// \brief Key for Kernel::Arg::Metadata::mAlign. -constexpr char Align[] = "Align"; -/// \brief Key for Kernel::Arg::Metadata::mValueKind. -constexpr char ValueKind[] = "ValueKind"; -/// \brief Key for Kernel::Arg::Metadata::mValueType. -constexpr char ValueType[] = "ValueType"; -/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign. -constexpr char PointeeAlign[] = "PointeeAlign"; -/// \brief Key for Kernel::Arg::Metadata::mAccQual. -constexpr char AccQual[] = "AccQual"; -/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual. -constexpr char AddrSpaceQual[] = "AddrSpaceQual"; -/// \brief Key for Kernel::Arg::Metadata::mIsConst. -constexpr char IsConst[] = "IsConst"; -/// \brief Key for Kernel::Arg::Metadata::mIsPipe. -constexpr char IsPipe[] = "IsPipe"; -/// \brief Key for Kernel::Arg::Metadata::mIsRestrict. -constexpr char IsRestrict[] = "IsRestrict"; -/// \brief Key for Kernel::Arg::Metadata::mIsVolatile. -constexpr char IsVolatile[] = "IsVolatile"; -/// \brief Key for Kernel::Arg::Metadata::mName. -constexpr char Name[] = "Name"; -/// \brief Key for Kernel::Arg::Metadata::mTypeName. -constexpr char TypeName[] = "TypeName"; -} // end namespace Key - -/// \brief In-memory representation of kernel argument metadata. -struct Metadata final { - /// \brief Size in bytes. Required. - uint32_t mSize = 0; - /// \brief Alignment in bytes. Required. - uint32_t mAlign = 0; - /// \brief Value kind. Required. - ValueKind mValueKind = ValueKind::Unknown; - /// \brief Value type. Required. - ValueType mValueType = ValueType::Unknown; - /// \brief Pointee alignment in bytes. Optional. - uint32_t mPointeeAlign = 0; - /// \brief Access qualifier. Optional. - AccessQualifier mAccQual = AccessQualifier::Unknown; - /// \brief Address space qualifier. Optional. - AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown; - /// \brief True if 'const' qualifier is specified. Optional. - bool mIsConst = false; - /// \brief True if 'pipe' qualifier is specified. Optional. - bool mIsPipe = false; - /// \brief True if 'restrict' qualifier is specified. Optional. - bool mIsRestrict = false; - /// \brief True if 'volatile' qualifier is specified. Optional. - bool mIsVolatile = false; - /// \brief Name. Optional. - std::string mName = std::string(); - /// \brief Type name. Optional. - std::string mTypeName = std::string(); - - /// \brief Default constructor. - Metadata() = default; -}; - -} // end namespace Arg - -//===----------------------------------------------------------------------===// -// Kernel Code Properties Metadata. -//===----------------------------------------------------------------------===// -namespace CodeProps { - -namespace Key { -/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize. -constexpr char KernargSegmentSize[] = "KernargSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize. -constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize. -constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs. -constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs. -constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs"; -/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign. -constexpr char KernargSegmentAlign[] = "KernargSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign. -constexpr char GroupSegmentAlign[] = "GroupSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign. -constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize. -constexpr char WavefrontSize[] = "WavefrontSize"; -} // end namespace Key - -/// \brief In-memory representation of kernel code properties metadata. -struct Metadata final { - /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory - /// holds the values of the arguments to the kernel. Optional. - uint64_t mKernargSegmentSize = 0; - /// \brief Size in bytes of the group segment memory required by a workgroup. - /// This value does not include any dynamically allocated group segment memory - /// that may be added when the kernel is dispatched. Optional. - uint32_t mWorkgroupGroupSegmentSize = 0; - /// \brief Size in bytes of the private segment memory required by a workitem. - /// Private segment memory includes arg, spill and private segments. Optional. - uint32_t mWorkitemPrivateSegmentSize = 0; - /// \brief Total number of SGPRs used by a wavefront. Optional. - uint16_t mWavefrontNumSGPRs = 0; - /// \brief Total number of VGPRs used by a workitem. Optional. - uint16_t mWorkitemNumVGPRs = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// kernarg memory segment. Expressed as a power of two. Optional. - uint8_t mKernargSegmentAlign = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// group memory segment. Expressed as a power of two. Optional. - uint8_t mGroupSegmentAlign = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// private memory segment. Expressed as a power of two. Optional. - uint8_t mPrivateSegmentAlign = 0; - /// \brief Wavefront size. Expressed as a power of two. Optional. - uint8_t mWavefrontSize = 0; - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel code properties metadata is empty, false - /// otherwise. - bool empty() const { - return !notEmpty(); - } - - /// \returns True if kernel code properties metadata is not empty, false - /// otherwise. - bool notEmpty() const { - return mKernargSegmentSize || mWorkgroupGroupSegmentSize || - mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs || - mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign || - mPrivateSegmentAlign || mWavefrontSize; - } -}; - -} // end namespace CodeProps - -//===----------------------------------------------------------------------===// -// Kernel Debug Properties Metadata. -//===----------------------------------------------------------------------===// -namespace DebugProps { - -namespace Key { -/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion. -constexpr char DebuggerABIVersion[] = "DebuggerABIVersion"; -/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs. -constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs"; -/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR. -constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR"; -/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR. -constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR"; -/// \brief Key for -/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR. -constexpr char WavefrontPrivateSegmentOffsetSGPR[] = - "WavefrontPrivateSegmentOffsetSGPR"; -} // end namespace Key - -/// \brief In-memory representation of kernel debug properties metadata. -struct Metadata final { - /// \brief Debugger ABI version. Optional. - std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>(); - /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if - /// mDebuggerABIVersion is not set. Optional. - uint16_t mReservedNumVGPRs = 0; - /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if - /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional. - uint16_t mReservedFirstVGPR = uint16_t(-1); - /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used - /// for the entire kernel execution. Must be uint16_t(-1) if - /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional. - uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1); - /// \brief Fixed SGPR used to hold the wave scratch offset for the entire - /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set - /// or SGPR is not used or not known. Optional. - uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1); - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel debug properties metadata is empty, false - /// otherwise. - bool empty() const { - return !notEmpty(); - } - - /// \returns True if kernel debug properties metadata is not empty, false - /// otherwise. - bool notEmpty() const { - return !mDebuggerABIVersion.empty(); - } -}; - -} // end namespace DebugProps - -namespace Key { -/// \brief Key for Kernel::Metadata::mName. -constexpr char Name[] = "Name"; -/// \brief Key for Kernel::Metadata::mLanguage. -constexpr char Language[] = "Language"; -/// \brief Key for Kernel::Metadata::mLanguageVersion. -constexpr char LanguageVersion[] = "LanguageVersion"; -/// \brief Key for Kernel::Metadata::mAttrs. -constexpr char Attrs[] = "Attrs"; -/// \brief Key for Kernel::Metadata::mArgs. -constexpr char Args[] = "Args"; -/// \brief Key for Kernel::Metadata::mCodeProps. -constexpr char CodeProps[] = "CodeProps"; -/// \brief Key for Kernel::Metadata::mDebugProps. -constexpr char DebugProps[] = "DebugProps"; -} // end namespace Key - -/// \brief In-memory representation of kernel metadata. -struct Metadata final { - /// \brief Name. Required. - std::string mName = std::string(); - /// \brief Language. Optional. - std::string mLanguage = std::string(); - /// \brief Language version. Optional. - std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>(); - /// \brief Attributes metadata. Optional. - Attrs::Metadata mAttrs = Attrs::Metadata(); - /// \brief Arguments metadata. Optional. - std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>(); - /// \brief Code properties metadata. Optional. - CodeProps::Metadata mCodeProps = CodeProps::Metadata(); - /// \brief Debug properties metadata. Optional. - DebugProps::Metadata mDebugProps = DebugProps::Metadata(); - - /// \brief Default constructor. - Metadata() = default; -}; - -} // end namespace Kernel - -namespace Key { -/// \brief Key for CodeObject::Metadata::mVersion. -constexpr char Version[] = "Version"; -/// \brief Key for CodeObject::Metadata::mPrintf. -constexpr char Printf[] = "Printf"; -/// \brief Key for CodeObject::Metadata::mKernels. -constexpr char Kernels[] = "Kernels"; -} // end namespace Key - -/// \brief In-memory representation of code object metadata. -struct Metadata final { - /// \brief Code object metadata version. Required. - std::vector<uint32_t> mVersion = std::vector<uint32_t>(); - /// \brief Printf metadata. Optional. - std::vector<std::string> mPrintf = std::vector<std::string>(); - /// \brief Kernels metadata. Optional. - std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>(); - - /// \brief Default constructor. - Metadata() = default; - - /// \brief Converts \p YamlString to \p CodeObjectMetadata. - static std::error_code fromYamlString(std::string YamlString, - Metadata &CodeObjectMetadata); - - /// \brief Converts \p CodeObjectMetadata to \p YamlString. - static std::error_code toYamlString(Metadata CodeObjectMetadata, - std::string &YamlString); -}; - -} // end namespace CodeObject -} // end namespace AMDGPU -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 647017d5061d..4e828a791e09 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -13,20 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata( "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata")); -namespace yaml { - -template <> -struct ScalarEnumerationTraits<AccessQualifier> { - static void enumeration(IO &YIO, AccessQualifier &EN) { - YIO.enumCase(EN, "Default", AccessQualifier::Default); - YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); - YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); - YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); - } -}; - -template <> -struct ScalarEnumerationTraits<AddressSpaceQualifier> { - static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { - YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); - YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); - YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); - YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); - YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); - YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); - } -}; - -template <> -struct ScalarEnumerationTraits<ValueKind> { - static void enumeration(IO &YIO, ValueKind &EN) { - YIO.enumCase(EN, "ByValue", ValueKind::ByValue); - YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); - YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); - YIO.enumCase(EN, "Sampler", ValueKind::Sampler); - YIO.enumCase(EN, "Image", ValueKind::Image); - YIO.enumCase(EN, "Pipe", ValueKind::Pipe); - YIO.enumCase(EN, "Queue", ValueKind::Queue); - YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); - YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); - YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); - YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); - YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); - YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); - YIO.enumCase(EN, "HiddenCompletionAction", - ValueKind::HiddenCompletionAction); - } -}; - -template <> -struct ScalarEnumerationTraits<ValueType> { - static void enumeration(IO &YIO, ValueType &EN) { - YIO.enumCase(EN, "Struct", ValueType::Struct); - YIO.enumCase(EN, "I8", ValueType::I8); - YIO.enumCase(EN, "U8", ValueType::U8); - YIO.enumCase(EN, "I16", ValueType::I16); - YIO.enumCase(EN, "U16", ValueType::U16); - YIO.enumCase(EN, "F16", ValueType::F16); - YIO.enumCase(EN, "I32", ValueType::I32); - YIO.enumCase(EN, "U32", ValueType::U32); - YIO.enumCase(EN, "F32", ValueType::F32); - YIO.enumCase(EN, "I64", ValueType::I64); - YIO.enumCase(EN, "U64", ValueType::U64); - YIO.enumCase(EN, "F64", ValueType::F64); - } -}; - -template <> -struct MappingTraits<Kernel::Attrs::Metadata> { - static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { - YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, - MD.mReqdWorkGroupSize, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, - MD.mWorkGroupSizeHint, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, - MD.mVecTypeHint, std::string()); - } -}; - -template <> -struct MappingTraits<Kernel::Arg::Metadata> { - static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { - YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); - YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); - YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); - YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); - YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, - uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, - AddressSpaceQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); - YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); - YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); - } -}; - -template <> -struct MappingTraits<Kernel::CodeProps::Metadata> { - static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); - } -}; - -template <> -struct MappingTraits<Kernel::DebugProps::Metadata> { - static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { - YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, - MD.mDebuggerABIVersion, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, - MD.mReservedNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, - MD.mReservedFirstVGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, - MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, - MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); - } -}; - -template <> -struct MappingTraits<Kernel::Metadata> { - static void mapping(IO &YIO, Kernel::Metadata &MD) { - YIO.mapRequired(Kernel::Key::Name, MD.mName); - YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); - YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, - std::vector<uint32_t>()); - if (!MD.mAttrs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); - if (!MD.mArgs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Args, MD.mArgs); - if (!MD.mCodeProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); - if (!MD.mDebugProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); - } -}; - -template <> -struct MappingTraits<CodeObject::Metadata> { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { - YIO.mapRequired(Key::Version, MD.mVersion); - YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>()); - if (!MD.mKernels.empty() || !YIO.outputting()) - YIO.mapOptional(Key::Kernels, MD.mKernels); - } -}; - -} // end namespace yaml - namespace AMDGPU { - -/* static */ -std::error_code CodeObject::Metadata::fromYamlString( - std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; - return YamlInput.error(); -} - -/* static */ -std::error_code CodeObject::Metadata::toYamlString( - CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); - yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max()); - YamlOutput << CodeObjectMetadata; - return std::error_code(); -} - namespace CodeObject { void MetadataStreamer::dump(StringRef YamlString) const { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 8d4c51763f63..c6681431d74d 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,9 +17,9 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPU.h" -#include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 073d19422e86..6abe7f3d37d5 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8dc863f723e2..2a0032fc9adc 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" namespace llvm { @@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; -} - -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; -} - bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) @@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast<MCSymbolELF>( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_LOCAL); -} - -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast<MCSymbolELF>( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_GLOBAL); -} - bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 5c588bbded9c..968128e94d0b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -44,10 +44,6 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; - - virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); virtual void EmitKernelCodeObjectMetadata( @@ -74,10 +70,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; @@ -105,10 +97,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6015ec190fd4..eab90e1d344c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "R600Defines.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index 0e4eda982139..f6f2582aa11b 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx600", SIFullSpeedModel, + [FeatureISAVersion6_0_0]>; + +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureISAVersion6_0_0] +>; + +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"tahiti", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx601", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"bonaire", SIQuarterSpeedModel, +def : ProcessorModel<"gfx700", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"kabini", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2] +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, + [FeatureISAVersion7_0_0] >; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"hawaii", SIFullSpeedModel, +def : ProcessorModel<"gfx701", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; -def : ProcessorModel<"mullins", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2]>; - -def : ProcessorModel<"gfx700", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - -def : ProcessorModel<"gfx701", SIFullSpeedModel, +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; @@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel, [FeatureISAVersion7_0_2] >; +def : ProcessorModel<"gfx703", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"kabini", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"mullins", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3]>; + //===----------------------------------------------------------------------===// // Volcanic Islands //===----------------------------------------------------------------------===// @@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; -def : ProcessorModel<"gfx900", SIQuarterSpeedModel, - [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32] +//===----------------------------------------------------------------------===// +// GFX9 +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx900", SIQuarterSpeedModel, + [FeatureISAVersion9_0_0] +>; + +def : ProcessorModel<"gfx901", SIQuarterSpeedModel, + [FeatureISAVersion9_0_1] +>; + +def : ProcessorModel<"gfx902", SIQuarterSpeedModel, + [FeatureISAVersion9_0_2] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32] +def : ProcessorModel<"gfx903", SIQuarterSpeedModel, + [FeatureISAVersion9_0_3] >; + diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 09b328765604..6993e8a62a9c 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,15 +12,14 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 03fc1aff5ec1..0d8ccd088ec4 100644 --- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -15,10 +15,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 5c30a0734f0d..66def2d29caf 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp index 1f01ad732e00..37787b3c5f72 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -10,8 +10,8 @@ #include "R600FrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/MathExtras.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 60b913cfd39a..c55878f8bff0 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); - Mask = DAG.getConstant(0xffff, DL, MVT::i32);; + Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 2422d57269eb..c5da5e404200 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "R600InstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" -#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/BitVector.h" @@ -35,8 +35,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> -#include <cstring> #include <cstdint> +#include <cstring> #include <iterator> #include <utility> #include <vector> diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index db18e5bd1afa..47fda1c8fa82 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index 3e957126b497..1cb40938cee7 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" @@ -24,6 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 62ebef8e91af..b5c439b21b89 100644 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 3cca815d8773..5f5f25103c02 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,10 +65,10 @@ /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index dfac068d1f69..e10f1ed3762e 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + if (!Src0->isReg() || !Src1->isReg() || + Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 97bb0f0c0656..b1bd14e421f0 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,10 +8,10 @@ //==-----------------------------------------------------------------------===// #include "SIFrameLowering.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index b48b23911105..599ee942d738 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,12 +17,12 @@ #define _USE_MATH_DEFINES #endif +#include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" -#include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16); - return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; + return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc); } SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c10badba88f3..0f009a48754a 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -229,7 +229,7 @@ public: MachineInstr &MI); BlockWaitcntBrackets() - : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false), + : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 36d29b8ecf06..58c05cf16f15 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 35d3a93d8710..5f1c7f1fc42f 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -60,8 +60,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 3680e02da576..ba616ada0c9c 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 348bb4fa0260..9fdb8caac6f2 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index 9d4e677400e6..bb17dbbdfbd6 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "SIMachineScheduler.h" #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index fae249b04492..f4ddf1891683 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,13 +20,12 @@ /// //===----------------------------------------------------------------------===// - #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include <unordered_map> @@ -129,7 +128,8 @@ public: bool getNeg() const { return Neg; } bool getSext() const { return Sext; } - uint64_t getSrcMods() const; + uint64_t getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const; }; class SDWADstOperand : public SDWAOperand { @@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg, return SuperMask.all(); } -uint64_t SDWASrcOperand::getSrcMods() const { +uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const { uint64_t Mods = 0; + const auto *MI = SrcOp->getParent(); + if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { + Mods = Mod->getImm(); + } + } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { + Mods = Mod->getImm(); + } + } if (Abs || Neg) { assert(!Sext && "Float and integer src modifiers can't be set simulteniously"); Mods |= Abs ? SISrcMods::ABS : 0; - Mods |= Neg ? SISrcMods::NEG : 0; + Mods ^= Neg ? SISrcMods::NEG : 0; } else if (Sext) { Mods |= SISrcMods::SEXT; } @@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { } copyRegOperand(*Src, *getTargetOperand()); SrcSel->setImm(getSrcSel()); - SrcMods->setImm(getSrcMods()); + SrcMods->setImm(getSrcMods(TII, Src)); getTargetOperand()->setIsKill(false); return true; } @@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { switch (Opcode) { case AMDGPU::V_LSHRREV_B32_e32: case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: { + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { // from: v_lshrrev_b32_e32 v1, 16/24, v0 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 @@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { auto SDWADst = make_unique<SDWADstOperand>( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique<SDWASrcOperand>( Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, - Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { case AMDGPU::V_LSHRREV_B16_e32: case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: { + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { auto SDWADst = make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique<SDWASrcOperand>( Src1, Dst, BYTE_1, false, false, - Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { ++NumSDWAPatternsFound; break; } - case AMDGPU::V_AND_B32_e32: { + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { // e.g.: // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; auto Imm = foldToImm(*Src0); - if (!Imm) - break; - if (*Imm != 0x0000ffff && *Imm != 0x000000ff) + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) break; - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (TRI->isPhysicalRegister(Src1->getReg()) || @@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { break; auto SDWASrc = make_unique<SDWASrcOperand>( - Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { // Check if this instruction has opcode that supports SDWA - return AMDGPU::getSDWAOp(MI.getOpcode()) != -1; + unsigned Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) != -1) + return true; + int Opc32 = AMDGPU::getVOPe32(Opc); + if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + return false; } bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { // Convert to sdwa int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + if (!Dst && !TII->isVOPC(MI)) + return false; + const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - assert(TII->isVOPC(MI)); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, Src0 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src0); // Copy src1 if present, initialize src1_modifiers. @@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, assert( AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src1); - } else { - assert(TII->isVOP1(MI)); } if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || @@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { PotentialMatches.clear(); SDWAOperands.clear(); + bool Ret = !ConvertedInstructions.empty(); while (!ConvertedInstructions.empty()) legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); - return false; + return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 6fb01a09fe13..b611f28fcabd 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, + VGPR96RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, + AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, + VGPR128RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, + SGPR128RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, + AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR256RegNames); + + REG_RANGE( + AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, + AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR512RegNames); + + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, + AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR256RegNames); + + REG_RANGE( + AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, + AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR512RegNames + ); + +#undef REG_RANGE + + // FIXME: Rename flat_scr so we don't need to special case this. + switch (Reg) { + case AMDGPU::FLAT_SCR: + return "flat_scratch"; + case AMDGPU::FLAT_SCR_LO: + return "flat_scratch_lo"; + case AMDGPU::FLAT_SCR_HI: + return "flat_scratch_hi"; + default: + // For the special named registers the default is fine. + return TargetRegisterInfo::getRegAsmName(Reg); + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index a648c178101a..8fed6d5f9710 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -16,8 +16,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -118,6 +118,8 @@ public: bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 630f469eabf0..f581e69980c7 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <algorithm> @@ -38,7 +38,6 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" - #define GET_INSTRINFO_NAMED_OPS #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS @@ -104,6 +103,11 @@ namespace AMDGPU { namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { + // SI. + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; @@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 1}; if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; // VI. if (Features.test(FeatureISAVersion8_0_0)) @@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {9, 0, 0}; if (Features.test(FeatureISAVersion9_0_1)) return {9, 0, 1}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_3)) + return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } -MCSection *getHSATextSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_EXECINSTR | - ELF::SHF_AMDGPU_HSA_AGENT | - ELF::SHF_AMDGPU_HSA_CODE); -} - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL | - ELF::SHF_AMDGPU_HSA_AGENT); -} - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL); -} - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | - ELF::SHF_AMDGPU_HSA_AGENT); -} - bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 19888ad7556a..eff0230d21f5 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); -MCSection *getHSATextSection(MCContext &Ctx); - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 77fc9551cff9..a8ca593f14ed 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>; -def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; -def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>; @@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; let SubtargetPredicate = isCIVI in { +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 14e197f477f1..f9da036c7e46 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -43,9 +45,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f8b65573f9cd..8715657ad5e2 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -21,9 +21,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b18ed509ed23..b4fb292c0116 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index a33d025d114e..a7ac9a1dca6e 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -122,8 +123,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), MRI.createGenericVirtualRegister(LLT::scalar(32))}; - MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0); - MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32); + MIRBuilder.buildUnmerge(NewRegs, Arg.Reg); bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle(); if (!IsLittle) @@ -339,7 +339,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { if (!IsLittle) std::swap(NewRegs[0], NewRegs[1]); - MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32}); + MIRBuilder.buildMerge(Arg.Reg, NewRegs); return 1; } @@ -461,7 +461,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI<ARMTargetLowering>(); const auto &DL = MF.getDataLayout(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const auto &STI = MF.getSubtarget(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); if (MF.getSubtarget<ARMSubtarget>().genLongCalls()) @@ -473,6 +474,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // registers, but don't insert it yet. auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( TRI->getCallPreservedMask(MF, CallConv)); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0)); + } SmallVector<ArgInfo, 8> ArgInfos; for (auto Arg : OrigArgs) { diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 6434df317aa8..667337dc9267 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -21,10 +21,10 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4f6a73b5980d..384f80356cc8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index c2b2502843c0..16b54e8848c2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -20,9 +20,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 949d821e36b2..5b2d093e8f0d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "ARMISelLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -29,13 +29,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" @@ -61,7 +61,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -103,8 +102,8 @@ #include <cstdlib> #include <iterator> #include <limits> -#include <tuple> #include <string> +#include <tuple> #include <utility> #include <vector> diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0f225156d4ca..817b567db767 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0, IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index b1f059835ff5..2ae3bad4076b 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -127,34 +127,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -static bool selectSequence(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP"); - - // We only support G_SEQUENCE as a way to stick together two scalar GPRs +static bool selectMergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); + + // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_SEQUENCE"); + "Unsupported operand for G_MERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - unsigned VReg2 = MIB->getOperand(3).getReg(); + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - - // Remove the operands corresponding to the offsets. - MIB->RemoveOperand(4); - MIB->RemoveOperand(2); + "Unsupported operand for G_MERGE_VALUES"); MIB->setDesc(TII.get(ARM::VMOVDRR)); MIB.add(predOps(ARMCC::AL)); @@ -162,30 +158,32 @@ static bool selectSequence(MachineInstrBuilder &MIB, return true; } -static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP"); +static bool selectUnmergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); - // We only support G_EXTRACT as a way to break up one DPR into two GPRs. + // We only support G_UNMERGE_VALUES as a way to break up one DPR into two + // GPRs. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_EXTRACT"); + "Unsupported operand for G_UNMERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; - assert(MRI.getType(VReg1).getSizeInBits() == 64 && - RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_EXTRACT"); - assert(MIB->getOperand(2).getImm() % 32 == 0 && - "Unsupported operand for G_EXTRACT"); - - // Remove the operands corresponding to the offsets. - MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32); + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 64 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); - MIB->setDesc(TII.get(ARM::VGETLNi32)); + MIB->setDesc(TII.get(ARM::VMOVRRD)); MIB.add(predOps(ARMCC::AL)); return true; @@ -407,13 +405,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { MIB.addImm(0).add(predOps(ARMCC::AL)); break; } - case G_SEQUENCE: { - if (!selectSequence(MIB, TII, MRI, TRI, RBI)) + case G_MERGE_VALUES: { + if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } - case G_EXTRACT: { - if (!selectExtract(MIB, TII, MRI, TRI, RBI)) + case G_UNMERGE_VALUES: { + if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 5bf6c7aed6b8..2d490b7c303e 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -45,7 +45,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (unsigned Op : {G_ADD, G_SUB, G_MUL}) { + for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { for (auto Ty : {s1, s8, s16}) setAction({Op, Ty}, WidenScalar); setAction({Op, s32}, Legal); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 72fcf7cd6a4f..7a452d4a2095 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 9e9c1ba6c114..13acea3c28a9 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index a20997c95cd9..f59b075e6dd9 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -221,6 +221,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_ADD: case G_SUB: case G_MUL: + case G_AND: + case G_OR: + case G_XOR: case G_SDIV: case G_UDIV: case G_SEXT: @@ -252,30 +255,32 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; - case G_SEQUENCE: { - // We only support G_SEQUENCE for creating a double precision floating point - // value out of two GPRs. + case G_MERGE_VALUES: { + // We only support G_MERGE_VALUES for creating a double precision floating + // point value out of two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || Ty2.getSizeInBits() != 32) return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } - case G_EXTRACT: { - // We only support G_EXTRACT for splitting a double precision floating point - // value into two GPRs. + case G_UNMERGE_VALUES: { + // We only support G_UNMERGE_VALUES for splitting a double precision + // floating point value into two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 || - MI.getOperand(2).getImm() % 32 != 0) + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 64) return getInvalidInstructionMapping(); - OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], - &ARM::ValueMappings[ARM::DPR3OpsIdx], - nullptr, nullptr}); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); break; } default: diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b8a708a20a95..d9d0c27c6304 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -28,10 +28,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Target/TargetOptions.h" #include <cassert> #include <string> diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index f5e4043882ff..c0506cfda612 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" @@ -389,6 +390,20 @@ public: return getTM<ARMBaseTargetMachine>(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + // add DAG Mutations here. + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + // add DAG Mutations here. + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index edbf2b99126c..a5b27abeb27f 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" #include <cassert> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ada816c16389..19fba3033bb2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,10 +41,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetParser.h" @@ -1026,6 +1026,15 @@ public: ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { + // If we have an immediate that's not a constant, treat it as an expression + // needing a fixup. + if (isImm() && !isa<MCConstantExpr>(getImm())) { + // We want to avoid matching :upper16: and :lower16: as we want these + // expressions to match in isImm0_65535Expr() + const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(getImm()); + return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)); + } if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -8404,7 +8413,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || !isARMLowRegister(Inst.getOperand(0).getReg()) || - (unsigned)Inst.getOperand(2).getImm() > 255 || + (Inst.getOperand(2).isImm() && + (unsigned)Inst.getOperand(2).getImm() > 255) || ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || (inITBlock() && Inst.getOperand(5).getReg() != 0)) || (static_cast<ARMOperand &>(*Operands[3]).isToken() && @@ -8556,7 +8566,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - (unsigned)Inst.getOperand(1).getImm() <= 255 && + (Inst.getOperand(1).isImm() && + (unsigned)Inst.getOperand(1).getImm() <= 255) && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e812d32cc76f..585726208a8d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index b0d1d3fb9ef0..716492ea2566 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -31,10 +33,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, {"fixup_arm_mod_imm", 0, 12, 0}, + {"fixup_t2_so_imm", 0, 26, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, {"fixup_arm_mod_imm", 20, 12, 0}, + {"fixup_t2_so_imm", 26, 6, 0}, }; if (Kind < FirstTargetFixupKind) @@ -693,6 +695,23 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return 0; } return Value; + case ARM::fixup_t2_so_imm: { + Value = ARM_AM::getT2SOImmVal(Value); + if ((int64_t)Value < 0) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + // Value will contain a 12-bit value broken up into a 4-bit shift in bits + // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate + // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit + // 10 of the upper half-word and imm3 is placed at 14:12 of the lower + // half-word. + uint64_t EncValue = 0; + EncValue |= (Value & 0x800) << 15; + EncValue |= (Value & 0x700) << 4; + EncValue |= (Value & 0xff); + return swapHalfWords(EncValue, IsLittleEndian); + } } } @@ -704,16 +723,17 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; + const unsigned FixupKind = Fixup.getKind() ; // MachO (the only user of "Value") tries to make .o files that look vaguely // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit // into the addend if possible. Other relocation types don't want this bit // though (branches couldn't encode it if it *was* present, and no other // relocations exist) and it can interfere with checking valid expressions. - if ((unsigned)Fixup.getKind() == FK_Data_4 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { + if (FixupKind == FK_Data_4 || + FixupKind == ARM::fixup_arm_movw_lo16 || + FixupKind == ARM::fixup_arm_movt_hi16 || + FixupKind == ARM::fixup_t2_movw_lo16 || + FixupKind == ARM::fixup_t2_movt_hi16) { if (Sym) { if (Asm.isThumbFunc(Sym)) Value |= 1; @@ -729,23 +749,27 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // linker can handle it. GNU AS produces an error in this case. if (Sym->isExternal() || Value >= 0x400004) IsResolved = false; - // When an ARM function is called from a Thumb function, produce a - // relocation so the linker will use the correct branch instruction for ELF - // binaries. - if (Sym->isELF()) { - unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType(); - if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) && - !Asm.isThumbFunc(Sym)) + } + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. + if (Sym && Sym->isELF()) { + unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + IsResolved = false; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_uncondbranch)) IsResolved = false; } } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. - if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) + if (A && (FixupKind == ARM::fixup_arm_thumb_blx || + FixupKind == ARM::fixup_arm_blx || + FixupKind == ARM::fixup_arm_uncondbl || + FixupKind == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -792,6 +816,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_so_imm: return 4; case FK_SecRel_2: @@ -844,6 +869,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: case ARM::fixup_arm_mod_imm: + case ARM::fixup_t2_so_imm: // Instruction size is 4 bytes. return 4; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 09dc0173ade6..bd729fabedf5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #include "ARMAsmBackend.h" -#include "llvm/Support/MachO.h" +#include "llvm/BinaryFormat/MachO.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index e1fa24571820..59f31be69d58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -9,12 +9,12 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d6c52f3cd49..93f4006cee87 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -43,12 +44,11 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <climits> diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 3fe2302bdd37..9f6c5d7bf920 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -110,6 +110,9 @@ enum Fixups { // fixup_arm_mod_imm - Fixup for mod_imm fixup_arm_mod_imm, + // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand + fixup_t2_so_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index d9df2c6da7ec..f1f35f409900 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -339,7 +339,17 @@ public: unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + return 0; + } + unsigned SoImm = MO.getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); return Encoded; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 477755157040..b8a8b1f7619a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMMCAsmInfo.h" -#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCELFStreamer.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 34c770440e1b..5516a1bdb03d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -9,10 +9,10 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm-c/Disassembler.h" using namespace llvm; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b77181f29b2d..4a8139dea668 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 7ae2f864d79d..00505a103e00 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cassert> diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index f10427e2ed57..0b6574c37de1 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -11,26 +11,26 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1FrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "ThumbRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCDwarf.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 0ebf55924647..3a3920a2db32 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2e2dfe035e26..9125be96a07b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,26 @@ // //===----------------------------------------------------------------------===// -#include "Thumb2InstrInfo.h" -#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> using namespace llvm; @@ -30,7 +40,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} /// Return the noop instruction to use for a noop. void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { @@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Add cc_out operand if the original instruction did not have one. if (!HasCCOut) MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index c90475c28db7..d911dd97b1ac 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -14,10 +14,10 @@ #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h index 8e5cc5360ad4..5eadf7bdcef6 100644 --- a/lib/Target/AVR/AVR.h +++ b/lib/Target/AVR/AVR.h @@ -15,8 +15,8 @@ #ifndef LLVM_AVR_H #define LLVM_AVR_H -#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index d6491ce5c3bf..f0c7b11895b4 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "InstPrinter/AVRInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 11a47bad78ba..55f3f5cf428a 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -51,7 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget()); // Reserve the intermediate result registers r1 and r2 // The result of instructions like 'mul' is always stored here. @@ -269,4 +268,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg, } } // end of namespace llvm - diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp index c228d051d771..556d69ec5234 100644 --- a/lib/Target/AVR/AVRSubtarget.cpp +++ b/lib/Target/AVR/AVRSubtarget.cpp @@ -13,7 +13,7 @@ #include "AVRSubtarget.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "AVR.h" diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h index a37849c3f3f7..b0e634f86168 100644 --- a/lib/Target/AVR/AVRSubtarget.h +++ b/lib/Target/AVR/AVRSubtarget.h @@ -14,10 +14,9 @@ #ifndef LLVM_AVR_SUBTARGET_H #define LLVM_AVR_SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "AVRFrameLowering.h" #include "AVRISelLowering.h" diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index 2ab0b1080c6a..91d2a8737b87 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" -#include "AVRTargetObjectFile.h" #include "AVR.h" +#include "AVRTargetObjectFile.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp index af14d9292f27..0cebb0f043f9 100644 --- a/lib/Target/AVR/AVRTargetObjectFile.cpp +++ b/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AVRTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "AVR.h" diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 5b0398c0ca34..cf52e552978f 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -18,12 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp index d2a21fb64635..e69accfa9393 100644 --- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp +++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp @@ -16,11 +16,11 @@ #include "AVRSubtarget.h" #include "MCTargetDesc/AVRMCTargetDesc.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 713754821005..1e61eccf775f 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -1,8 +1,8 @@ #include "AVRELFStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "AVRMCTargetDesc.h" diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp index 400296b8409b..085afd23a83c 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp @@ -9,11 +9,11 @@ #include "AVRMCExpr.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index a4fa5c0a9310..826430e94b9c 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AVRMCTargetDesc.h" #include "AVRELFStreamer.h" #include "AVRMCAsmInfo.h" -#include "AVRMCTargetDesc.h" #include "AVRTargetStreamer.h" #include "InstPrinter/AVRInstPrinter.h" diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index c5201465e074..fcd903b7a4a8 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -18,10 +18,10 @@ #include "BPFTargetMachine.h" #include "InstPrinter/BPFInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index e38facead922..5351cfa95020 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstrInfo.h" +#include "BPF.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 7925bee9c587..273843e92701 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFRegisterInfo.h" +#include "BPF.h" #include "BPFSubtarget.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/IR/DiagnosticInfo.h" #define GET_REGINFO_TARGET_DESC #include "BPFGenRegisterInfo.inc" diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index cf8e73540904..d84b0a80fc0c 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFTargetMachine.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/LegacyPassManager.h" +#include "BPF.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index b98621ca4749..a1d732c339e5 100644 --- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -15,10 +15,10 @@ #include "BPFSubtarget.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index ffd29f3ea991..64e986fe0f04 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstPrinter.h" +#include "BPF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index ebe9abd8ffac..d5e1d7706edc 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include <cstdint> diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index b58409730de0..797904e1c976 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCTargetDesc.h" #include "BPF.h" #include "InstPrinter/BPFInstPrinter.h" -#include "MCTargetDesc/BPFMCTargetDesc.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 3df673eaeb4b..d1c97c9987e1 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H -#include "llvm/Support/DataTypes.h" #include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index b0b2644fffbe..c19e636d79ca 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -17,11 +17,12 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonShuffler.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -42,13 +43,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cctype> diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 07767d1037a9..5b02aa3ca3ae 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -65,9 +65,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include <iterator> #include <cassert> #include <cstdint> +#include <iterator> using namespace llvm; diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 87c212b6163f..586220dfec26 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -12,12 +12,12 @@ #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -25,8 +25,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstddef> #include <cstdint> diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index bb5128e7500f..e689483a0999 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonAsmPrinter.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" @@ -23,6 +23,7 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 8502bf24c02f..14c682c6df4b 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -13,8 +13,8 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index af0f8b265bda..730026121d3b 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonBitTracker.h" +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h index 717480314d16..769ec7044a0e 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/lib/Target/Hexagon/HexagonBlockRanges.h @@ -14,8 +14,8 @@ #include <cassert> #include <map> #include <set> -#include <vector> #include <utility> +#include <vector> namespace llvm { diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp index a07ba77e6f3e..b5b46f2b7d19 100644 --- a/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -175,7 +175,8 @@ namespace { None = 0, Root = 0x01, Internal = 0x02, - Used = 0x04 + Used = 0x04, + InBounds = 0x08 }; uint32_t Flags; @@ -231,6 +232,11 @@ namespace { OS << ','; OS << "used"; } + if (GN.Flags & GepNode::InBounds) { + if (Comma) + OS << ','; + OS << "inbounds"; + } OS << "} "; if (GN.Flags & GepNode::Root) OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; @@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); GepNode *N = new (*Mem) GepNode; Value *PtrOp = GepI->getPointerOperand(); + uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0; ValueToNodeMap::iterator F = NM.find(PtrOp); if (F == NM.end()) { N->BaseVal = PtrOp; - N->Flags |= GepNode::Root; + N->Flags |= GepNode::Root | InBounds; } else { // If PtrOp was a GEP instruction, it must have already been processed. // The ValueToNodeMap entry for it is the last gep node in the generated @@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Value *Op = *OI; GepNode *Nx = new (*Mem) GepNode; Nx->Parent = PN; // Link Nx to the previous node. - Nx->Flags |= GepNode::Internal; + Nx->Flags |= GepNode::Internal | InBounds; Nx->PTy = PtrTy; Nx->Idx = Op; Nodes.push_back(Nx); @@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); - Value *NewInst = nullptr; + GetElementPtrInst *NewInst = nullptr; Value *Input = RN->BaseVal; Value **IdxList = new Value*[Num+1]; unsigned nax = 0; @@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, Type *InpTy = Input->getType(); Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + NewInst->setIsInBounds(RN->Flags & GepNode::InBounds); DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index 783b916e04b0..aa68f6cfdfc1 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2276,7 +2276,7 @@ Undetermined: goto Undetermined; uint32_t Props = PredC.properties(); - bool CTrue = false, CFalse = false;; + bool CTrue = false, CFalse = false; if (Props & ConstantProperties::Zero) CFalse = true; else if (Props & ConstantProperties::NonZero) diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 8118c8eb149d..6b4f53428256 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 67af947e089d..03c4a83594b3 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -65,9 +65,9 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 015d3b840e6f..23d4e2610d9a 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -12,10 +12,9 @@ // form. //===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" #include "Hexagon.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 25018b9ed510..18e49c69b8e3 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -10,8 +10,8 @@ #define DEBUG_TYPE "hexagon-pei" -#include "HexagonBlockRanges.h" #include "HexagonFrameLowering.h" +#include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index c99ad5130aef..7c6de6d513e8 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -14,10 +14,10 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 54d99d399f88..bf31e1699284 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,8 +34,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index 85222944c77c..3c37d9ebb0eb 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -40,8 +40,8 @@ #include "llvm/Pass.h" #include "llvm/Support/MathExtras.h" #include <algorithm> -#include <limits> #include <iterator> +#include <limits> #include <utility> using namespace llvm; @@ -235,8 +235,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; + MachineOperand &PredOp = MI->getOperand(1); + if (PredOp.isUndef()) + continue; - unsigned PR = MI->getOperand(1).getReg(); + unsigned PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 4c6c6eeafbe0..afed894cfb9a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonISelLowering.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index b76da727237c..f43101fa456d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonInstrInfo.h" #include "Hexagon.h" #include "HexagonHazardRecognizer.h" -#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" @@ -57,9 +57,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO -#include "HexagonGenInstrInfo.inc" -#include "HexagonGenDFAPacketizer.inc" #include "HexagonDepTimingClasses.h" +#include "HexagonGenDFAPacketizer.inc" +#include "HexagonGenInstrInfo.inc" cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 9aa185fc85a6..b748b58bc0ae 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -23,11 +23,11 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <array> diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 324108284a9a..4602de979024 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump( } #endif -/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { - // We found an available, but not scheduled, predecessor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return nullptr; - OnlyAvailablePred = &Pred; - } +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; } - return OnlyAvailablePred; + + return true; } -/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledSucc(SUnit *SU) { - SUnit *OnlyAvailableSucc = nullptr; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit &Succ = *I->getSUnit(); - if (!Succ.isScheduled) { - // We found an available, but not scheduled, successor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) - return nullptr; - OnlyAvailableSucc = &Succ; - } +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; } - return OnlyAvailableSucc; + return true; } // Constants used to denote relative importance of @@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Count the number of nodes that // this node is the sole unscheduled node for. for (const SDep &SI : SU->Succs) - if (getSingleUnscheduledPred(SI.getSUnit()) == SU) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? for (const SDep &PI : SU->Preds) - if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 70ed123bc898..f269b74fc447 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -1,3 +1,12 @@ +//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Pattern fragment that combines the value type and the register class // into a single parameter. @@ -345,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), +def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; @@ -674,6 +683,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs), defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>; def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; +def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; def: Pat<(srl I64:$src, u6_0ImmPred:$u6), @@ -786,27 +797,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)), def: Pat<(i64 (sext_inreg I64:$src1, i8)), (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. -def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), + (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), + (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$Pu, bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$Pu, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. @@ -860,15 +863,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)), def: Pat<(i1 (setne I64:$src1, I64:$src2)), (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; -// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge I32:$src1, I32:$src2)), - (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>; +// rs >= rt -> rt <= rs +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C4_cmplte I32:$Rt, I32:$Rs)>; -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -1634,9 +1635,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; - +def: Pat<(add (mul I32:$Rs, I32:$Rt), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), @@ -2129,6 +2135,11 @@ let AddedComplexity = 30 in { def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>; def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>; def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>; + def: Storea_pat<store, I64, u32_0ImmPred, PS_storerdabs>; + + def: Stoream_pat<truncstorei8, I64, u32_0ImmPred, LoReg, PS_storerbabs>; + def: Stoream_pat<truncstorei16, I64, u32_0ImmPred, LoReg, PS_storerhabs>; + def: Stoream_pat<truncstorei32, I64, u32_0ImmPred, LoReg, PS_storeriabs>; } let AddedComplexity = 30 in { @@ -2137,6 +2148,19 @@ let AddedComplexity = 30 in { def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>; def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>; def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>; + def: Loada_pat<load, i64, u32_0ImmPred, PS_loadrdabs>; + + def: Loadam_pat<extloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>; + def: Loadam_pat<sextloadi8, i64, u32_0ImmPred, ToSext64, PS_loadrbabs>; + def: Loadam_pat<zextloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>; + + def: Loadam_pat<extloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>; + def: Loadam_pat<sextloadi16, i64, u32_0ImmPred, ToSext64, PS_loadrhabs>; + def: Loadam_pat<zextloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>; + + def: Loadam_pat<extloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>; + def: Loadam_pat<sextloadi32, i64, u32_0ImmPred, ToSext64, PS_loadriabs>; + def: Loadam_pat<zextloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>; } // Indexed store word - global address. @@ -2707,6 +2731,15 @@ def: Pat<(fneg F64:$Rs), (S2_togglebit_i (HiReg $Rs), 31), isub_hi, (i32 (LoReg $Rs)), isub_lo)>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (A2_combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; + def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ return isAlignedMemNode(dyn_cast<MemSDNode>(N)); }]>; diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 471e32221b29..db268b78cd73 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -13,8 +13,8 @@ #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 14ecf297d351..c757b6ecdd00 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -23,8 +23,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -276,27 +276,27 @@ bool HexagonPassConfig::addInstSelector() { if (!NoOpt) { // Create logical operations on predicate registers. if (EnableGenPred) - addPass(createHexagonGenPredicate(), false); + addPass(createHexagonGenPredicate()); // Rotate loops to expose bit-simplification opportunities. if (EnableLoopResched) - addPass(createHexagonLoopRescheduling(), false); + addPass(createHexagonLoopRescheduling()); // Split double registers. if (!DisableHSDR) addPass(createHexagonSplitDoubleRegs()); // Bit simplification. if (EnableBitSimplify) - addPass(createHexagonBitSimplify(), false); + addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); // Constant propagation. if (!DisableHCP) { - addPass(createHexagonConstPropagationPass(), false); - addPass(&UnreachableMachineBlockElimID, false); + addPass(createHexagonConstPropagationPass()); + addPass(&UnreachableMachineBlockElimID); } if (EnableGenInsert) - addPass(createHexagonGenInsert(), false); + addPass(createHexagonGenInsert()); if (EnableEarlyIf) - addPass(createHexagonEarlyIfConversion(), false); + addPass(createHexagonEarlyIfConversion()); } return false; @@ -307,9 +307,9 @@ void HexagonPassConfig::addPreRegAlloc() { if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) - addPass(createHexagonStoreWidening(), false); + addPass(createHexagonStoreWidening()); if (!DisableHardwareLoops) - addPass(createHexagonHardwareLoops(), false); + addPass(createHexagonHardwareLoops()); } if (TM->getOptLevel() >= CodeGenOpt::Default) addPass(&MachinePipelinerID); @@ -320,16 +320,16 @@ void HexagonPassConfig::addPostRegAlloc() { if (EnableRDFOpt) addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) - addPass(createHexagonCFGOptimizer(), false); + addPass(createHexagonCFGOptimizer()); if (!DisableAModeOpt) - addPass(createHexagonOptAddrMode(), false); + addPass(createHexagonOptAddrMode()); } } void HexagonPassConfig::addPreSched2() { - addPass(createHexagonCopyToCombine(), false); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID, false); + addPass(&IfConverterID); addPass(createHexagonSplitConst32AndConst64()); } @@ -337,17 +337,17 @@ void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonNewValueJump(), false); + addPass(createHexagonNewValueJump()); - addPass(createHexagonBranchRelaxation(), false); + addPass(createHexagonBranchRelaxation()); // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) - addPass(createHexagonFixupHwLoops(), false); + addPass(createHexagonFixupHwLoops()); // Generate MUX from pairs of conditional transfers. if (EnableGenMux) - addPass(createHexagonGenMux(), false); + addPass(createHexagonGenMux()); addPass(createHexagonPacketizer(), false); } diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index c9c4f95dbaaa..4dacb1501392 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index fa08afe4019d..7667bfb7a0eb 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,10 +16,10 @@ // prune the dependence. // //===----------------------------------------------------------------------===// +#include "HexagonVLIWPacketizer.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "HexagonVLIWPacketizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 904403543e18..545c8b6b2acd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -12,9 +12,9 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index dd790fd41257..1929152129fa 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonAsmPrinter.h" #include "HexagonInstPrinter.h" +#include "HexagonAsmPrinter.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 70410ff03a64..50f00d1aaeac 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonFixupKinds.h" -#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 9e1ff9ca35d7..47007e08a2ff 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,7 +30,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index aece36790486..b2c7f1569380 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "hexagon-shuffle" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bb98c2bbef6d..1a361548f938 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "Hexagon.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> #include <new> diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 9aa8ad68e07e..60a12dcf2f03 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,9 +9,9 @@ // // RDF-based generic dead code elimination. +#include "RDFDeadCode.h" #include "RDFGraph.h" #include "RDFLiveness.h" -#include "RDFDeadCode.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 7a2895aa4e8c..8d1272370899 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -10,8 +10,8 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 9d8a3881797b..83e8968086d8 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -23,8 +23,8 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. <hal-00647369> // -#include "RDFGraph.h" #include "RDFLiveness.h" +#include "RDFGraph.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 1d6c07974beb..72e471f5766e 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -28,8 +28,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstddef> diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp index 7475dbd68ae4..38e75108ba16 100644 --- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp +++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "LanaiSubtarget.h" #include "LanaiTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index e02bba529bd5..64cd3342ac18 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index 10254677a5ad..c3727416ecb9 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index a47ff9ff3d61..bcbde2b8b794 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "LanaiMCAsmInfo.h" #include "LanaiMCTargetDesc.h" #include "InstPrinter/LanaiInstPrinter.h" +#include "LanaiMCAsmInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index abf062fe86ae..f39c21fc8aa2 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "InstPrinter/MSP430InstPrinter.h" +#include "MSP430.h" #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d855d3e7f778..694c201cbe8d 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -9,17 +9,18 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsTargetStreamer.h" -#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -40,13 +41,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ecdf6b0de6e7..b0b994323036 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -17,14 +17,14 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 1e2eb7dbec3e..6d3d4db03603 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// // -#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsAsmBackend.h" +#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 1a1c613cfce0..d116ac3471bc 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,13 +10,13 @@ #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index ae3278322311..f658aadff22f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -10,12 +10,12 @@ #include "MipsELFStreamer.h" #include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index a35eb2a8e03a..0330824fd614 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCCodeEmitter.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" -#include "MipsMCCodeEmitter.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index be04480044d4..aad6bf378ea0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// #include "MipsMCExpr.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 74d5e4cc9841..2d84528e7469 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "MipsOptionRecord.h" #include "MipsABIInfo.h" #include "MipsELFStreamer.h" -#include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include <cassert> using namespace llvm; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2d4083b27ed1..0cd4aebe4d16 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsTargetStreamer.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 9615bc38bfce..f24761d7d101 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -185,6 +185,9 @@ def FeatureUseTCCInDIV : SubtargetFeature< "UseTCCInDIV", "false", "Force the assembler to use trapping">; +def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", + "Disable 4-operand madd.fmt and related instructions">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index a222080f6b81..09e41e1423ae 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16FrameLowering.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16InstrInfo.h" #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" @@ -25,10 +25,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include <cassert> diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 9cdbf510737f..f7ff7c3dc7bb 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" -#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "MipsTargetMachine.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index cb9f676c237a..6a03ee9927d7 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) { return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); } +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. +static bool originalTypeIsVectorFloat(const Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + MipsCCState::SpecialCallingConvType MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, const MipsSubtarget &Subtarget) { @@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128( } } -/// Identify lowered values that originated from f128 arguments and record -/// this for use by RetCC_MipsN. +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_MipsN. void MipsCCState::PreAnalyzeReturnForF128( const SmallVectorImpl<ISD::OutputArg> &Outs) { const MachineFunction &MF = getMachineFunction(); @@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128( } } -/// Identify lowered values that originated from f128 arguments and record +/// Identify lower values that originated from vXfXX and record /// this. +void MipsCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record +/// this. +void MipsCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl<ISD::OutputArg> &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl<ISD::OutputArg> &Outs, std::vector<TargetLowering::ArgListEntry> &FuncArgs, const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { - OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func)); - OriginalArgWasFloat.push_back( - FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); } } -/// Identify lowered values that originated from f128 arguments and record -/// this. +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl<ISD::InputArg> &Ins) { const MachineFunction &MF = getMachineFunction(); @@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( if (Ins[i].Flags.isSRet()) { OriginalArgWasF128.push_back(false); OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); continue; } @@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); } } diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h index 77ecc65b2eee..27901699480b 100644 --- a/lib/Target/Mips/MipsCCState.h +++ b/lib/Target/Mips/MipsCCState.h @@ -45,16 +45,33 @@ private: const char *Func); /// Identify lowered values that originated from f128 arguments and record - /// this. + /// this for use by RetCC_MipsN. void PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins); + void + PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins, + const Type *RetTy); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl<ISD::InputArg> &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs); + /// Records whether the value has been lowered from an f128. SmallVector<bool, 4> OriginalArgWasF128; /// Records whether the value has been lowered from float. SmallVector<bool, 4> OriginalArgWasFloat; + /// Records whether the value has been lowered from a floating point vector. + SmallVector<bool, 4> OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector<bool, 4> OriginalRetWasFloatVector; + /// Records whether the value was a fixed argument. /// See ISD::OutputArg::IsFixed, SmallVector<bool, 4> CallOperandIsFixed; @@ -78,6 +95,7 @@ public: CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); CallOperandIsFixed.clear(); } @@ -96,31 +114,38 @@ public: CCState::AnalyzeFormalArguments(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, CCAssignFn Fn, const Type *RetTy, const char *Func) { PreAnalyzeCallResultForF128(Ins, RetTy, Func); + PreAnalyzeCallResultForVectorFloat(Ins, RetTy); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); CCState::AnalyzeReturn(Outs, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, CCAssignFn Fn) { PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); bool Return = CCState::CheckReturn(ArgsFlags, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); return Return; } @@ -128,6 +153,12 @@ public: bool WasOriginalArgFloat(unsigned ValNo) { return OriginalArgWasFloat[ValNo]; } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } }; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index a57cb7badc17..b5df78f89a6b 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A> class CCIfArgIsVarArg<CCAction A> : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>; +/// Match if the return was a floating point vector. +class CCIfOrigArgWasNotVectorFloat<CCAction A> + : CCIf<"!static_cast<MipsCCState *>(&State)" + "->WasOriginalRetVectorFloat(ValNo)", A>; /// Match if the special calling conv is the specified value. class CCIfSpecialCallingConv<string CC, CCAction A> @@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[ // Promote i1/i8/i16 return values to i32. CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, - // i32 are returned in registers V0, V1, A0, A1 - CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, + // i32 are returned in registers V0, V1, A0, A1, unless the original return + // type was a vector of floats. + CCIfOrigArgWasNotVectorFloat<CCIfType<[i32], + CCAssignToReg<[V0, V1, A0, A1]>>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 026f66a1c0e1..ff43a3950610 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -24,10 +24,10 @@ #include "Mips16InstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index ac9a81b1bb2f..c238a65378e2 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; +def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>; // Mips-specific dsp nodes def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8, immZExt8, NoItinerary, DSPROpnd>; -class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10, - immZExt10, NoItinerary, DSPROpnd>; +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10, + immSExt10, NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, NoItinerary, DSPROpnd, GPR32Opnd>; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index b83f44a74d5b..f79cb0e67200 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" -#include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 760630c41176..f2193013b7aa 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" @@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } +// The MIPS MSA ABI passes vector arguments in the integer register set. +// The number of integer registers used is dependant on the ABI used. +MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT.isVector() && Subtarget.hasMSA()) + return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64; + return MipsTargetLowering::getRegisterType(VT); +} + +MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) { + if (Subtarget.isABI_O32()) { + return MVT::i32; + } else { + return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64; + } + } + return MipsTargetLowering::getRegisterType(Context, VT); +} + +unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) + return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)), + 1U); + return MipsTargetLowering::getNumRegisters(Context, VT); +} + +unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + + // Break down vector types to either 2 i64s or 4 i32s. + RegisterVT = getRegisterTypeForCallingConv(Context, VT) ; + IntermediateVT = RegisterVT; + NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() + ? VT.getVectorNumElements() + : VT.getSizeInBits() / RegisterVT.getSizeInBits(); + + return NumIntermediates; +} + SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); @@ -470,8 +512,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && !Subtarget.inMicroMipsMode(); - // Disable if we don't generate PIC or the ABI isn't O32. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32()) + // Disable if either of the following is true: + // We do not generate PIC, the ABI is not O32, LargeGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) UseFastISel = false; return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -2551,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// @@ -2562,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + + const MipsCCState * MipsState = static_cast<MipsCCState *>(&State); + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; + // Do not process byval args here. if (ArgFlags.isByVal()) return true; @@ -2601,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); - - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); + + // The MIPS vector ABI for floats passes them in a pair of registers + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of an vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == Mips::A2) + State.AllocateReg(Mips::A1); + else if (Reg == 0) + State.AllocateReg(Mips::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2dcafd51061a..0e47ed38f420 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -248,6 +248,33 @@ namespace llvm { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const override; + + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Return the number of registers for a given MVT, ensuring vectors are + /// treated as a series of gpr sized integers. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Break down vectors to the correct number of gpr sized integers. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + + /// Return the correct alignment for the current calling convention. + virtual unsigned + getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override { + if (ArgTy->isVectorTy()) + return std::min(DL.getABITypeAlignment(ArgTy), 8U); + return DL.getABITypeAlignment(ArgTy); + } + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index d81a769d7fd9..94f3a74be98b 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -557,11 +557,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, @@ -569,11 +569,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in { } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, @@ -582,12 +582,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in { let DecoderNamespace = "Mips64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; } -let AdditionalPredicates = [NoNaNsFPMath], +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], DecoderNamespace = "Mips64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8761946b8dbb..40078fb77144 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -238,6 +238,8 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">, AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; +def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, + AssemblerPredicate<"!FeatureMadd4">; //===----------------------------------------------------------------------===// @@ -390,6 +392,10 @@ class ASE_NOT_DSP { list<Predicate> InsnPredicates = [NotDSP]; } +class MADD4 { + list<Predicate> AdditionalPredicates = [HasMadd4]; +} + //===----------------------------------------------------------------------===// class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl { diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 63034ecab93b..e01c03db2227 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -40,7 +40,11 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { const TargetRegisterClass *RC = STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : static_cast<const MipsTargetMachine &>(MF.getTarget()) + : STI.inMicroMipsMode() + ? STI.hasMips64() + ? &Mips::GPRMM16_64RegClass + : &Mips::GPRMM16RegClass + : static_cast<const MipsTargetMachine &>(MF.getTarget()) .getABI() .IsN64() ? &Mips::GPR64RegClass diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index 94a1965f9ffb..79c8395d9dcc 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "Mips.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 70ead5cde6fa..7ee45c28a7d0 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" #include "Mips.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 65be350f259d..de3389b5a6bf 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); + << "stackSize : " << stackSize << "\n" + << "alignment : " + << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index e765b4625206..102ebb21609a 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsSEFrameLowering.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSEFrameLowering.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index c9cf9363b8c9..49ae6dd4cd39 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -24,11 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 1f4e933db2a2..154d5825427b 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "MipsMachineFunction.h" +#include "MipsSubtarget.h" #include "Mips.h" +#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index b4d15ee361ff..625a652a0ca0 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -144,6 +144,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasEVA -- supports EVA ASE. bool HasEVA; + + // nomadd4 - disables generation of 4-operand madd.s, madd.d and + // related instructions. + bool DisableMadd4; InstrItineraryData InstrItins; @@ -253,6 +257,7 @@ public: bool hasDSPR2() const { return HasDSPR2; } bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } bool useSmallSection() const { return UseSmallSection; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a9d6ab055892..330ae19ecd0f 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" @@ -18,7 +19,6 @@ #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" -#include "MipsTargetMachine.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index c5d6a05d6611..4d73c3991035 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 58cb7793d040..0139646fc3f7 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "InstPrinter/NVPTXInstPrinter.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" -#include "NVPTXAsmPrinter.h" #include "NVPTXMCExpr.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXRegisterInfo.h" @@ -73,8 +73,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 390776212ce7..916b0e115664 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "NVPTX.h" #include "NVPTXUtilities.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ebaaf42bc64e..f26b9a7cb8dd 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -12,9 +12,9 @@ //
//===----------------------------------------------------------------------===//
+#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
-#include "NVPTXISelLowering.h"
#include "NVPTXSection.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 0f6c2e53e60a..da563f0531d4 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp index e858b37e1843..139dc7fbeeda 100644 --- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -90,8 +90,8 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index e10b046f7c97..4e902c0fb507 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 8dfbfece9b8e..2b6ba8c85d4d 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXTargetMachine.h" #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" -#include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp index 9c71a2ee165b..11277f5ba596 100644 --- a/lib/Target/NVPTX/NVVMIntrRange.cpp +++ b/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -15,8 +15,8 @@ #include "NVPTX.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 84bb9ec56800..baf5902ddf58 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCInstrInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4863ac542736..028c2cb562f8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,8 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -18,9 +20,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index fd279c60f3f5..1488bd5b0be6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 6b97d4c1456b..54f664314578 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "PPCFixupKinds.h" #include "PPCMCExpr.h" +#include "PPCFixupKinds.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2d686f227919..e8f220ea5457 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -30,11 +31,10 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 1f38a8c947e7..6d591ca964a6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -18,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; @@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE, // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. // This appears to be an endianness issue; reversing the order of the - // documented bitfields in <llvm/Support/MachO.h> fixes this (but + // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but // breaks x86/ARM assembly). MRE.r_word1 = ((Index << 8) | // was << 0 (IsPCRel << 7) | // was << 24 @@ -222,7 +222,7 @@ bool PPCMachObjectWriter::recordScatteredRelocation( report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Support/MachO.h + // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1f181d007f63..841b8c514464 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" -#include "PPCInstrInfo.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -29,6 +29,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -55,11 +57,9 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 93c201d03869..55e105dad0e5 100644 --- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements converting i1 values to i32 if they could be more +// This file implements converting i1 values to i32/i64 if they could be more // profitably allocated as GPRs rather than CRs. This pass will become totally // unnecessary if Register Bank Allocation and Global Instruction Selection ever // go upstream. // -// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the // transitive closure of their uses includes only PHINodes, CallInsts, and // ReturnInsts. The rational is that arguments are generally passed and returned -// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will // actually save casts at the Machine Instruction level. // // It might be useful to expand this pass to add bit-wise operations to the list @@ -33,11 +33,12 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -50,8 +51,9 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Pass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Casting.h" #include <cassert> using namespace llvm; @@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass { return Defs; } - // Translate a i1 value to an equivalent i32 value: - static Value *translate(Value *V) { - Type *Int32Ty = Type::getInt32Ty(V->getContext()); + // Translate a i1 value to an equivalent i32/i64 value: + Value *translate(Value *V) { + Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext()) + : Type::getInt32Ty(V->getContext()); + if (auto *C = dyn_cast<Constant>(V)) - return ConstantExpr::getZExt(C, Int32Ty); + return ConstantExpr::getZExt(C, IntTy); if (auto *P = dyn_cast<PHINode>(V)) { // Temporarily set the operands to 0. We'll fix this later in // runOnUse. - Value *Zero = Constant::getNullValue(Int32Ty); + Value *Zero = Constant::getNullValue(IntTy); PHINode *Q = - PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P); for (unsigned i = 0; i < P->getNumOperands(); ++i) Q->addIncoming(Zero, P->getIncomingBlock(i)); return Q; @@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass { auto InstPt = A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); - return new ZExtInst(V, Int32Ty, "", InstPt); + return new ZExtInst(V, IntTy, "", InstPt); } typedef SmallPtrSet<const PHINode *, 8> PHINodeSet; @@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + auto &TM = TPC->getTM<PPCTargetMachine>(); + ST = TM.getSubtargetImpl(F); + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); B2IMap Bool2IntMap; bool Changed = false; @@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass { return Changed; } - static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, B2IMap &BoolToIntMap) { auto Defs = findAllDefs(U); @@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass { AU.addPreserved<DominatorTreeWrapperPass>(); FunctionPass::getAnalysisUsage(AU); } + +private: + const PPCSubtarget *ST; }; } // end anonymous namespace char PPCBoolRetToInt::ID = 0; INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", - "Convert i1 constants to i32 if they are returned", + "Convert i1 constants to i32/i64 if they are returned", false, false) FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index b7d3154d0000..d0b66f9bca09 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCSubtarget.h" diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 70c4170653ae..24bc027f8106 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,7 +23,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -43,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index 6bd229625fc3..811e4dd9dfe1 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 2fc8654deeab..bc9957194f6d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" -#include "PPCCallingConv.h" +#include "PPC.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 54414457388d..28d496ee9ca1 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,9 +21,10 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -54,7 +55,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/Statistic.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -2824,6 +2824,20 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), + getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + } } } @@ -2850,6 +2864,27 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, getI32Imm(63, dl)), 0); } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. + // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } } } @@ -2872,6 +2907,19 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, getI64Imm(58, dl), getI64Imm(63, dl)), 0); } + case ISD::SETNE: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue AC = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + Xor, getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, + Xor, AC.getValue(1)), 0); + } } } @@ -2896,6 +2944,19 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, Addic, Addic.getValue(1)), 0); } + case ISD::SETNE: { + // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) + // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) + // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) + // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue SC = + SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, + Xor, getI32Imm(0, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, + SC, SC.getValue(1)), 0); + } } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 41ff9d903aa0..bda4e5e81734 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" -#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" @@ -28,11 +28,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -52,8 +52,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -61,9 +61,9 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index c4139ca8b7bd..e214d26c063b 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2717,6 +2717,40 @@ def DblToFlt { dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } + +def ByteToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); +} + +def ByteToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); +} + +def HWordToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); +} + +def HWordToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); +} + +def WordToDWord { + dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); +} + def FltToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); } @@ -2969,4 +3003,21 @@ let AddedComplexity = 400 in { (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC), (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>; } + // P9 Altivec instructions that can be used to build vectors. + // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete + // with complexities of existing build vector patterns in this file. + let Predicates = [HasP9Altivec] in { + def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, + HWordToWord.A2, HWordToWord.A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, + ByteToWord.A2, ByteToWord.A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)), + (v2i64 (VEXTSB2D $A))>; + } } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 541b98e01b99..b310493587ae 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "PPC.h" #include "PPCSubtarget.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index c6d2c3ebcc0f..ff5f17c7628f 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -19,9 +19,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index c7aa4cb78b7a..31c50785c2ee 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -21,9 +21,9 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 7c53a5601790..17345b6ca8d3 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -61,8 +61,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b9004cc8a9f5..5a226b23ff96 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" -#include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index f3a0290da054..93fe3230ab81 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f6d20ced15a0..a57484e5abdf 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index d3434b77be8a..491eaf326a50 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -42,9 +42,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index d6f2672271e9..d9a71893afee 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -12,10 +12,10 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b2ed13758d41..9309d493cef4 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -13,13 +13,13 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index ddc3bf350452..7c98b1c8f321 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H +#include "llvm/Config/config.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Config/config.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index efdde04c582d..744d7b8aaa3a 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -13,10 +13,10 @@ #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 7e6dff6b7894..087c037614a9 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/SparcMCExpr.h" #include "MCTargetDesc/SparcMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" @@ -28,8 +28,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index da7e0b737e78..8e298e8316da 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,11 +14,11 @@ #include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index cc07547ede2c..d1d1334163a2 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/SparcFixupKinds.h" #include "MCTargetDesc/SparcMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 21df60237d96..50e8825b15e8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -14,10 +14,10 @@ #include "SparcMCAsmInfo.h" #include "SparcMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index e85a8cd5e339..a77f760d9eff 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -19,7 +19,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" - using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 31a128a5f271..19fb94534b25 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" #include "SparcTargetStreamer.h" diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index a3cedcbf9dd1..a784124ff688 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 49c67e0819f7..c7a1ca262d2c 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" -#include "SparcTargetObjectFile.h" -#include "Sparc.h" #include "LeonPasses.h" +#include "Sparc.h" +#include "SparcTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 8fdde15d8d27..627e49a95f3c 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -9,8 +9,8 @@ #include "SparcTargetObjectFile.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index efcf6696fd50..ad05779a9f64 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 23b7d5b5d501..fd1fd7bc40dc 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 3de570bf30cc..df0a8161e6e7 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/SystemZMCFixups.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h index 8fa54ee434cf..0c755c9ad1b9 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -25,10 +25,10 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H #include "SystemZSubtarget.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/raw_ostream.h" #include <string> diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ae141dbcad34..ac4c3f6db684 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5367,12 +5367,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; + + // ISel pattern matching also adds a load memory operand of the same + // address, so take special care to find the storing memory operand. + MachineMemOperand *MMO = nullptr; + for (auto *I : MI.memoperands()) + if (I->isStore()) { + MMO = I; + break; + } + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg) - .add(Base) - .addImm(Disp) - .addImm(CCValid) - .addImm(CCMask); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask) + .addMemOperand(MMO); + MI.eraseFromParent(); return MBB; } @@ -5950,7 +5962,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( .addImm(DestDisp) .addImm(ThisLength) .add(SrcBase) - .addImm(SrcDisp); + .addImm(SrcDisp) + ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index b34c181124de..66a5ff12be46 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "SystemZInstrInfo.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZInstrBuilder.h" -#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index ec8ce6e911fa..3a0e01da42f0 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZTargetMachine.h" #include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 6ef8000d6f43..d14a0fb0b0b2 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" +#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 263aff8b7bfb..7391df8342ef 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 36e51921bf2f..be480f03c572 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -19,8 +19,8 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp index 96a9ef82c125..5dbd23d420a3 100644 --- a/lib/Target/SystemZ/SystemZTDC.cpp +++ b/lib/Target/SystemZ/SystemZTDC.cpp @@ -47,10 +47,10 @@ #include "SystemZ.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include <deque> diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index f30d52f859d7..cb81c0e5276e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "SystemZTargetMachine.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZMachineScheduler.h" -#include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 5d1616d03779..42d92622d6c8 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -14,12 +14,12 @@ #include "llvm-c/Target.h" #include "llvm-c/Initialization.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include <cstring> using namespace llvm; diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 91cc97e38b3d..f941891f3183 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -24,7 +25,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( if (GO->hasSection()) return getExplicitSectionGlobal(GO, Kind, TM); + if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { + auto Attrs = GVar->getAttributes(); + if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || + (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { + return getExplicitSectionGlobal(GO, Kind, TM); + } + } + + if (auto *F = dyn_cast<Function>(GO)) { + if (F->hasFnAttribute("implicit-section-name")) + return getExplicitSectionGlobal(GO, Kind, TM); + } + // Use default section depending on the 'type' of global return SelectSectionForGlobal(GO, Kind, TM); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index df12e0e88e3b..01f14939864f 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/TargetMachine.h" #include "llvm-c/Core.h" #include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CodeGenCWrappers.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index b5f53114d3e1..9be11da9afac 100644 --- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index c6158720d62f..b1de84d7e8e6 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,9 +16,9 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 7c78285fbda4..4f20096c1583 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 544cd653fd72..c56c591def36 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 795658ca96b4..0ba700a86b74 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,9 +15,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 68d6747298df..ddf964e7dbb7 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -16,9 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 2846ec5e9337..27c01cb8acf7 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -13,14 +13,14 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Wasm.h" using namespace llvm; namespace { @@ -54,6 +54,11 @@ static bool IsFunctionExpr(const MCExpr *Expr) { return false; } +static bool IsFunctionType(const MCValue &Target) { + const MCSymbolRefExpr *RefA = Target.getSymA(); + return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX; +} + unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, @@ -71,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, case WebAssembly::fixup_code_sleb128_i64: llvm_unreachable("fixup_sleb128_i64 not implemented yet"); case WebAssembly::fixup_code_uleb128_i32: + if (IsFunctionType(Target)) + return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB; if (IsFunction) return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB; return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB; diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 40e1928197bc..1691808d05a0 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -17,8 +17,8 @@ /// ////===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index bd11d1b46906..21e0f6b23777 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -18,8 +18,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index bc6360aafd61..b2330a232093 100644 --- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 53698ff09b10..09338a4898e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -16,8 +16,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 2bbf7a2b42f9..41f315c2825b 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -26,8 +26,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 257f1d110aa2..4f3ae57733e5 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 744a3ed427af..576b71dd7966 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 473dcb7a33fd..1462c49aa9fd 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index e3470825940c..766ab456a8e6 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 57d454746b06..6650191807dc 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 9e944df637d9..878ffd08d228 100644 --- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 2441ead7cb27..b1385f409fd3 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index a9aa781610ce..8173364fa880 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -24,8 +24,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index d9b2b8743649..7b05f671bdcb 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -12,9 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyTargetMachine.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetObjectFile.h" #include "WebAssemblyTargetTransformInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 788fac62626b..f7e31de65f6d 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmInstrumentation.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86Operand.h" -#include "llvm/ADT/Twine.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 33eff14b8215..0fba15cc692c 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -15,8 +15,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7471373334f6..fc4adddc149b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(LLVM X86GenFastISel.inc -gen-fast-isel) tablegen(LLVM X86GenCallingConv.inc -gen-callingconv) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables) -tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables) if(LLVM_BUILD_GLOBAL_ISEL) tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 36ad23bb41c0..4ce908b1da64 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -74,8 +74,8 @@ // //===----------------------------------------------------------------------===// -#include "X86DisassemblerDecoder.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index b7f637e9a8cd..577b7a776c6d 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include <cstdarg> /* for va_*() */ -#include <cstdio> /* for vsnprintf() */ -#include <cstdlib> /* for exit() */ -#include <cstring> /* for memset() */ +#include <cstdarg> /* for va_*() */ +#include <cstdio> /* for vsnprintf() */ +#include <cstdlib> /* for exit() */ +#include <cstring> /* for memset() */ #include "X86DisassemblerDecoder.h" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 6aa700306744..4d91300c7ede 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86ATTInstPrinter.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index b5a926f915af..5e809c34325e 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -15,8 +15,8 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" -#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index a8c631ae282f..d6af6712d5a1 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" -#include "X86IntelInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a713af6aadb5..7a9e4f4468ec 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -10,6 +10,8 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,9 +24,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 0b73df3a2ff8..4da4eebec038 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 9c35a251e480..1538a515f419 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -13,12 +13,12 @@ #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 297926ddcfda..4097ef224d50 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,7 +20,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index d6777fc8aa6a..105580c913a1 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index e1825ca1eda1..dc15aeadaa61 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineValueType.h" @@ -34,7 +35,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3cfb924abd01..621505aaded9 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; else @@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; else @@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; else diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 328a80304602..2777fa89330f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,8 +29,8 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" #include <cstdlib> using namespace llvm; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5303d7a406ad..831e9bdab0e1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1,4 +1,4 @@ - + //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure @@ -81,6 +81,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment( " of the loop header PC will be 0)."), cl::Hidden); +static cl::opt<bool> MulConstantOptimization( + "mul-constant-optimization", cl::init(true), + cl::desc("Replace 'mul x, Const' with more effective instructions like " + "SHIFT, LEA, etc."), + cl::Hidden); + /// Call this when the user attempts to do something unsupported, like /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike /// report_fatal_error, so calling code should attempt to recover without @@ -5810,7 +5816,8 @@ static bool setTargetShuffleZeroElements(SDValue N, // The decoded shuffle mask may contain a different number of elements to the // destination value type. static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, - SmallVectorImpl<SDValue> &Ops) { + SmallVectorImpl<SDValue> &Ops, + SelectionDAG &DAG) { Mask.clear(); Ops.clear(); @@ -5868,8 +5875,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); } - if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) || - NumElts <= SrcExtract.getConstantOperandVal(1)) + if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1))) return false; SDValue SrcVec = SrcExtract.getOperand(0); @@ -5877,8 +5883,12 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, unsigned NumSrcElts = SrcVT.getVectorNumElements(); unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; + unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); + if (NumSrcElts <= SrcIdx) + return false; + Ops.push_back(SrcVec); - Mask.push_back(SrcExtract.getConstantOperandVal(1)); + Mask.push_back(SrcIdx); Mask.append(NumZeros, SM_SentinelZero); Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; @@ -5915,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } + case X86ISD::PACKSS: { + // If we know input saturation won't happen we can treat this + // as a truncation shuffle. + if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt || + DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt) + return false; + + Ops.push_back(N.getOperand(0)); + Ops.push_back(N.getOperand(1)); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i * 2); + return true; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); @@ -5989,9 +6012,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs, /// Returns true if the target shuffle mask was decoded. static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, - SmallVectorImpl<int> &Mask) { + SmallVectorImpl<int> &Mask, + SelectionDAG &DAG) { if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, Mask, Inputs)) + if (!getFauxShuffleMask(Op, Mask, Inputs, DAG)) return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); @@ -6391,6 +6415,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, /// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, const SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget, bool isAfterLegalize) { unsigned NumElems = Elts.size(); @@ -6495,6 +6520,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); + // Don't create 256-bit non-temporal aligned loads without AVX2 as these + // will lower to regular temporal loads and use the cache. + if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && + VT.is256BitVector() && !Subtarget.hasInt256()) + return SDValue(); + if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); @@ -7701,7 +7732,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // See if we can use a vector load to get all of the elements. if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems); - if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } @@ -7825,24 +7857,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Next, we iteratively mix elements, e.g. for v4f32: - // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> - // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> - // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - unsigned EltStride = NumElems >> 1; - while (EltStride != 0) { - for (unsigned i = 0; i < EltStride; ++i) { - // If Ops[i+EltStride] is undef and this is the first round of mixing, - // then it is safe to just drop this shuffle: V[i] is already in the - // right place, the one element (since it's the first round) being - // inserted as undef can be dropped. This isn't safe for successive - // rounds because they will permute elements within both vectors. - if (Ops[i+EltStride].isUndef() && - EltStride == NumElems/2) - continue; - - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]); - } - EltStride >>= 1; + // Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0> + // : unpcklps 2, 3 ==> Y: <?, ?, 3, 2> + // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0> + for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { + // Generate scaled UNPCKL shuffle mask. + SmallVector<int, 16> Mask; + for(unsigned i = 0; i != Scale; ++i) + Mask.push_back(i); + for (unsigned i = 0; i != Scale; ++i) + Mask.push_back(NumElems+i); + Mask.append(NumElems - Mask.size(), SM_SentinelUndef); + + for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); } return Ops[0]; } @@ -17177,7 +17205,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, Cond == ISD::SETGE || Cond == ISD::SETUGE; bool Invert = Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); - bool FlipSigns = ISD::isUnsignedIntSetCC(Cond); + + // If both operands are known non-negative, then an unsigned compare is the + // same as a signed compare and there's no need to flip signbits. + // TODO: We could check for more general simplifications here since we're + // computing known bits. + bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && + !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); @@ -26741,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( return Tmp; } + case X86ISD::VSHLI: { + SDValue Src = Op.getOperand(0); + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); + APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); + if (ShiftVal.uge(VTBits)) + return VTBits; // Shifted all bits out --> zero. + if (ShiftVal.uge(Tmp)) + return 1; // Shifted all sign bits out --> unknown. + return Tmp - ShiftVal.getZExtValue(); + } + case X86ISD::VSRAI: { SDValue Src = Op.getOperand(0); unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); @@ -27889,7 +27934,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, // Extract target shuffle mask and resolve sentinels and inputs. SmallVector<int, 64> OpMask; SmallVector<SDValue, 2> OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask)) + if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) return false; assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); @@ -28788,7 +28833,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } if (Elts.size() == VT.getVectorNumElements()) - if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true)) return LD; // For AVX2, we sometimes want to combine @@ -29430,7 +29476,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, // Resolve the target shuffle inputs and mask. SmallVector<int, 16> Mask; SmallVector<SDValue, 2> Ops; - if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) + if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -31017,6 +31063,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, } } +static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, + EVT VT, SDLoc DL) { + + auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(Mult, DL, VT)); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, + DAG.getConstant(Shift, DL, MVT::i8)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + auto combineMulMulAddOrSub = [&](bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(9, DL, VT)); + Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + switch (MulAmt) { + default: + break; + case 11: + // mul x, 11 => add ((shl (mul x, 5), 1), x) + return combineMulShlAddOrSub(5, 1, /*isAdd*/ true); + case 21: + // mul x, 21 => add ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); + case 22: + // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); + case 19: + // mul x, 19 => sub ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ false); + case 13: + // mul x, 13 => add ((shl (mul x, 3), 2), x) + return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); + case 23: + // mul x, 13 => sub ((shl (mul x, 3), 3), x) + return combineMulShlAddOrSub(3, 3, /*isAdd*/ false); + case 14: + // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(3, 2, /*isAdd*/ true)); + case 26: + // mul x, 26 => sub ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ false); + case 28: + // mul x, 28 => add ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ true); + case 29: + // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulMulAddOrSub(/*isAdd*/ true)); + case 30: + // mul x, 30 => sub (sub ((shl x, 5), x), x) + return DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(5, DL, MVT::i8)), + N->getOperand(0)), + N->getOperand(0)); + } + return SDValue(); +} + /// Optimize a single multiply with constant into two operations in order to /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. static SDValue combineMul(SDNode *N, SelectionDAG &DAG, @@ -31026,6 +31143,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() && VT.isVector()) return reduceVMULWidth(N, DAG, Subtarget); + if (!MulConstantOptimization) + return SDValue(); // An imul is usually smaller than the alternative sequence. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); @@ -31081,7 +31200,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); - } + } else if (!Subtarget.slowLEA()) + NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL); if (!NewMul) { assert(MulAmt != 0 && @@ -32381,15 +32501,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation - // into two 16-byte operations. + // into two 16-byte operations. Also split non-temporal aligned loads on + // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast) { + ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || + (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -35097,7 +35219,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = {SubVec2, SubVec}; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, + Subtarget, false)) return Ld; } } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e2e228f5544b..5224a16613cb 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -641,22 +641,37 @@ def sdmem : Operand<v2f64> { // SSE pattern fragments //===----------------------------------------------------------------------===// +// Vector load wrappers to prevent folding of non-temporal aligned loads on +// supporting targets. +def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() || + cast<LoadSDNode>(N)->getAlignment() < 16; +}]>; +def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() || + cast<LoadSDNode>(N)->getAlignment() < 32; +}]>; +def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() || + cast<LoadSDNode>(N)->getAlignment() < 64; +}]>; + // 128-bit load pattern fragments // NOTE: all 128-bit integer vector loads are promoted to v2i64 -def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; -def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>; // 256-bit load pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 -def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; -def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; -def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>; // 512-bit load pattern fragments -def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; -def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; -def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; +def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>; +def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>; +def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; @@ -728,9 +743,13 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr), // allows unaligned accesses, match any load, though this may require // setting a feature bit in the processor (on startup, for example). // Opteron 10h and later implement such a feature. +// Avoid non-temporal aligned loads on supported targets. def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return Subtarget->hasSSEUnalignedMem() - || cast<LoadSDNode>(N)->getAlignment() >= 16; + return (Subtarget->hasSSEUnalignedMem() || + cast<LoadSDNode>(N)->getAlignment() >= 16) && + (!Subtarget->hasSSE41() || + !(cast<LoadSDNode>(N)->getAlignment() >= 16 && + cast<LoadSDNode>(N)->isNonTemporal())); }]>; // 128-bit memop pattern fragments diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0aee30081a35..ff5d90c4e78b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -121,8 +121,172 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) (STI.is64Bit() ? X86::RETQ : X86::RETL)), Subtarget(STI), RI(STI.getTargetTriple()) { -// Generated memory folding tables. -#include "X86GenFoldTables.inc" + static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { + { X86::ADC32ri, X86::ADC32mi, 0 }, + { X86::ADC32ri8, X86::ADC32mi8, 0 }, + { X86::ADC32rr, X86::ADC32mr, 0 }, + { X86::ADC64ri32, X86::ADC64mi32, 0 }, + { X86::ADC64ri8, X86::ADC64mi8, 0 }, + { X86::ADC64rr, X86::ADC64mr, 0 }, + { X86::ADD16ri, X86::ADD16mi, 0 }, + { X86::ADD16ri8, X86::ADD16mi8, 0 }, + { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, + { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, + { X86::ADD16rr, X86::ADD16mr, 0 }, + { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, + { X86::ADD32ri, X86::ADD32mi, 0 }, + { X86::ADD32ri8, X86::ADD32mi8, 0 }, + { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, + { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32mr, 0 }, + { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, + { X86::ADD64ri32, X86::ADD64mi32, 0 }, + { X86::ADD64ri8, X86::ADD64mi8, 0 }, + { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, + { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64mr, 0 }, + { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, + { X86::ADD8ri, X86::ADD8mi, 0 }, + { X86::ADD8rr, X86::ADD8mr, 0 }, + { X86::AND16ri, X86::AND16mi, 0 }, + { X86::AND16ri8, X86::AND16mi8, 0 }, + { X86::AND16rr, X86::AND16mr, 0 }, + { X86::AND32ri, X86::AND32mi, 0 }, + { X86::AND32ri8, X86::AND32mi8, 0 }, + { X86::AND32rr, X86::AND32mr, 0 }, + { X86::AND64ri32, X86::AND64mi32, 0 }, + { X86::AND64ri8, X86::AND64mi8, 0 }, + { X86::AND64rr, X86::AND64mr, 0 }, + { X86::AND8ri, X86::AND8mi, 0 }, + { X86::AND8rr, X86::AND8mr, 0 }, + { X86::DEC16r, X86::DEC16m, 0 }, + { X86::DEC32r, X86::DEC32m, 0 }, + { X86::DEC64r, X86::DEC64m, 0 }, + { X86::DEC8r, X86::DEC8m, 0 }, + { X86::INC16r, X86::INC16m, 0 }, + { X86::INC32r, X86::INC32m, 0 }, + { X86::INC64r, X86::INC64m, 0 }, + { X86::INC8r, X86::INC8m, 0 }, + { X86::NEG16r, X86::NEG16m, 0 }, + { X86::NEG32r, X86::NEG32m, 0 }, + { X86::NEG64r, X86::NEG64m, 0 }, + { X86::NEG8r, X86::NEG8m, 0 }, + { X86::NOT16r, X86::NOT16m, 0 }, + { X86::NOT32r, X86::NOT32m, 0 }, + { X86::NOT64r, X86::NOT64m, 0 }, + { X86::NOT8r, X86::NOT8m, 0 }, + { X86::OR16ri, X86::OR16mi, 0 }, + { X86::OR16ri8, X86::OR16mi8, 0 }, + { X86::OR16rr, X86::OR16mr, 0 }, + { X86::OR32ri, X86::OR32mi, 0 }, + { X86::OR32ri8, X86::OR32mi8, 0 }, + { X86::OR32rr, X86::OR32mr, 0 }, + { X86::OR64ri32, X86::OR64mi32, 0 }, + { X86::OR64ri8, X86::OR64mi8, 0 }, + { X86::OR64rr, X86::OR64mr, 0 }, + { X86::OR8ri, X86::OR8mi, 0 }, + { X86::OR8rr, X86::OR8mr, 0 }, + { X86::ROL16r1, X86::ROL16m1, 0 }, + { X86::ROL16rCL, X86::ROL16mCL, 0 }, + { X86::ROL16ri, X86::ROL16mi, 0 }, + { X86::ROL32r1, X86::ROL32m1, 0 }, + { X86::ROL32rCL, X86::ROL32mCL, 0 }, + { X86::ROL32ri, X86::ROL32mi, 0 }, + { X86::ROL64r1, X86::ROL64m1, 0 }, + { X86::ROL64rCL, X86::ROL64mCL, 0 }, + { X86::ROL64ri, X86::ROL64mi, 0 }, + { X86::ROL8r1, X86::ROL8m1, 0 }, + { X86::ROL8rCL, X86::ROL8mCL, 0 }, + { X86::ROL8ri, X86::ROL8mi, 0 }, + { X86::ROR16r1, X86::ROR16m1, 0 }, + { X86::ROR16rCL, X86::ROR16mCL, 0 }, + { X86::ROR16ri, X86::ROR16mi, 0 }, + { X86::ROR32r1, X86::ROR32m1, 0 }, + { X86::ROR32rCL, X86::ROR32mCL, 0 }, + { X86::ROR32ri, X86::ROR32mi, 0 }, + { X86::ROR64r1, X86::ROR64m1, 0 }, + { X86::ROR64rCL, X86::ROR64mCL, 0 }, + { X86::ROR64ri, X86::ROR64mi, 0 }, + { X86::ROR8r1, X86::ROR8m1, 0 }, + { X86::ROR8rCL, X86::ROR8mCL, 0 }, + { X86::ROR8ri, X86::ROR8mi, 0 }, + { X86::SAR16r1, X86::SAR16m1, 0 }, + { X86::SAR16rCL, X86::SAR16mCL, 0 }, + { X86::SAR16ri, X86::SAR16mi, 0 }, + { X86::SAR32r1, X86::SAR32m1, 0 }, + { X86::SAR32rCL, X86::SAR32mCL, 0 }, + { X86::SAR32ri, X86::SAR32mi, 0 }, + { X86::SAR64r1, X86::SAR64m1, 0 }, + { X86::SAR64rCL, X86::SAR64mCL, 0 }, + { X86::SAR64ri, X86::SAR64mi, 0 }, + { X86::SAR8r1, X86::SAR8m1, 0 }, + { X86::SAR8rCL, X86::SAR8mCL, 0 }, + { X86::SAR8ri, X86::SAR8mi, 0 }, + { X86::SBB32ri, X86::SBB32mi, 0 }, + { X86::SBB32ri8, X86::SBB32mi8, 0 }, + { X86::SBB32rr, X86::SBB32mr, 0 }, + { X86::SBB64ri32, X86::SBB64mi32, 0 }, + { X86::SBB64ri8, X86::SBB64mi8, 0 }, + { X86::SBB64rr, X86::SBB64mr, 0 }, + { X86::SHL16r1, X86::SHL16m1, 0 }, + { X86::SHL16rCL, X86::SHL16mCL, 0 }, + { X86::SHL16ri, X86::SHL16mi, 0 }, + { X86::SHL32r1, X86::SHL32m1, 0 }, + { X86::SHL32rCL, X86::SHL32mCL, 0 }, + { X86::SHL32ri, X86::SHL32mi, 0 }, + { X86::SHL64r1, X86::SHL64m1, 0 }, + { X86::SHL64rCL, X86::SHL64mCL, 0 }, + { X86::SHL64ri, X86::SHL64mi, 0 }, + { X86::SHL8r1, X86::SHL8m1, 0 }, + { X86::SHL8rCL, X86::SHL8mCL, 0 }, + { X86::SHL8ri, X86::SHL8mi, 0 }, + { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 }, + { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, + { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, + { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, + { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, + { X86::SHR16r1, X86::SHR16m1, 0 }, + { X86::SHR16rCL, X86::SHR16mCL, 0 }, + { X86::SHR16ri, X86::SHR16mi, 0 }, + { X86::SHR32r1, X86::SHR32m1, 0 }, + { X86::SHR32rCL, X86::SHR32mCL, 0 }, + { X86::SHR32ri, X86::SHR32mi, 0 }, + { X86::SHR64r1, X86::SHR64m1, 0 }, + { X86::SHR64rCL, X86::SHR64mCL, 0 }, + { X86::SHR64ri, X86::SHR64mi, 0 }, + { X86::SHR8r1, X86::SHR8m1, 0 }, + { X86::SHR8rCL, X86::SHR8mCL, 0 }, + { X86::SHR8ri, X86::SHR8mi, 0 }, + { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, + { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, + { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, + { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, + { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, + { X86::SUB16ri, X86::SUB16mi, 0 }, + { X86::SUB16ri8, X86::SUB16mi8, 0 }, + { X86::SUB16rr, X86::SUB16mr, 0 }, + { X86::SUB32ri, X86::SUB32mi, 0 }, + { X86::SUB32ri8, X86::SUB32mi8, 0 }, + { X86::SUB32rr, X86::SUB32mr, 0 }, + { X86::SUB64ri32, X86::SUB64mi32, 0 }, + { X86::SUB64ri8, X86::SUB64mi8, 0 }, + { X86::SUB64rr, X86::SUB64mr, 0 }, + { X86::SUB8ri, X86::SUB8mi, 0 }, + { X86::SUB8rr, X86::SUB8mr, 0 }, + { X86::XOR16ri, X86::XOR16mi, 0 }, + { X86::XOR16ri8, X86::XOR16mi8, 0 }, + { X86::XOR16rr, X86::XOR16mr, 0 }, + { X86::XOR32ri, X86::XOR32mi, 0 }, + { X86::XOR32ri8, X86::XOR32mi8, 0 }, + { X86::XOR32rr, X86::XOR32mr, 0 }, + { X86::XOR64ri32, X86::XOR64mi32, 0 }, + { X86::XOR64ri8, X86::XOR64mi8, 0 }, + { X86::XOR64rr, X86::XOR64mr, 0 }, + { X86::XOR8ri, X86::XOR8mi, 0 }, + { X86::XOR8rr, X86::XOR8mr, 0 } + }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) { AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, @@ -131,11 +295,746 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } + static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { + { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, + { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, + { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, + { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, + { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, + { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, + { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, + { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, + { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, + { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, + { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, + { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, + { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, + { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, + { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, + { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, + { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, + { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, + { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, + { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, + { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, + { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, + { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, + { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, + { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, + { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, + { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, + { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, + { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, + { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, + { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, + { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, + { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, + { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, + { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, + { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, + { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, + { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, + { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, + { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, + { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, + { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, + { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, + { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, + { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, + { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, + { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, + { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, + { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE }, + { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, + { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, + { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, + { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, + { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, + { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, + { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, + { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, + { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, + { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, + { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, + { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, + + // AVX 128-bit versions of foldable instructions + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE }, + { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, + { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, + { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, + { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, + { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE }, + { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE }, + + // AVX 256-bit foldable instructions + { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions + { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE }, + { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE }, + { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE }, + { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, + { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, + { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE }, + { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE }, + { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE }, + { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions (256-bit versions) + { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE }, + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE }, + { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions (128-bit versions) + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }, + + // F16C foldable instructions + { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE }, + { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags); } + static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { + { X86::BSF16rr, X86::BSF16rm, 0 }, + { X86::BSF32rr, X86::BSF32rm, 0 }, + { X86::BSF64rr, X86::BSF64rm, 0 }, + { X86::BSR16rr, X86::BSR16rm, 0 }, + { X86::BSR32rr, X86::BSR32rm, 0 }, + { X86::BSR64rr, X86::BSR64rm, 0 }, + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE }, + { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE }, + { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE }, + { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE }, + { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 }, + { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 }, + { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 }, + { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, + { X86::MOVDQUrr, X86::MOVDQUrm, 0 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, 0 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 }, + { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 }, + { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 }, + { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 }, + { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 }, + { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 }, + { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 }, + { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 }, + { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE }, + { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE }, + { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE }, + { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE }, + { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE }, + { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE }, + { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE }, + { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE }, + { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE }, + { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE }, + { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE }, + { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE }, + { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, + { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 }, + { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, + { X86::RCPSSr, X86::RCPSSm, 0 }, + { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE }, + { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 }, + { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 }, + { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, + { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, + { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE }, + { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, + { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, + // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, + + // MMX version of foldable instructions + { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 }, + { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 }, + { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 }, + { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 }, + { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 }, + { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 }, + { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 }, + { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 }, + { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, + { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, + + // 3DNow! version of foldable instructions + { X86::PF2IDrr, X86::PF2IDrm, 0 }, + { X86::PF2IWrr, X86::PF2IWrm, 0 }, + { X86::PFRCPrr, X86::PFRCPrm, 0 }, + { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, + { X86::PI2FDrr, X86::PI2FDrm, 0 }, + { X86::PI2FWrr, X86::PI2FWrm, 0 }, + { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, + + // AVX 128-bit versions of foldable instructions + { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE }, + { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE }, + { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE }, + { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE }, + { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE }, + { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 }, + { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 }, + { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 }, + { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 }, + { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, + { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, + { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, + { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, + { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE }, + { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, + { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, + { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, + { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 }, + { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 }, + { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, + { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, + { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE }, + { X86::VPABSBrr, X86::VPABSBrm, 0 }, + { X86::VPABSDrr, X86::VPABSDrm, 0 }, + { X86::VPABSWrr, X86::VPABSWrm, 0 }, + { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 }, + { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 }, + { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 }, + { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 }, + { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 }, + { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, + { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, + { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE }, + { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE }, + { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE }, + { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE }, + { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE }, + { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE }, + { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE }, + { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE }, + { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE }, + { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE }, + { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, + { X86::VPTESTrr, X86::VPTESTrm, 0 }, + { X86::VRCPPSr, X86::VRCPPSm, 0 }, + { X86::VROUNDPDr, X86::VROUNDPDm, 0 }, + { X86::VROUNDPSr, X86::VROUNDPSm, 0 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, + { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, + { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, + { X86::VTESTPDrr, X86::VTESTPDrm, 0 }, + { X86::VTESTPSrr, X86::VTESTPSrm, 0 }, + { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, + { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, + + // AVX 256-bit foldable instructions + { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 }, + { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 }, + { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 }, + { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 }, + { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE }, + { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 }, + { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, + { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, + { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 }, + { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 }, + { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, + { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, + { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, + { X86::VPTESTYrr, X86::VPTESTYrm, 0 }, + { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, + { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 }, + { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, + { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 }, + { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 }, + + // AVX2 foldable instructions + + // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the + // VBROADCASTS{SD}rm memory instructions were available from AVX1. + // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction + // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions + // so they don't need an equivalent limitation. + { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, + { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, + { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + { X86::VPABSBYrr, X86::VPABSBYrm, 0 }, + { X86::VPABSDYrr, X86::VPABSDYrm, 0 }, + { X86::VPABSWYrr, X86::VPABSWYrm, 0 }, + { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE }, + { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE }, + { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE }, + { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE }, + { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE }, + { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, + { X86::VPERMQYri, X86::VPERMQYmi, 0 }, + { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE }, + { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 }, + { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 }, + { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 }, + { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 }, + { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 }, + { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 }, + { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, + + // XOP foldable instructions + { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 }, + { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 }, + { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 }, + { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 }, + { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 }, + { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 }, + { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 }, + { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 }, + { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 }, + { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 }, + { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 }, + { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 }, + { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 }, + { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 }, + { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 }, + { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 }, + { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 }, + { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 }, + { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 }, + { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 }, + { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 }, + { X86::VPROTBri, X86::VPROTBmi, 0 }, + { X86::VPROTBrr, X86::VPROTBmr, 0 }, + { X86::VPROTDri, X86::VPROTDmi, 0 }, + { X86::VPROTDrr, X86::VPROTDmr, 0 }, + { X86::VPROTQri, X86::VPROTQmi, 0 }, + { X86::VPROTQrr, X86::VPROTQmr, 0 }, + { X86::VPROTWri, X86::VPROTWmi, 0 }, + { X86::VPROTWrr, X86::VPROTWmr, 0 }, + { X86::VPSHABrr, X86::VPSHABmr, 0 }, + { X86::VPSHADrr, X86::VPSHADmr, 0 }, + { X86::VPSHAQrr, X86::VPSHAQmr, 0 }, + { X86::VPSHAWrr, X86::VPSHAWmr, 0 }, + { X86::VPSHLBrr, X86::VPSHLBmr, 0 }, + { X86::VPSHLDrr, X86::VPSHLDmr, 0 }, + { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, + { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, + + // LWP foldable instructions + { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, + { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, + { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, + { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, + + // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions + { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, + { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, + { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 }, + { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 }, + { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 }, + { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 }, + { X86::BLCI32rr, X86::BLCI32rm, 0 }, + { X86::BLCI64rr, X86::BLCI64rm, 0 }, + { X86::BLCIC32rr, X86::BLCIC32rm, 0 }, + { X86::BLCIC64rr, X86::BLCIC64rm, 0 }, + { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 }, + { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 }, + { X86::BLCS32rr, X86::BLCS32rm, 0 }, + { X86::BLCS64rr, X86::BLCS64rm, 0 }, + { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 }, + { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 }, + { X86::BLSI32rr, X86::BLSI32rm, 0 }, + { X86::BLSI64rr, X86::BLSI64rm, 0 }, + { X86::BLSIC32rr, X86::BLSIC32rm, 0 }, + { X86::BLSIC64rr, X86::BLSIC64rm, 0 }, + { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 }, + { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 }, + { X86::BLSR32rr, X86::BLSR32rm, 0 }, + { X86::BLSR64rr, X86::BLSR64rm, 0 }, + { X86::BZHI32rr, X86::BZHI32rm, 0 }, + { X86::BZHI64rr, X86::BZHI64rm, 0 }, + { X86::LZCNT16rr, X86::LZCNT16rm, 0 }, + { X86::LZCNT32rr, X86::LZCNT32rm, 0 }, + { X86::LZCNT64rr, X86::LZCNT64rm, 0 }, + { X86::POPCNT16rr, X86::POPCNT16rm, 0 }, + { X86::POPCNT32rr, X86::POPCNT32rm, 0 }, + { X86::POPCNT64rr, X86::POPCNT64rm, 0 }, + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, + { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 }, + { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 }, + { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, + { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, + { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, + { X86::TZMSK32rr, X86::TZMSK32rm, 0 }, + { X86::TZMSK64rr, X86::TZMSK64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, + { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, + { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, + { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, + { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, + { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, + { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, + { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, + { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, + { X86::VPABSBZrr, X86::VPABSBZrm, 0 }, + { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, + { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, + { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, + { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, + { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, + { X86::VPERMQZri, X86::VPERMQZmi, 0 }, + { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, + { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, + { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 }, + { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 }, + { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 }, + { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 }, + { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 }, + { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, + { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, + { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, + { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 }, + { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 }, + { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, + { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, + { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, + { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 }, + { X86::VPSLLDZri, X86::VPSLLDZmi, 0 }, + { X86::VPSLLQZri, X86::VPSLLQZmi, 0 }, + { X86::VPSLLWZri, X86::VPSLLWZmi, 0 }, + { X86::VPSRADZri, X86::VPSRADZmi, 0 }, + { X86::VPSRAQZri, X86::VPSRAQZmi, 0 }, + { X86::VPSRAWZri, X86::VPSRAWZmi, 0 }, + { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 }, + { X86::VPSRLDZri, X86::VPSRLDZmi, 0 }, + { X86::VPSRLQZri, X86::VPSRLQZmi, 0 }, + { X86::VPSRLWZri, X86::VPSRLWZmi, 0 }, + + // AVX-512 foldable instructions (256-bit versions) + { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 }, + { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, + { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, + { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, + { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, + { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, + { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, + { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, + { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, + { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 }, + { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 }, + { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 }, + { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 }, + { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 }, + { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE }, + { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 }, + { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 }, + { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 }, + { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 }, + { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 }, + { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 }, + { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 }, + { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 }, + { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 }, + { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 }, + { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 }, + { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 }, + { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 }, + { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 }, + + // AVX-512 foldable instructions (128-bit versions) + { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, + { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 }, + { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, + { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, + { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, + { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, + { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, + { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE }, + { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 }, + { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 }, + { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 }, + { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 }, + { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 }, + { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 }, + { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 }, + { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 }, + { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 }, + { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 }, + { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 }, + { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 }, + { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 }, + { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 }, + + // F16C foldable instructions + { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 }, + { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 }, + + // AES foldable instructions + { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, + { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, + { X86::VAESIMCrr, X86::VAESIMCrm, 0 }, + { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) { AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -143,6 +1042,1396 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } + static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, + { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, + { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, + { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, + { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 }, + { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, + { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, + { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, + { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, + { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE }, + { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 }, + { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 }, + { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, + { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE }, + { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, + { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 }, + { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, + { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXCSDrr, X86::MAXCSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXCSSrr, X86::MAXCSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE }, + { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, + { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 }, + { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, + { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINCSDrr, X86::MINCSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINCSSrr, X86::MINCSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE }, + { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE }, + { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, + { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, + { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, + { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, + { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, + { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, + { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, + { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, + { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, + { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, + { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 }, + { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, + { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, + { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, + { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, + { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 }, + { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 }, + { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, + { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, + { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, + { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, + { X86::PINSRBrr, X86::PINSRBrm, 0 }, + { X86::PINSRDrr, X86::PINSRDrm, 0 }, + { X86::PINSRQrr, X86::PINSRQrm, 0 }, + { X86::PINSRWrri, X86::PINSRWrmi, 0 }, + { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, + { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, + { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, + { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, + { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, + { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, + { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, + { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, + { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, + { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, + { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, + { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, + { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, + { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, + { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, + { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, + { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, + { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, + { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, + { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, + { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, + { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, + { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, + { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, + { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, + { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 }, + { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 }, + { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, + { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE }, + { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, + { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, + { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE }, + // FIXME: TEST*rr -> swapped operand of TEST*mr. + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, + { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, + + // MMX version of foldable instructions + { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 }, + { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 }, + { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 }, + { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 }, + { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 }, + { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 }, + { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 }, + { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 }, + { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 }, + { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 }, + { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 }, + { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 }, + { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 }, + { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 }, + { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 }, + { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 }, + { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 }, + { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 }, + { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 }, + { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 }, + { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 }, + { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 }, + { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 }, + { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 }, + { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 }, + { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 }, + { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 }, + { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 }, + { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 }, + { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 }, + { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 }, + { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 }, + { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 }, + { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 }, + { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 }, + { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 }, + { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 }, + { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 }, + { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 }, + { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 }, + { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 }, + { X86::MMX_PORirr, X86::MMX_PORirm, 0 }, + { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 }, + { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 }, + { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 }, + { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 }, + { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 }, + { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 }, + { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 }, + { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 }, + { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 }, + { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 }, + { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 }, + { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 }, + { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 }, + { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 }, + { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 }, + { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 }, + { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 }, + { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 }, + { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 }, + { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 }, + { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 }, + { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 }, + { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 }, + { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 }, + { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 }, + { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 }, + { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, + { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, + + // 3DNow! version of foldable instructions + { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, + { X86::PFACCrr, X86::PFACCrm, 0 }, + { X86::PFADDrr, X86::PFADDrm, 0 }, + { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, + { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, + { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, + { X86::PFMAXrr, X86::PFMAXrm, 0 }, + { X86::PFMINrr, X86::PFMINrm, 0 }, + { X86::PFMULrr, X86::PFMULrm, 0 }, + { X86::PFNACCrr, X86::PFNACCrm, 0 }, + { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, + { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, + { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, + { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, + { X86::PFSUBrr, X86::PFSUBrm, 0 }, + { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, + { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, + + // AVX 128-bit versions of foldable instructions + { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, + { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, + { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, + { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, + { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, + { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, + { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, + { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, + { X86::VADDPDrr, X86::VADDPDrm, 0 }, + { X86::VADDPSrr, X86::VADDPSrm, 0 }, + { X86::VADDSDrr, X86::VADDSDrm, 0 }, + { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE }, + { X86::VADDSSrr, X86::VADDSSrm, 0 }, + { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, + { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, + { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, + { X86::VANDPDrr, X86::VANDPDrm, 0 }, + { X86::VANDPSrr, X86::VANDPSrm, 0 }, + { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, + { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, + { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, + { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, + { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, + { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, + { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, + { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, + { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, + { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE }, + { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, + { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE }, + { X86::VDPPDrri, X86::VDPPDrmi, 0 }, + { X86::VDPPSrri, X86::VDPPSrmi, 0 }, + { X86::VHADDPDrr, X86::VHADDPDrm, 0 }, + { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, + { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE }, + { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE }, + { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 }, + { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 }, + { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 }, + { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 }, + { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, + { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, + { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, + { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE }, + { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, + { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE }, + { X86::VMINCPDrr, X86::VMINCPDrm, 0 }, + { X86::VMINCPSrr, X86::VMINCPSrm, 0 }, + { X86::VMINCSDrr, X86::VMINCSDrm, 0 }, + { X86::VMINCSSrr, X86::VMINCSSrm, 0 }, + { X86::VMINPDrr, X86::VMINPDrm, 0 }, + { X86::VMINPSrr, X86::VMINPSrm, 0 }, + { X86::VMINSDrr, X86::VMINSDrm, 0 }, + { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE }, + { X86::VMINSSrr, X86::VMINSSrm, 0 }, + { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE }, + { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, 0 }, + { X86::VMULPSrr, X86::VMULPSrm, 0 }, + { X86::VMULSDrr, X86::VMULSDrm, 0 }, + { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE }, + { X86::VMULSSrr, X86::VMULSSrm, 0 }, + { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE }, + { X86::VORPDrr, X86::VORPDrm, 0 }, + { X86::VORPSrr, X86::VORPSrm, 0 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, + { X86::VPADDBrr, X86::VPADDBrm, 0 }, + { X86::VPADDDrr, X86::VPADDDrm, 0 }, + { X86::VPADDQrr, X86::VPADDQrm, 0 }, + { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, + { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, + { X86::VPADDWrr, X86::VPADDWrm, 0 }, + { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 }, + { X86::VPANDNrr, X86::VPANDNrm, 0 }, + { X86::VPANDrr, X86::VPANDrm, 0 }, + { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, + { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, + { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 }, + { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, + { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, + { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, + { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, + { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, + { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, + { X86::VPINSRBrr, X86::VPINSRBrm, 0 }, + { X86::VPINSRDrr, X86::VPINSRDrm, 0 }, + { X86::VPINSRQrr, X86::VPINSRQrm, 0 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, + { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, + { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, + { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, + { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, + { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, + { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, + { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, + { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, + { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, + { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, + { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, + { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, + { X86::VPORrr, X86::VPORrm, 0 }, + { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, + { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 }, + { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 }, + { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 }, + { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, + { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, + { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, + { X86::VPSRADrr, X86::VPSRADrm, 0 }, + { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, + { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, + { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, + { X86::VPSRLWrr, X86::VPSRLWrm, 0 }, + { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, + { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, + { X86::VPSUBQrr, X86::VPSUBQrm, 0 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, + { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 }, + { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 }, + { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, + { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, + { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VRCPSSr, X86::VRCPSSm, 0 }, + { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE }, + { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, + { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE }, + { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, + { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE }, + { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, + { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, + { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, + { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE }, + { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, + { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE }, + { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, + { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, + { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, + { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE }, + { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, + { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, + { X86::VXORPDrr, X86::VXORPDrm, 0 }, + { X86::VXORPSrr, X86::VXORPSrm, 0 }, + + // AVX 256-bit foldable instructions + { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, + { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, + { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, + { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, + { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, + { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, + { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, + { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, + { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, + { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, + { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, + { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, + { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, + { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, + { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, + { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, + { X86::VDPPSYrri, X86::VDPPSYrmi, 0 }, + { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, + { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, + { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, + { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, + { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, + { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 }, + { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 }, + { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, + { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, + { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 }, + { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 }, + { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, + { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, + { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, + { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, + { X86::VORPDYrr, X86::VORPDYrm, 0 }, + { X86::VORPSYrr, X86::VORPSYrm, 0 }, + { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, + { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, + { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, + { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, + { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, + { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, + { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, + { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, + { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, + { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, + { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, + { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, + { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, + + // AVX2 foldable instructions + { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, + { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, + { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, + { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, + { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, + { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 }, + { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, + { X86::VPANDYrr, X86::VPANDYrm, 0 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, + { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, + { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, + { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 }, + { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, + { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, + { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, + { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, + { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, + { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, + { X86::VPORYrr, X86::VPORYrm, 0 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, + { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 }, + { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 }, + { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, + { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, + { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, + { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 }, + { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, + { X86::VPXORYrr, X86::VPXORYrm, 0 }, + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE }, + + // XOP foldable instructions + { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 }, + { X86::VPCOMBri, X86::VPCOMBmi, 0 }, + { X86::VPCOMDri, X86::VPCOMDmi, 0 }, + { X86::VPCOMQri, X86::VPCOMQmi, 0 }, + { X86::VPCOMWri, X86::VPCOMWmi, 0 }, + { X86::VPCOMUBri, X86::VPCOMUBmi, 0 }, + { X86::VPCOMUDri, X86::VPCOMUDmi, 0 }, + { X86::VPCOMUQri, X86::VPCOMUQmi, 0 }, + { X86::VPCOMUWri, X86::VPCOMUWmi, 0 }, + { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 }, + { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 }, + { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 }, + { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 }, + { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 }, + { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 }, + { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 }, + { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 }, + { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 }, + { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 }, + { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 }, + { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 }, + { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 }, + { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 }, + { X86::VPPERMrrr, X86::VPPERMrmr, 0 }, + { X86::VPROTBrr, X86::VPROTBrm, 0 }, + { X86::VPROTDrr, X86::VPROTDrm, 0 }, + { X86::VPROTQrr, X86::VPROTQrm, 0 }, + { X86::VPROTWrr, X86::VPROTWrm, 0 }, + { X86::VPSHABrr, X86::VPSHABrm, 0 }, + { X86::VPSHADrr, X86::VPSHADrm, 0 }, + { X86::VPSHAQrr, X86::VPSHAQrm, 0 }, + { X86::VPSHAWrr, X86::VPSHAWrm, 0 }, + { X86::VPSHLBrr, X86::VPSHLBrm, 0 }, + { X86::VPSHLDrr, X86::VPSHLDrm, 0 }, + { X86::VPSHLQrr, X86::VPSHLQrm, 0 }, + { X86::VPSHLWrr, X86::VPSHLWrm, 0 }, + + // BMI/BMI2 foldable instructions + { X86::ANDN32rr, X86::ANDN32rm, 0 }, + { X86::ANDN64rr, X86::ANDN64rm, 0 }, + { X86::MULX32rr, X86::MULX32rm, 0 }, + { X86::MULX64rr, X86::MULX64rm, 0 }, + { X86::PDEP32rr, X86::PDEP32rm, 0 }, + { X86::PDEP64rr, X86::PDEP64rm, 0 }, + { X86::PEXT32rr, X86::PEXT32rm, 0 }, + { X86::PEXT64rr, X86::PEXT64rm, 0 }, + + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, + { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, + { X86::VADDSDZrr, X86::VADDSDZrm, 0 }, + { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE }, + { X86::VADDSSZrr, X86::VADDSSZrm, 0 }, + { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE }, + { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 }, + { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 }, + { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 }, + { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 }, + { X86::VANDPDZrr, X86::VANDPDZrm, 0 }, + { X86::VANDPSZrr, X86::VANDPSZrm, 0 }, + { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 }, + { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 }, + { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 }, + { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE }, + { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 }, + { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE }, + { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 }, + { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 }, + { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 }, + { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE }, + { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 }, + { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 }, + { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 }, + { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 }, + { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 }, + { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 }, + { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 }, + { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 }, + { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 }, + { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 }, + { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 }, + { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 }, + { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 }, + { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, + { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 }, + { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 }, + { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE }, + { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 }, + { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE }, + { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 }, + { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 }, + { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 }, + { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 }, + { X86::VMINPDZrr, X86::VMINPDZrm, 0 }, + { X86::VMINPSZrr, X86::VMINPSZrm, 0 }, + { X86::VMINSDZrr, X86::VMINSDZrm, 0 }, + { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE }, + { X86::VMINSSZrr, X86::VMINSSZrm, 0 }, + { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE }, + { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE }, + { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, + { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, + { X86::VMULSDZrr, X86::VMULSDZrm, 0 }, + { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE }, + { X86::VMULSSZrr, X86::VMULSSZrm, 0 }, + { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE }, + { X86::VORPDZrr, X86::VORPDZrm, 0 }, + { X86::VORPSZrr, X86::VORPSZrm, 0 }, + { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 }, + { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 }, + { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 }, + { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 }, + { X86::VPADDBZrr, X86::VPADDBZrm, 0 }, + { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, + { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, + { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 }, + { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 }, + { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 }, + { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 }, + { X86::VPADDWZrr, X86::VPADDWZrm, 0 }, + { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 }, + { X86::VPANDDZrr, X86::VPANDDZrm, 0 }, + { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 }, + { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 }, + { X86::VPANDQZrr, X86::VPANDQZrm, 0 }, + { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 }, + { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 }, + { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 }, + { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 }, + { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 }, + { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 }, + { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 }, + { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 }, + { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 }, + { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 }, + { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 }, + { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 }, + { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 }, + { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 }, + { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 }, + { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 }, + { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 }, + { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 }, + { X86::VPERMBZrr, X86::VPERMBZrm, 0 }, + { X86::VPERMDZrr, X86::VPERMDZrm, 0 }, + { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 }, + { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 }, + { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 }, + { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, + { X86::VPERMQZrr, X86::VPERMQZrm, 0 }, + { X86::VPERMWZrr, X86::VPERMWZrm, 0 }, + { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 }, + { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 }, + { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 }, + { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 }, + { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 }, + { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 }, + { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 }, + { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 }, + { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 }, + { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 }, + { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 }, + { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 }, + { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 }, + { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 }, + { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 }, + { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 }, + { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 }, + { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 }, + { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 }, + { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 }, + { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 }, + { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 }, + { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 }, + { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 }, + { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 }, + { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 }, + { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, + { X86::VPORDZrr, X86::VPORDZrm, 0 }, + { X86::VPORQZrr, X86::VPORQZrm, 0 }, + { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 }, + { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 }, + { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 }, + { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 }, + { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, + { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, + { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 }, + { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 }, + { X86::VPSRADZrr, X86::VPSRADZrm, 0 }, + { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 }, + { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, + { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 }, + { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 }, + { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 }, + { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 }, + { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 }, + { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, + { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, + { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 }, + { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 }, + { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 }, + { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 }, + { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, + { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 }, + { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 }, + { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 }, + { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 }, + { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 }, + { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 }, + { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 }, + { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 }, + { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 }, + { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 }, + { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 }, + { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 }, + { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 }, + { X86::VPXORDZrr, X86::VPXORDZrm, 0 }, + { X86::VPXORQZrr, X86::VPXORQZrm, 0 }, + { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, + { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, + { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, + { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 }, + { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 }, + { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE }, + { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 }, + { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 }, + { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 }, + { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 }, + { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 }, + { X86::VXORPDZrr, X86::VXORPDZrm, 0 }, + { X86::VXORPSZrr, X86::VXORPSZrm, 0 }, + + // AVX-512{F,VL} foldable instructions + { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 }, + { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 }, + { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 }, + { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 }, + { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 }, + { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 }, + { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 }, + { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 }, + { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 }, + { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 }, + { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 }, + { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 }, + { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 }, + { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 }, + { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 }, + { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 }, + { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 }, + { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 }, + { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 }, + { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 }, + { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 }, + { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 }, + { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 }, + { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 }, + { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 }, + { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 }, + { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 }, + { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm, 0 }, + { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 }, + { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 }, + { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 }, + { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 }, + { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 }, + { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 }, + { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 }, + { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 }, + { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 }, + { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 }, + { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 }, + { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 }, + { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 }, + { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 }, + { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 }, + { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 }, + { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 }, + { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 }, + { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 }, + { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 }, + { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 }, + { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 }, + { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 }, + { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 }, + { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 }, + { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 }, + { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 }, + { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 }, + { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 }, + { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 }, + { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 }, + { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 }, + { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 }, + { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 }, + { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 }, + { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 }, + { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 }, + { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 }, + { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 }, + { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 }, + { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 }, + { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 }, + { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 }, + { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 }, + { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 }, + { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 }, + { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 }, + { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 }, + { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 }, + { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 }, + { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 }, + { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 }, + { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 }, + { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 }, + { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 }, + { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 }, + { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 }, + { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 }, + { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 }, + { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 }, + { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 }, + { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 }, + { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 }, + { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 }, + { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 }, + { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 }, + { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 }, + { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 }, + { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 }, + { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 }, + { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 }, + { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 }, + { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 }, + { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 }, + { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 }, + { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 }, + { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 }, + { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 }, + { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 }, + { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 }, + { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 }, + { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 }, + { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 }, + { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 }, + { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 }, + { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 }, + { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 }, + { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 }, + { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 }, + { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 }, + { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 }, + { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 }, + { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 }, + { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 }, + { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 }, + { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 }, + { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 }, + { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 }, + { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 }, + { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 }, + { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 }, + { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 }, + { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 }, + { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 }, + { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 }, + { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 }, + { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 }, + { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 }, + { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 }, + { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 }, + { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 }, + { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 }, + { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 }, + { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 }, + { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 }, + { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 }, + { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 }, + { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 }, + { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 }, + { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 }, + { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 }, + { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 }, + { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 }, + { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 }, + { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 }, + { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 }, + { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 }, + { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 }, + { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 }, + { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 }, + { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 }, + { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 }, + { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 }, + { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 }, + { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 }, + { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 }, + { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 }, + { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 }, + { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 }, + { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 }, + { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 }, + { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 }, + { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 }, + { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 }, + { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 }, + { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 }, + { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 }, + { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 }, + { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 }, + { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 }, + { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 }, + { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 }, + { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 }, + { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 }, + { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 }, + { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 }, + { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 }, + { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 }, + { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 }, + { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 }, + { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 }, + { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 }, + { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 }, + { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 }, + { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 }, + { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 }, + { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 }, + { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 }, + { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 }, + { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 }, + { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 }, + { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 }, + { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 }, + { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 }, + { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 }, + { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 }, + { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 }, + { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 }, + { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 }, + { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 }, + { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 }, + { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 }, + { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 }, + { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 }, + { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 }, + { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 }, + { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 }, + { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 }, + { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 }, + { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 }, + { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 }, + { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 }, + { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 }, + { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 }, + { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 }, + { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 }, + { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 }, + { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 }, + { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 }, + { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 }, + { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 }, + { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 }, + { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 }, + { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 }, + { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 }, + { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 }, + { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 }, + { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 }, + { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 }, + { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 }, + { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 }, + { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 }, + { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 }, + { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 }, + { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 }, + { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 }, + { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 }, + { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 }, + { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 }, + { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 }, + { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 }, + { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 }, + { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 }, + { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 }, + { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 }, + { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 }, + { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 }, + { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 }, + { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 }, + { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, + { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, + { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, + { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 }, + { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, + { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, + { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 }, + { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 }, + { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 }, + { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 }, + { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 }, + { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 }, + { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 }, + { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 }, + { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 }, + { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 }, + { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 }, + { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 }, + { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 }, + { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 }, + { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 }, + { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 }, + { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, + + // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, + { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 }, + { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, + { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, + { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, + { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, + { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, + { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, + { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, + { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, + { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, + { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 }, + { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 }, + { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 }, + { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 }, + { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 }, + { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, + { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, + { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 }, + { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 }, + { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, + { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, + { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, + { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 }, + { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 }, + { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 }, + { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 }, + { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 }, + { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 }, + { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 }, + { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 }, + { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, + { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 }, + { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, + { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, + { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, + { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, + { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, + { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, + { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, + { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, + { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 }, + { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 }, + { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 }, + { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 }, + { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 }, + { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE }, + { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 }, + { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 }, + { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 }, + { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 }, + { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 }, + { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 }, + { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 }, + { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 }, + { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 }, + { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 }, + { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 }, + { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, + { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 }, + { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, + { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, + { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, + { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, + { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, + { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE }, + { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 }, + { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 }, + { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 }, + { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 }, + { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 }, + { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 }, + { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 }, + { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 }, + { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 }, + { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 }, + { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 }, + { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 }, + + // AES foldable instructions + { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, + { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, + { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 }, + { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 }, + { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 }, + { X86::VAESDECrr, X86::VAESDECrm, 0 }, + { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 }, + { X86::VAESENCrr, X86::VAESENCrm, 0 }, + + // SHA foldable instructions + { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 }, + { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 }, + { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 }, + { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 }, + { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 }, + { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 }, + { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) { AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -150,12 +2439,1105 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } + static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE }, + + // XOP foldable instructions + { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 }, + { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 }, + { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 }, + { X86::VPPERMrrr, X86::VPPERMrrm, 0 }, + + // AVX-512 instructions with 3 source operands. + { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 }, + { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, + { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, + { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, + { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, + { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 }, + { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 }, + { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 }, + { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 }, + { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 }, + { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 }, + { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 }, + { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 }, + { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 }, + + // AVX-512VL 256-bit instructions with 3 source operands. + { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 }, + { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 }, + { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 }, + { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 }, + { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 }, + { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 }, + { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 }, + { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 }, + { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 }, + { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 }, + { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 }, + { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 }, + { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 }, + { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 }, + + // AVX-512VL 128-bit instructions with 3 source operands. + { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 }, + { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 }, + { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 }, + { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 }, + { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 }, + { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 }, + { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 }, + { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 }, + { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 }, + { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 }, + { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 }, + { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 }, + { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 }, + { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 }, + + // AVX-512 masked instructions + { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 }, + { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 }, + { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 }, + { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 }, + { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 }, + { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 }, + { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 }, + { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 }, + { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 }, + { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 }, + { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 }, + { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 }, + { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 }, + { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 }, + { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 }, + { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 }, + { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 }, + { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 }, + { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 }, + { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 }, + { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, + { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, + { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 }, + { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 }, + { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 }, + { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 }, + { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, + { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, + { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 }, + { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 }, + { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, + { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, + { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 }, + { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 }, + { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 }, + { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 }, + { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 }, + { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 }, + { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 }, + { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 }, + { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 }, + { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 }, + { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 }, + { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 }, + { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 }, + { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 }, + { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 }, + { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 }, + { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 }, + { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 }, + { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 }, + { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 }, + { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 }, + { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 }, + { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 }, + { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 }, + { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 }, + { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 }, + { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 }, + { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 }, + { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 }, + { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 }, + { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 }, + { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 }, + { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 }, + { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 }, + { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 }, + { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 }, + { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 }, + { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 }, + { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 }, + { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 }, + { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 }, + { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 }, + { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 }, + { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 }, + { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 }, + { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 }, + { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 }, + { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 }, + { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 }, + { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 }, + { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 }, + { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 }, + { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 }, + { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 }, + { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 }, + { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 }, + { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 }, + { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 }, + { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 }, + { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 }, + { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 }, + { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 }, + { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 }, + { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 }, + { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 }, + { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 }, + { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 }, + { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 }, + { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 }, + { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 }, + { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 }, + { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 }, + { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 }, + { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 }, + { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 }, + { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 }, + { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 }, + { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 }, + { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 }, + { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 }, + { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 }, + { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 }, + { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 }, + { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 }, + { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 }, + { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 }, + { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 }, + { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 }, + { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, + { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, + { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, + { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, + { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, + { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, + { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, + { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 }, + { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 }, + { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 }, + { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 }, + { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 }, + { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 }, + + // AVX-512{F,VL} masked arithmetic instructions 256-bit + { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 }, + { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 }, + { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 }, + { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 }, + { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 }, + { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 }, + { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 }, + { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 }, + { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 }, + { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 }, + { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 }, + { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 }, + { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 }, + { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 }, + { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 }, + { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 }, + { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 }, + { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 }, + { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 }, + { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 }, + { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 }, + { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 }, + { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 }, + { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 }, + { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 }, + { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 }, + { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 }, + { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 }, + { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 }, + { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 }, + { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 }, + { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 }, + { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 }, + { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 }, + { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 }, + { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 }, + { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 }, + { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 }, + { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 }, + { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 }, + { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 }, + { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 }, + { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 }, + { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 }, + { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 }, + { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 }, + { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 }, + { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 }, + { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 }, + { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 }, + { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 }, + { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 }, + { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 }, + { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 }, + { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 }, + { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 }, + { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 }, + { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 }, + { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 }, + { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 }, + { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 }, + { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 }, + { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 }, + { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 }, + { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 }, + { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 }, + { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 }, + { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 }, + { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 }, + { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 }, + { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 }, + { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 }, + { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 }, + { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 }, + { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 }, + { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 }, + { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 }, + { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 }, + { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 }, + { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 }, + { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 }, + { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 }, + { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 }, + { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 }, + { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 }, + { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 }, + { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 }, + { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 }, + { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 }, + { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 }, + { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 }, + { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 }, + { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 }, + { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 }, + { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 }, + { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 }, + { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 }, + { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 }, + { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 }, + { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 }, + { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 }, + { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 }, + { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 }, + { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 }, + { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 }, + { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 }, + { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 }, + { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 }, + { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 }, + { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 }, + { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 }, + { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 }, + { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, + { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, + { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, + { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, + { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, + { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, + { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 }, + { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 }, + { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 }, + { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 }, + { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 }, + { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 }, + { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 }, + + // AVX-512{F,VL} masked arithmetic instructions 128-bit + { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 }, + { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 }, + { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 }, + { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 }, + { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 }, + { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 }, + { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 }, + { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 }, + { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 }, + { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 }, + { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 }, + { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 }, + { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }, + { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 }, + { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 }, + { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 }, + { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 }, + { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 }, + { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 }, + { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 }, + { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 }, + { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 }, + { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 }, + { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 }, + { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 }, + { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 }, + { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 }, + { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 }, + { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 }, + { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 }, + { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 }, + { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 }, + { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 }, + { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 }, + { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 }, + { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 }, + { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 }, + { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 }, + { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 }, + { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 }, + { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 }, + { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 }, + { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 }, + { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 }, + { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 }, + { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 }, + { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 }, + { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 }, + { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 }, + { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 }, + { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 }, + { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 }, + { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 }, + { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 }, + { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 }, + { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 }, + { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 }, + { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 }, + { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 }, + { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 }, + { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 }, + { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 }, + { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 }, + { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 }, + { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 }, + { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 }, + { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 }, + { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 }, + { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 }, + { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 }, + { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 }, + { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 }, + { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 }, + { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 }, + { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 }, + { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 }, + { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 }, + { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 }, + { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 }, + { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 }, + { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 }, + { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 }, + { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 }, + { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 }, + { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 }, + { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 }, + { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 }, + { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 }, + { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 }, + { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 }, + { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 }, + { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 }, + { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 }, + { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 }, + { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 }, + { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 }, + { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 }, + { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 }, + { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 }, + { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 }, + { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 }, + { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 }, + { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 }, + { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 }, + { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 }, + { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 }, + { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 }, + { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 }, + { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 }, + { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, + { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, + { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 }, + { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 }, + { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 }, + { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 }, + { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 }, + { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, + + // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, + { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 }, + { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, + { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, + { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, + { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, + { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, + { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, + { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, + { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, + { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, + { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 }, + { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 }, + { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 }, + { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 }, + { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 }, + { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, + { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, + { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 }, + { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 }, + { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, + { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, + { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, + { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 }, + { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 }, + { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 }, + { X86::VPSRADZrik, X86::VPSRADZmik, 0 }, + { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 }, + { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 }, + { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 }, + { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 }, + { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, + { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 }, + { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, + { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, + { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, + { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, + { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, + { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, + { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, + { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, + { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 }, + { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 }, + { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 }, + { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 }, + { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 }, + { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE }, + { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 }, + { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 }, + { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 }, + { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 }, + { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 }, + { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 }, + { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 }, + { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 }, + { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 }, + { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 }, + { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 }, + { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, + { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 }, + { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, + { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, + { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, + { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, + { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, + { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE }, + { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 }, + { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 }, + { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 }, + { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 }, + { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 }, + { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 }, + { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 }, + { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 }, + { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 }, + { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, + { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, + { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, // Index 3, folded load Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } + auto I = X86InstrFMA3Info::rm_begin(); + auto E = X86InstrFMA3Info::rm_end(); + for (; I != E; ++I) { + if (!I.getGroup()->isKMasked()) { + // Intrinsic forms need to pass TB_NO_REVERSE. + if (I.getGroup()->isIntrinsic()) { + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE); + } else { + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD); + } + } + } + + static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { + // AVX-512 foldable masked instructions + { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 }, + { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 }, + { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE }, + { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 }, + { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 }, + { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 }, + { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 }, + { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 }, + { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 }, + { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 }, + { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 }, + { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 }, + { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 }, + { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 }, + { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 }, + { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 }, + { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 }, + { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 }, + { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 }, + { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 }, + { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 }, + { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 }, + { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 }, + { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 }, + { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 }, + { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 }, + { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 }, + { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 }, + { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 }, + { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 }, + { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 }, + { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 }, + { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, + { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE }, + { X86::VORPDZrrk, X86::VORPDZrmk, 0 }, + { X86::VORPSZrrk, X86::VORPSZrmk, 0 }, + { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 }, + { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 }, + { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 }, + { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 }, + { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 }, + { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 }, + { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 }, + { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 }, + { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 }, + { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 }, + { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 }, + { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 }, + { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 }, + { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 }, + { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 }, + { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 }, + { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 }, + { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 }, + { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 }, + { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 }, + { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 }, + { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 }, + { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 }, + { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 }, + { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 }, + { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 }, + { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 }, + { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 }, + { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 }, + { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 }, + { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 }, + { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 }, + { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 }, + { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 }, + { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 }, + { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 }, + { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 }, + { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 }, + { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 }, + { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 }, + { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 }, + { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 }, + { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 }, + { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 }, + { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 }, + { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 }, + { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 }, + { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 }, + { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 }, + { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 }, + { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 }, + { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 }, + { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 }, + { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 }, + { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 }, + { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 }, + { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 }, + { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 }, + { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 }, + { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 }, + { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 }, + { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 }, + { X86::VPORDZrrk, X86::VPORDZrmk, 0 }, + { X86::VPORQZrrk, X86::VPORQZrmk, 0 }, + { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 }, + { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 }, + { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 }, + { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 }, + { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 }, + { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 }, + { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 }, + { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 }, + { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 }, + { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 }, + { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 }, + { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 }, + { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 }, + { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 }, + { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 }, + { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 }, + { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 }, + { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 }, + { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 }, + { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 }, + { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 }, + { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 }, + { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 }, + { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 }, + { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 }, + { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 }, + { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 }, + { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 }, + { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 }, + { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 }, + { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 }, + { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 }, + { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 }, + { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 }, + { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 }, + { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, + { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, + { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, + { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, + { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, + { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, + { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, + { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 }, + { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 }, + { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 }, + { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 }, + { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 }, + { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 }, + + // AVX-512{F,VL} foldable masked instructions 256-bit + { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 }, + { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 }, + { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 }, + { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 }, + { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 }, + { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 }, + { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 }, + { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 }, + { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 }, + { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 }, + { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 }, + { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 }, + { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 }, + { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 }, + { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 }, + { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 }, + { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 }, + { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 }, + { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 }, + { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 }, + { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 }, + { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 }, + { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 }, + { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 }, + { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 }, + { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 }, + { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 }, + { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 }, + { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 }, + { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 }, + { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 }, + { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 }, + { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 }, + { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 }, + { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 }, + { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 }, + { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 }, + { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 }, + { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 }, + { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 }, + { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 }, + { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 }, + { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 }, + { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 }, + { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 }, + { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 }, + { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 }, + { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 }, + { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 }, + { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 }, + { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 }, + { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 }, + { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 }, + { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 }, + { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 }, + { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 }, + { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 }, + { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 }, + { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 }, + { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 }, + { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 }, + { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 }, + { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 }, + { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 }, + { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 }, + { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 }, + { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 }, + { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 }, + { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 }, + { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 }, + { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 }, + { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 }, + { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 }, + { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 }, + { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 }, + { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 }, + { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 }, + { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 }, + { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 }, + { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 }, + { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 }, + { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 }, + { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 }, + { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 }, + { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 }, + { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 }, + { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 }, + { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 }, + { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 }, + { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 }, + { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 }, + { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 }, + { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 }, + { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 }, + { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 }, + { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 }, + { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 }, + { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 }, + { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 }, + { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 }, + { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 }, + { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 }, + { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 }, + { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 }, + { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 }, + { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 }, + { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 }, + { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 }, + { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 }, + { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 }, + { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 }, + { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 }, + { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 }, + { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 }, + { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 }, + { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 }, + { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 }, + { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 }, + { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 }, + { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 }, + { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 }, + { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 }, + { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 }, + { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 }, + { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 }, + { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 }, + { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, + { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, + { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, + { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, + { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, + { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, + { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, + { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 }, + { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 }, + { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 }, + { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 }, + { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 }, + { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 }, + + // AVX-512{F,VL} foldable instructions 128-bit + { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 }, + { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 }, + { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 }, + { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 }, + { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 }, + { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 }, + { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 }, + { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 }, + { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 }, + { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 }, + { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 }, + { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 }, + { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }, + { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 }, + { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 }, + { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 }, + { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 }, + { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 }, + { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 }, + { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 }, + { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 }, + { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 }, + { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 }, + { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 }, + { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 }, + { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 }, + { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 }, + { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 }, + { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 }, + { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 }, + { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 }, + { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 }, + { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 }, + { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 }, + { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 }, + { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 }, + { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 }, + { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 }, + { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 }, + { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 }, + { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 }, + { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 }, + { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 }, + { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 }, + { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 }, + { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 }, + { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 }, + { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 }, + { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 }, + { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 }, + { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 }, + { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 }, + { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 }, + { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 }, + { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 }, + { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 }, + { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 }, + { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 }, + { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 }, + { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 }, + { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 }, + { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 }, + { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 }, + { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 }, + { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 }, + { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 }, + { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 }, + { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 }, + { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 }, + { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 }, + { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 }, + { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 }, + { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 }, + { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 }, + { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 }, + { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 }, + { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 }, + { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 }, + { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 }, + { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 }, + { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 }, + { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 }, + { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 }, + { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 }, + { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 }, + { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 }, + { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 }, + { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 }, + { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 }, + { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 }, + { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 }, + { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 }, + { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 }, + { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 }, + { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 }, + { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 }, + { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 }, + { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 }, + { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 }, + { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 }, + { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 }, + { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 }, + { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 }, + { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 }, + { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 }, + { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 }, + { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 }, + { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 }, + { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 }, + { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 }, + { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 }, + { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 }, + { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 }, + { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 }, + { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 }, + { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 }, + { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 }, + { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 }, + { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 }, + { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 }, + { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 }, + { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 }, + { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 }, + { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, + { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, + { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 }, + { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 }, + { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 }, + { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 }, + { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 }, + { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 }, + + // 512-bit three source instructions with zero masking. + { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 }, + { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 }, + { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 }, + { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 }, + { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 }, + { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 }, + { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 }, + { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 }, + { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 }, + { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 }, + { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 }, + { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 }, + { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 }, + { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 }, + + // 256-bit three source instructions with zero masking. + { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 }, + { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 }, + { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 }, + { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 }, + { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 }, + { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 }, + { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 }, + { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 }, + { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 }, + { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 }, + { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 }, + { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 }, + { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 }, + { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 }, + + // 128-bit three source instructions with zero masking. + { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 }, + { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 }, + { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 }, + { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 }, + { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 }, + { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 }, + { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 }, + { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 }, + { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 }, + { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 }, + { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 }, + { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 }, + { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 }, + { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 }, + }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, @@ -163,6 +3545,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // Index 4, folded load Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } + for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) { + if (I.getGroup()->isKMasked()) { + // Intrinsics need to pass TB_NO_REVERSE. + if (I.getGroup()->isIntrinsic()) { + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE); + } else { + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD); + } + } + } } void diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a3e677209305..8490b972eb5c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5183,14 +5183,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], - IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, @@ -5200,14 +5200,14 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], - IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -5310,7 +5310,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecALU in { +let Sched = WritePHAdd in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 598d88d8b9c3..33bc8e11a572 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,20 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "X86AsmPrinter.h" -#include "X86RegisterInfo.h" -#include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" +#include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -38,13 +39,12 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index aabbf67a16b6..e6756b975c10 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,8 +27,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 677e82459766..03c8ccb53afe 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; //-- Arithmetic instructions --// +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes<WriteFHAdd, [HWPort1, HWPort5]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} + +// x,m / v,v,m. +def : WriteRes<WriteFHAddLd, [HWPort1, HWPort5, HWPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes<WritePHAdd, [HWPort1, HWPort5]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +// v <- v,m. +def : WriteRes<WritePHAddLd, [HWPort1, HWPort5, HWPort23]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; +} + // PHADD|PHSUB (S) W/D. // v <- v,v. def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index eca65c2892b7..b8ec5883152c 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -157,6 +157,31 @@ def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> { let ResourceCycles = [1, 1, 1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes<WriteFHAdd, [SBPort1]> { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes<WritePHAdd, [SBPort15]>; + +// v <- v,m. +def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes<WritePCmpIStrM, [SBPort015]> { diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 4eae6ca7abe3..a12fa68faf4f 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -77,6 +77,10 @@ defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } +// Horizontal Add/Sub (float and integer) +defm WriteFHAdd : X86SchedWritePair; +defm WritePHAdd : X86SchedWritePair; + // Vector integer operations. defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index ce1ece34e431..6cb2a3694d92 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -320,6 +320,38 @@ def : WriteRes<WriteAESKeyGenLd, [JLAGU, JVIMUL]> { } //////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes<WriteFHAdd, [JFPU0]> { + let Latency = 3; +} + +def : WriteRes<WriteFHAddLd, [JLAGU, JFPU0]> { + let Latency = 8; +} + +def : WriteRes<WritePHAdd, [JFPU01]> { + let ResourceCycles = [1]; +} +def : WriteRes<WritePHAddLd, [JLAGU, JFPU01 ]> { + let Latency = 6; + let ResourceCycles = [1, 1]; +} + +def WriteFHAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>; + +def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>; + +//////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index f95d4fa04177..03ed2db2350d 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -137,6 +137,33 @@ defm : SMWriteResPair<WriteShuffle, FPC_RSV0, 1>; defm : SMWriteResPair<WriteBlend, FPC_RSV0, 1>; defm : SMWriteResPair<WriteMPSAD, FPC_RSV0, 7>; +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD + +def : WriteRes<WriteFHAdd, [FPC_RSV01]> { + let Latency = 3; + let ResourceCycles = [2]; +} + +def : WriteRes<WriteFHAddLd, [FPC_RSV01, MEC_RSV]> { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// PHADD|PHSUB (S) W/D. +def : WriteRes<WritePHAdd, [FPC_RSV01]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : WriteRes<WritePHAddLd, [FPC_RSV01, MEC_RSV]> { + let Latency = 4; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index d4b2392eb1f5..c67aa04aebea 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" +#include "X86SelectionDAGInfo.h" #include "X86ISelLowering.h" +#include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 84ec98484f8e..e36a47506ba0 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86Subtarget.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Attributes.h" diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index cb21f1bd7706..278b57eb00b7 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -24,8 +24,8 @@ #include "X86TargetObjectFile.h" #include "X86TargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 7f70829cb6c6..4fd95717478e 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,6 +9,8 @@ #include "X86TargetObjectFile.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCContext.h" @@ -16,8 +18,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index fe94079fd869..11ba7025e1b7 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1383,6 +1383,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } + int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) { @@ -2176,6 +2178,17 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } +bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) { + // X86 specific here are "instruction number 1st priority". + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, + C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, + C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); +} + bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { Type *ScalarTy = DataTy->getScalarType(); int DataWidth = isa<PointerType>(ScalarTy) ? diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 9bef9e80c395..09ce2c90498d 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -76,6 +76,8 @@ public: int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); + unsigned getAtomicMemIntrinsicMaxElementSize() const; + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX); @@ -99,6 +101,8 @@ public: int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); bool isLegalMaskedGather(Type *DataType); diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 3ee14a0ff7b1..0c3b34341476 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 5fc58d831319..dd27e7ca30aa 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "InstPrinter/XCoreInstPrinter.h" #include "MCTargetDesc/XCoreMCAsmInfo.h" -#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCoreTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" @@ -23,8 +23,8 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index b35aa0b95821..8f7c8a82380a 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "InstPrinter/XCoreInstPrinter.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMCInstLower.h" #include "XCoreSubtarget.h" diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 1a1cbd474888..cb23399995da 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "XCoreTargetMachine.h" #include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCore.h" -#include "XCoreTargetMachine.h" #include "XCoreTargetObjectFile.h" #include "XCoreTargetTransformInfo.h" #include "llvm/ADT/Optional.h" diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2b53f01a996d..a047b3c9d9fc 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H #include "XCoreSubtarget.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include <memory> diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index ad8693fd325e..c60a262e719c 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,10 +9,10 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; |