diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86.td')
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 133 |
1 files changed, 77 insertions, 56 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index d8631aca2734..a2b11d55f650 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -304,12 +304,12 @@ def FeatureFastVariableShuffle : SubtargetFeature<"fast-variable-shuffle", "HasFastVariableShuffle", "true", "Shuffles with variable masks are fast">; -// On some X86 processors, there is no performance hazard to writing only the -// lower parts of a YMM or ZMM register without clearing the upper part. -def FeatureFastPartialYMMorZMMWrite - : SubtargetFeature<"fast-partial-ymm-or-zmm-write", - "HasFastPartialYMMorZMMWrite", - "true", "Partial writes to YMM/ZMM registers are fast">; +// On some X86 processors, a vzeroupper instruction should be inserted after +// using ymm/zmm registers before executing code that may use SSE instructions. +def FeatureInsertVZEROUPPER + : SubtargetFeature<"vzeroupper", + "InsertVZEROUPPER", + "true", "Should insert vzeroupper instructions">; // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if // vector FSQRT has higher throughput than the corresponding NR code. @@ -386,6 +386,10 @@ def FeaturePrefer256Bit : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", "Prefer 256-bit AVX instructions">; +def FeaturePreferMaskRegisters + : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true", + "Prefer AVX512 mask registers over PTEST/MOVMSK">; + // Lower indirect calls using a special construct called a `retpoline` to // mitigate potential Spectre v2 attacks against them. def FeatureRetpolineIndirectCalls @@ -439,7 +443,7 @@ def FeatureFastHorizontalOps : SubtargetFeature< "fast-hops", "HasFastHorizontalOps", "true", "Prefer horizontal vector math instructions (haddp, phsub, etc.) 
over " - "normal vector instructions with shuffles", [FeatureSSE3]>; + "normal vector instructions with shuffles">; def FeatureFastScalarShiftMasks : SubtargetFeature< @@ -451,6 +455,10 @@ def FeatureFastVectorShiftMasks "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true", "Prefer a left/right vector logical shift pair over a shift+and pair">; +def FeatureUseGLMDivSqrtCosts + : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", + "Use Goldmont specific floating point div/sqrt costs">; + // Merge branches using three-way conditional code. def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch", "ThreewayBranchProfitable", "true", @@ -465,12 +473,6 @@ def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; // Silvermont def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; -// Goldmont -def ProcIntelGLM : SubtargetFeature<"", "X86ProcFamily", "IntelGLM", "">; -// Goldmont Plus -def ProcIntelGLP : SubtargetFeature<"", "X86ProcFamily", "IntelGLP", "">; -// Tremont -def ProcIntelTRM : SubtargetFeature<"", "X86ProcFamily", "IntelTRM", "">; //===----------------------------------------------------------------------===// // Register File Description @@ -499,6 +501,7 @@ include "X86SchedHaswell.td" include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" +include "X86ScheduleZnver2.td" include "X86ScheduleBdVer2.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" @@ -521,7 +524,8 @@ def ProcessorFeatures { FeatureCMPXCHG16B, FeaturePOPCNT, FeatureLAHFSAHF, - FeatureMacroFusion]; + FeatureMacroFusion, + FeatureInsertVZEROUPPER]; list<SubtargetFeature> NHMSpecificFeatures = []; list<SubtargetFeature> NHMFeatures = !listconcat(NHMInheritableFeatures, NHMSpecificFeatures); @@ -701,7 +705,8 @@ def ProcessorFeatures { FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowTwoMemOps, - 
FeatureLAHFSAHF]; + FeatureLAHFSAHF, + FeatureInsertVZEROUPPER]; list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom, FeatureSlowUAMem16, FeatureLEAForSP, @@ -739,7 +744,7 @@ def ProcessorFeatures { FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase]; - list<SubtargetFeature> GLMSpecificFeatures = [ProcIntelGLM, + list<SubtargetFeature> GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts, FeaturePOPCNTFalseDeps]; list<SubtargetFeature> GLMInheritableFeatures = !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures); @@ -750,7 +755,7 @@ def ProcessorFeatures { list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, FeatureRDPID, FeatureSGX]; - list<SubtargetFeature> GLPSpecificFeatures = [ProcIntelGLP]; + list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; list<SubtargetFeature> GLPInheritableFeatures = !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures); list<SubtargetFeature> GLPFeatures = @@ -762,7 +767,7 @@ def ProcessorFeatures { FeatureMOVDIRI, FeatureMOVDIR64B, FeatureWAITPKG]; - list<SubtargetFeature> TRMSpecificFeatures = [ProcIntelTRM]; + list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; list<SubtargetFeature> TRMFeatures = !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures, TRMSpecificFeatures); @@ -801,8 +806,8 @@ def ProcessorFeatures { FeatureBMI2, FeatureFMA, FeaturePRFCHW, + FeaturePreferMaskRegisters, FeatureSlowTwoMemOps, - FeatureFastPartialYMMorZMMWrite, FeatureHasFastGather, FeatureSlowPMADDWD]; // TODO Add AVX5124FMAPS/AVX5124VNNIW features @@ -823,7 +828,8 @@ def ProcessorFeatures { FeatureLAHFSAHF, FeatureCMOV, Feature64Bit, - FeatureFastScalarShiftMasks]; + FeatureFastScalarShiftMasks, + FeatureInsertVZEROUPPER]; list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures; // Bobcat @@ -845,7 +851,9 @@ def ProcessorFeatures { FeatureFast15ByteNOP, FeatureFastScalarShiftMasks, FeatureFastVectorShiftMasks]; - list<SubtargetFeature> BtVer1Features = 
BtVer1InheritableFeatures; + list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER]; + list<SubtargetFeature> BtVer1Features = + !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures); // Jaguar list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX, @@ -858,7 +866,6 @@ def ProcessorFeatures { FeatureXSAVEOPT]; list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT, FeatureFastBEXTR, - FeatureFastPartialYMMorZMMWrite, FeatureFastHorizontalOps]; list<SubtargetFeature> BtVer2InheritableFeatures = !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures); @@ -886,7 +893,8 @@ def ProcessorFeatures { FeatureLAHFSAHF, FeatureFast11ByteNOP, FeatureFastScalarShiftMasks, - FeatureBranchFusion]; + FeatureBranchFusion, + FeatureInsertVZEROUPPER]; list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures; // PileDriver @@ -949,6 +957,7 @@ def ProcessorFeatures { FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, + FeatureInsertVZEROUPPER, FeatureX87, FeatureXSAVE, FeatureXSAVEC, @@ -971,28 +980,32 @@ class Proc<string Name, list<SubtargetFeature> Features> // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled // if i386/i486 is specifically requested. 
def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B]>; -def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; + FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; +def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16, + FeatureInsertVZEROUPPER]>; +def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16, + FeatureInsertVZEROUPPER]>; def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B]>; + FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B]>; + FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B, FeatureMMX]>; + FeatureCMPXCHG8B, FeatureMMX, + FeatureInsertVZEROUPPER]>; def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureCMOV, FeatureNOPL]>; + FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>; def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV, FeatureFXSR, - FeatureNOPL]>; + FeatureNOPL, FeatureInsertVZEROUPPER]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX, - FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>; + FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV, + FeatureInsertVZEROUPPER]>; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. 
@@ -1008,29 +1021,29 @@ foreach P = ["pentium3", "pentium3m"] in { def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel<P, GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; } // Intel Quark. -def : Proc<"lakemont", []>; +def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, @@ -1041,7 +1054,8 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureFXSR, FeatureNOPL, Feature64Bit, - FeatureCMPXCHG16B + FeatureCMPXCHG16B, + FeatureInsertVZEROUPPER ]>; // Intel Core 2 Solo/Duo. @@ -1057,7 +1071,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ Feature64Bit, FeatureCMPXCHG16B, FeatureLAHFSAHF, - FeatureMacroFusion + FeatureMacroFusion, + FeatureInsertVZEROUPPER ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureX87, @@ -1071,7 +1086,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ Feature64Bit, FeatureCMPXCHG16B, FeatureLAHFSAHF, - FeatureMacroFusion + FeatureMacroFusion, + FeatureInsertVZEROUPPER ]>; // Atom CPUs. @@ -1138,35 +1154,36 @@ def : ProcessorModel<"tigerlake", SkylakeServerModel, // AMD CPUs. 
def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX]>; + FeatureMMX, FeatureInsertVZEROUPPER]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNow]>; + Feature3DNow, FeatureInsertVZEROUPPER]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNow]>; + Feature3DNow, FeatureInsertVZEROUPPER]>; foreach P = ["athlon", "athlon-tbird"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, - Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>; + Feature3DNowA, FeatureNOPL, FeatureSlowSHLD, + FeatureInsertVZEROUPPER]>; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL, - FeatureSlowSHLD]>; + FeatureSlowSHLD, FeatureInsertVZEROUPPER]>; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD, FeatureCMOV, - FeatureFastScalarShiftMasks]>; + FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD, FeatureCMOV, Feature64Bit, - FeatureFastScalarShiftMasks]>; + FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>; } foreach P = ["amdfam10", "barcelona"] in { @@ -1188,17 +1205,20 @@ def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>; def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>; def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>; -def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>; +def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>; def : Proc<"geode", [FeatureX87, 
FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNowA]>; - -def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; -def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; -def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; + Feature3DNowA, FeatureInsertVZEROUPPER]>; + +def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, + FeatureInsertVZEROUPPER]>; +def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, + FeatureInsertVZEROUPPER]>; +def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, + FeatureInsertVZEROUPPER]>; def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1, FeatureFXSR, - FeatureCMOV]>; + FeatureCMOV, FeatureInsertVZEROUPPER]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -1221,7 +1241,8 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [ Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, - FeatureMacroFusion + FeatureMacroFusion, + FeatureInsertVZEROUPPER ]>; //===----------------------------------------------------------------------===// |