diff options
Diffstat (limited to 'compiler-rt/lib/builtins/cpu_model.c')
-rw-r--r-- | compiler-rt/lib/builtins/cpu_model.c | 185 |
1 files changed, 113 insertions, 72 deletions
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index fb619037d398..8346bb62dcfb 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -82,6 +82,8 @@ enum ProcessorSubtypes { INTEL_COREI7_ICELAKE_SERVER, AMDFAM17H_ZNVER2, INTEL_COREI7_CASCADELAKE, + INTEL_COREI7_TIGERLAKE, + INTEL_COREI7_COOPERLAKE, CPU_SUBTYPE_MAX }; @@ -122,7 +124,9 @@ enum ProcessorFeatures { FEATURE_VPCLMULQDQ, FEATURE_AVX512VNNI, FEATURE_AVX512BITALG, - FEATURE_AVX512BF16 + FEATURE_AVX512BF16, + FEATURE_AVX512VP2INTERSECT, + CPU_FEATURE_MAX }; // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). @@ -268,13 +272,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, - unsigned Brand_id, - unsigned Features, - unsigned Features2, unsigned *Type, - unsigned *Subtype) { - if (Brand_id != 0) - return; +static const char * +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { +#define testFeature(F) \ + (Features[F / 32] & (F % 32)) != 0 + + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + switch (Family) { case 6: switch (Model) { @@ -285,13 +293,17 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process + CPU = "core2"; + *Type = INTEL_CORE2; + break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. - *Type = INTEL_CORE2; // "penryn" + CPU = "penryn"; + *Type = INTEL_CORE2; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. @@ -299,25 +311,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // As found in a Summer 2010 model iMac. case 0x1f: case 0x2e: // Nehalem EX - *Type = INTEL_COREI7; // "nehalem" + CPU = "nehalem"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. case 0x2f: // Westmere EX - *Type = INTEL_COREI7; // "westmere" + CPU = "westmere"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: - *Type = INTEL_COREI7; //"sandybridge" + CPU = "sandybridge"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: case 0x3e: // Ivy Bridge EP - *Type = INTEL_COREI7; // "ivybridge" + CPU = "ivybridge"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_IVYBRIDGE; break; @@ -326,7 +342,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x3f: case 0x45: case 0x46: - *Type = INTEL_COREI7; // "haswell" + CPU = "haswell"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_HASWELL; break; @@ -335,7 +352,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x47: case 0x4f: case 0x56: - *Type = INTEL_COREI7; // "broadwell" + CPU = "broadwell"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_BROADWELL; break; @@ -344,37 +362,49 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x5e: // Skylake desktop case 0x8e: // Kaby Lake mobile case 0x9e: // Kaby Lake desktop - *Type = INTEL_COREI7; // "skylake" + case 0xa5: // Comet Lake-H/S + case 0xa6: // Comet Lake-U + CPU = "skylake"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = INTEL_COREI7; - if (Features2 & (1 << (FEATURE_AVX512VNNI - 32))) - *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake" - else - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + if (testFeature(FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = INTEL_COREI7_COOPERLAKE; + } else if (testFeature(FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; + } break; // Cannonlake: case 0x66: + CPU = "cannonlake"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" + *Subtype = INTEL_COREI7_CANNONLAKE; break; // Icelake: case 0x7d: case 0x7e: + CPU = "icelake-client"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" + *Subtype = INTEL_COREI7_ICELAKE_CLIENT; break; // Icelake Xeon: case 0x6a: case 0x6c: + CPU = "icelake-server"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" + *Subtype = INTEL_COREI7_ICELAKE_SERVER; break; case 0x1c: // Most 45 nm Intel Atom processors @@ -382,8 +412,9 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; *Type = INTEL_BONNELL; - break; // "bonnell" + break; // Atom Silvermont codes from the Intel software optimization guide. case 0x37: @@ -392,26 +423,32 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x5a: case 0x5d: case 0x4c: // really airmont + CPU = "silvermont"; *Type = INTEL_SILVERMONT; - break; // "silvermont" + break; // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton + CPU = "goldmont"; *Type = INTEL_GOLDMONT; break; // "goldmont" case 0x7a: + CPU = "goldmont-plus"; *Type = INTEL_GOLDMONT_PLUS; break; case 0x86: + CPU = "tremont"; *Type = INTEL_TREMONT; break; case 0x57: - *Type = INTEL_KNL; // knl + CPU = "knl"; + *Type = INTEL_KNL; break; case 0x85: - *Type = INTEL_KNM; // knm + CPU = "knm"; + *Type = INTEL_KNM; break; default: // Unknown family 6 CPU. @@ -421,17 +458,22 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, default: break; // Unknown. } + + return CPU; } -static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - unsigned Features, unsigned Features2, - unsigned *Type, unsigned *Subtype) { - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. +static const char * +getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + switch (Family) { case 16: - *Type = AMDFAM10H; // "amdfam10" + CPU = "amdfam10"; + *Type = AMDFAM10H; switch (Model) { case 2: *Subtype = AMDFAM10H_BARCELONA; @@ -445,60 +487,62 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 20: + CPU = "btver1"; *Type = AMD_BTVER1; - break; // "btver1"; + break; case 21: + CPU = "bdver1"; *Type = AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 60h-7Fh: Excavator + break; // 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; *Subtype = AMDFAM15H_BDVER3; - break; // "bdver3"; 30h-3Fh: Steamroller + break; // 30h-3Fh: Steamroller } if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; *Subtype = AMDFAM15H_BDVER2; - break; // "bdver2"; 02h, 10h-1Fh: Piledriver + break; // 02h, 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = AMDFAM15H_BDVER1; - break; // "bdver1"; 00h-0Fh: Bulldozer + break; // 00h-0Fh: Bulldozer } break; case 22: + CPU = "btver2"; *Type = AMD_BTVER2; - break; // "btver2" + break; case 23: + CPU = "znver1"; *Type = AMDFAM17H; if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + CPU = "znver2"; *Subtype = AMDFAM17H_ZNVER2; - break; // "znver2"; 30h-3fh, 71h: Zen2 + break; // 30h-3fh, 71h: Zen2 } if (Model <= 0x0f) { *Subtype = AMDFAM17H_ZNVER1; - break; // "znver1"; 00h-0Fh: Zen1 + break; // 00h-0Fh: Zen1 } break; default: - break; // "generic" + break; // Unknown AMD CPU. } + + return CPU; } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, - unsigned *FeaturesOut, - unsigned *Features2Out) { - unsigned Features = 0; - unsigned Features2 = 0; + unsigned *Features) { unsigned EAX, EBX; #define setFeature(F) \ - do { \ - if (F < 32) \ - Features |= 1U << (F & 0x1f); \ - else if (F < 64) \ - Features2 |= 1U << ((F - 32) & 0x1f); \ - } while (0) + Features[F / 32] |= 1U << (F % 32) if ((EDX >> 15) & 1) setFeature(FEATURE_CMOV); @@ -590,6 +634,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VP2INTERSECT); bool HasLeaf7Subleaf1 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); @@ -607,9 +653,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_XOP); if (HasExtLeaf1 && ((ECX >> 16) & 1)) setFeature(FEATURE_FMA4); - - *FeaturesOut = Features; - *Features2Out = Features2; #undef setFeature } @@ -641,7 +684,7 @@ struct __processor_model { #ifndef _WIN32 __attribute__((visibility("hidden"))) #endif -unsigned int __cpu_features2; +unsigned int __cpu_features2 = 0; // A constructor function that is sets __cpu_model and __cpu_features2 with // the right values. This needs to run only once. This constructor is @@ -653,40 +696,38 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { unsigned EAX, EBX, ECX, EDX; unsigned MaxLeaf = 5; unsigned Vendor; - unsigned Model, Family, Brand_id; - unsigned Features = 0; - unsigned Features2 = 0; + unsigned Model, Family; + unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; // This function needs to run just once. if (__cpu_model.__cpu_vendor) return 0; - if (!isCpuIdSupported()) - return -1; - - // Assume cpuid insn present. Run in level 0 to get vendor id. - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { + if (!isCpuIdSupported() || + getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { __cpu_model.__cpu_vendor = VENDOR_OTHER; return -1; } + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); detectX86FamilyModel(EAX, &Family, &Model); - Brand_id = EBX & 0xff; // Find available features. - getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); - __cpu_model.__cpu_features[0] = Features; - __cpu_features2 = Features2; + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); + + assert((sizeof(Features)/sizeof(Features[0])) == 2); + __cpu_model.__cpu_features[0] = Features[0]; + __cpu_features2 = Features[1]; if (Vendor == SIG_INTEL) { // Get CPU type. - getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, - Features2, &(__cpu_model.__cpu_type), + getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], + &(__cpu_model.__cpu_type), &(__cpu_model.__cpu_subtype)); __cpu_model.__cpu_vendor = VENDOR_INTEL; } else if (Vendor == SIG_AMD) { // Get CPU type. - getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2, + getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], &(__cpu_model.__cpu_type), &(__cpu_model.__cpu_subtype)); __cpu_model.__cpu_vendor = VENDOR_AMD; |