summaryrefslogtreecommitdiff
path: root/compiler-rt/lib/builtins/cpu_model.c
diff options
context:
space:
mode:
Diffstat (limited to 'compiler-rt/lib/builtins/cpu_model.c')
-rw-r--r--compiler-rt/lib/builtins/cpu_model.c185
1 files changed, 113 insertions, 72 deletions
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index fb619037d398..8346bb62dcfb 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -82,6 +82,8 @@ enum ProcessorSubtypes {
INTEL_COREI7_ICELAKE_SERVER,
AMDFAM17H_ZNVER2,
INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_COOPERLAKE,
CPU_SUBTYPE_MAX
};
@@ -122,7 +124,9 @@ enum ProcessorFeatures {
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG,
- FEATURE_AVX512BF16
+ FEATURE_AVX512BF16,
+ FEATURE_AVX512VP2INTERSECT,
+ CPU_FEATURE_MAX
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -268,13 +272,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
}
-static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
- unsigned Brand_id,
- unsigned Features,
- unsigned Features2, unsigned *Type,
- unsigned *Subtype) {
- if (Brand_id != 0)
- return;
+static const char *
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ unsigned *Type, unsigned *Subtype) {
+#define testFeature(F) \
+ (Features[F / 32] & (F % 32)) != 0
+
+ // We select CPU strings to match the code in Host.cpp, but we don't use them
+ // in compiler-rt.
+ const char *CPU = 0;
+
switch (Family) {
case 6:
switch (Model) {
@@ -285,13 +293,17 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
+ CPU = "core2";
+ *Type = INTEL_CORE2;
+ break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
- *Type = INTEL_CORE2; // "penryn"
+ CPU = "penryn";
+ *Type = INTEL_CORE2;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
@@ -299,25 +311,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
- *Type = INTEL_COREI7; // "nehalem"
+ CPU = "nehalem";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
break;
case 0x25: // Intel Core i7, laptop version.
case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
case 0x2f: // Westmere EX
- *Type = INTEL_COREI7; // "westmere"
+ CPU = "westmere";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_WESTMERE;
break;
case 0x2a: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 0x2d:
- *Type = INTEL_COREI7; //"sandybridge"
+ CPU = "sandybridge";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
- *Type = INTEL_COREI7; // "ivybridge"
+ CPU = "ivybridge";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_IVYBRIDGE;
break;
@@ -326,7 +342,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x3f:
case 0x45:
case 0x46:
- *Type = INTEL_COREI7; // "haswell"
+ CPU = "haswell";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_HASWELL;
break;
@@ -335,7 +352,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x47:
case 0x4f:
case 0x56:
- *Type = INTEL_COREI7; // "broadwell"
+ CPU = "broadwell";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_BROADWELL;
break;
@@ -344,37 +362,49 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
- *Type = INTEL_COREI7; // "skylake"
+ case 0xa5: // Comet Lake-H/S
+ case 0xa6: // Comet Lake-U
+ CPU = "skylake";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SKYLAKE;
break;
// Skylake Xeon:
case 0x55:
*Type = INTEL_COREI7;
- if (Features2 & (1 << (FEATURE_AVX512VNNI - 32)))
- *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake"
- else
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ if (testFeature(FEATURE_AVX512BF16)) {
+ CPU = "cooperlake";
+ *Subtype = INTEL_COREI7_COOPERLAKE;
+ } else if (testFeature(FEATURE_AVX512VNNI)) {
+ CPU = "cascadelake";
+ *Subtype = INTEL_COREI7_CASCADELAKE;
+ } else {
+ CPU = "skylake-avx512";
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ }
break;
// Cannonlake:
case 0x66:
+ CPU = "cannonlake";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
+ *Subtype = INTEL_COREI7_CANNONLAKE;
break;
// Icelake:
case 0x7d:
case 0x7e:
+ CPU = "icelake-client";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
+ *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
break;
// Icelake Xeon:
case 0x6a:
case 0x6c:
+ CPU = "icelake-server";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
+ *Subtype = INTEL_COREI7_ICELAKE_SERVER;
break;
case 0x1c: // Most 45 nm Intel Atom processors
@@ -382,8 +412,9 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
+ CPU = "bonnell";
*Type = INTEL_BONNELL;
- break; // "bonnell"
+ break;
// Atom Silvermont codes from the Intel software optimization guide.
case 0x37:
@@ -392,26 +423,32 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
+ CPU = "silvermont";
*Type = INTEL_SILVERMONT;
- break; // "silvermont"
+ break;
// Goldmont:
case 0x5c: // Apollo Lake
case 0x5f: // Denverton
+ CPU = "goldmont";
*Type = INTEL_GOLDMONT;
break; // "goldmont"
case 0x7a:
+ CPU = "goldmont-plus";
*Type = INTEL_GOLDMONT_PLUS;
break;
case 0x86:
+ CPU = "tremont";
*Type = INTEL_TREMONT;
break;
case 0x57:
- *Type = INTEL_KNL; // knl
+ CPU = "knl";
+ *Type = INTEL_KNL;
break;
case 0x85:
- *Type = INTEL_KNM; // knm
+ CPU = "knm";
+ *Type = INTEL_KNM;
break;
default: // Unknown family 6 CPU.
@@ -421,17 +458,22 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
default:
break; // Unknown.
}
+
+ return CPU;
}
-static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
- unsigned Features, unsigned Features2,
- unsigned *Type, unsigned *Subtype) {
- // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
- // appears to be no way to generate the wide variety of AMD-specific targets
- // from the information returned from CPUID.
+static const char *
+getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ unsigned *Type, unsigned *Subtype) {
+ // We select CPU strings to match the code in Host.cpp, but we don't use them
+ // in compiler-rt.
+ const char *CPU = 0;
+
switch (Family) {
case 16:
- *Type = AMDFAM10H; // "amdfam10"
+ CPU = "amdfam10";
+ *Type = AMDFAM10H;
switch (Model) {
case 2:
*Subtype = AMDFAM10H_BARCELONA;
@@ -445,60 +487,62 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 20:
+ CPU = "btver1";
*Type = AMD_BTVER1;
- break; // "btver1";
+ break;
case 21:
+ CPU = "bdver1";
*Type = AMDFAM15H;
if (Model >= 0x60 && Model <= 0x7f) {
+ CPU = "bdver4";
*Subtype = AMDFAM15H_BDVER4;
- break; // "bdver4"; 60h-7Fh: Excavator
+ break; // 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
+ CPU = "bdver3";
*Subtype = AMDFAM15H_BDVER3;
- break; // "bdver3"; 30h-3Fh: Steamroller
+ break; // 30h-3Fh: Steamroller
}
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+ CPU = "bdver2";
*Subtype = AMDFAM15H_BDVER2;
- break; // "bdver2"; 02h, 10h-1Fh: Piledriver
+ break; // 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;
- break; // "bdver1"; 00h-0Fh: Bulldozer
+ break; // 00h-0Fh: Bulldozer
}
break;
case 22:
+ CPU = "btver2";
*Type = AMD_BTVER2;
- break; // "btver2"
+ break;
case 23:
+ CPU = "znver1";
*Type = AMDFAM17H;
if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
+ CPU = "znver2";
*Subtype = AMDFAM17H_ZNVER2;
- break; // "znver2"; 30h-3fh, 71h: Zen2
+ break; // 30h-3fh, 71h: Zen2
}
if (Model <= 0x0f) {
*Subtype = AMDFAM17H_ZNVER1;
- break; // "znver1"; 00h-0Fh: Zen1
+ break; // 00h-0Fh: Zen1
}
break;
default:
- break; // "generic"
+ break; // Unknown AMD CPU.
}
+
+ return CPU;
}
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
- unsigned *FeaturesOut,
- unsigned *Features2Out) {
- unsigned Features = 0;
- unsigned Features2 = 0;
+ unsigned *Features) {
unsigned EAX, EBX;
#define setFeature(F) \
- do { \
- if (F < 32) \
- Features |= 1U << (F & 0x1f); \
- else if (F < 64) \
- Features2 |= 1U << ((F - 32) & 0x1f); \
- } while (0)
+ Features[F / 32] |= 1U << (F % 32)
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
@@ -590,6 +634,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VP2INTERSECT);
bool HasLeaf7Subleaf1 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
@@ -607,9 +653,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_XOP);
if (HasExtLeaf1 && ((ECX >> 16) & 1))
setFeature(FEATURE_FMA4);
-
- *FeaturesOut = Features;
- *Features2Out = Features2;
#undef setFeature
}
@@ -641,7 +684,7 @@ struct __processor_model {
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
-unsigned int __cpu_features2;
+unsigned int __cpu_features2 = 0;
// A constructor function that is sets __cpu_model and __cpu_features2 with
// the right values. This needs to run only once. This constructor is
@@ -653,40 +696,38 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
unsigned EAX, EBX, ECX, EDX;
unsigned MaxLeaf = 5;
unsigned Vendor;
- unsigned Model, Family, Brand_id;
- unsigned Features = 0;
- unsigned Features2 = 0;
+ unsigned Model, Family;
+ unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
// This function needs to run just once.
if (__cpu_model.__cpu_vendor)
return 0;
- if (!isCpuIdSupported())
- return -1;
-
- // Assume cpuid insn present. Run in level 0 to get vendor id.
- if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+ if (!isCpuIdSupported() ||
+ getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
__cpu_model.__cpu_vendor = VENDOR_OTHER;
return -1;
}
+
getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
detectX86FamilyModel(EAX, &Family, &Model);
- Brand_id = EBX & 0xff;
// Find available features.
- getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
- __cpu_model.__cpu_features[0] = Features;
- __cpu_features2 = Features2;
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
+
+ assert((sizeof(Features)/sizeof(Features[0])) == 2);
+ __cpu_model.__cpu_features[0] = Features[0];
+ __cpu_features2 = Features[1];
if (Vendor == SIG_INTEL) {
// Get CPU type.
- getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
- Features2, &(__cpu_model.__cpu_type),
+ getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
+ &(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
} else if (Vendor == SIG_AMD) {
// Get CPU type.
- getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2,
+ getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
&(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_AMD;