path: root/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model
Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/builtins/cpu_model')
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc  91
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.c  84
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.h  21
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc  36
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc  159
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc  27
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc  53
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc  149
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc  21
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc  8
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc  189
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc  28
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc  5
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc  12
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc  6
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/cpu_model.h  41
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c  1205
17 files changed, 2135 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
new file mode 100644
index 000000000000..e78bb88cfedf
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
@@ -0,0 +1,91 @@
+//===- AArch64CPUFeatures.inc - AArch64 CPU Features enum -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CPUFeatures enum for AArch64 so that this code can be
+// tested consistently between LLVM and compiler-rt, primarily by checking that
+// the two copies of the file are an exact match.
+//
+// This file has two identical copies. The primary copy lives in LLVM and
+// the other one sits in compiler-rt/lib/builtins/cpu_model directory. To make
+// changes in this file, first modify the primary copy and copy it over to
+// compiler-rt. compiler-rt tests will fail if the two files are not synced up.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64_CPU_FEATURES_INC_H
+#define AARCH64_CPU_FEATURES_INC_H
+
+// Function Multi Versioning CPU features.
+enum CPUFeatures {
+ FEAT_RNG,
+ FEAT_FLAGM,
+ FEAT_FLAGM2,
+ FEAT_FP16FML,
+ FEAT_DOTPROD,
+ FEAT_SM4,
+ FEAT_RDM,
+ FEAT_LSE,
+ FEAT_FP,
+ FEAT_SIMD,
+ FEAT_CRC,
+ FEAT_SHA1,
+ FEAT_SHA2,
+ FEAT_SHA3,
+ FEAT_AES,
+ FEAT_PMULL,
+ FEAT_FP16,
+ FEAT_DIT,
+ FEAT_DPB,
+ FEAT_DPB2,
+ FEAT_JSCVT,
+ FEAT_FCMA,
+ FEAT_RCPC,
+ FEAT_RCPC2,
+ FEAT_FRINTTS,
+ FEAT_DGH,
+ FEAT_I8MM,
+ FEAT_BF16,
+ FEAT_EBF16,
+ FEAT_RPRES,
+ FEAT_SVE,
+ FEAT_SVE_BF16,
+ FEAT_SVE_EBF16,
+ FEAT_SVE_I8MM,
+ FEAT_SVE_F32MM,
+ FEAT_SVE_F64MM,
+ FEAT_SVE2,
+ FEAT_SVE_AES,
+ FEAT_SVE_PMULL128,
+ FEAT_SVE_BITPERM,
+ FEAT_SVE_SHA3,
+ FEAT_SVE_SM4,
+ FEAT_SME,
+ FEAT_MEMTAG,
+ FEAT_MEMTAG2,
+ FEAT_MEMTAG3,
+ FEAT_SB,
+ FEAT_PREDRES,
+ FEAT_SSBS,
+ FEAT_SSBS2,
+ FEAT_BTI,
+ FEAT_LS64,
+ FEAT_LS64_V,
+ FEAT_LS64_ACCDATA,
+ FEAT_WFXT,
+ FEAT_SME_F64,
+ FEAT_SME_I64,
+ FEAT_SME2,
+ FEAT_RCPC3,
+ FEAT_MOPS,
+ FEAT_MAX,
+ FEAT_EXT = 62, // Reserved to indicate presence of additional features field
+ // in __aarch64_cpu_features
+  FEAT_INIT // Used as a flag marking completion of feature initialization
+};
+
+#endif
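Each enumerator above names a bit position in the 64-bit __aarch64_cpu_features.features mask that the FMV code later in this change fills in. As a minimal sketch (not part of this patch; the helper name cpu_has is invented for illustration), a caller could test one of those bits once initialization has run:

// Assumes AArch64CPUFeatures.inc has been included and that the feature
// word has already been populated by one of the initializers below.
extern struct {
  unsigned long long features;
} __aarch64_cpu_features; // matches the definition added in aarch64.c below

static inline _Bool cpu_has(enum CPUFeatures feat) {
  return (__aarch64_cpu_features.features & (1ULL << feat)) != 0;
}

// e.g. if (cpu_has(FEAT_LSE)) { /* take an LSE-optimized code path */ }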
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.c b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.c
new file mode 100644
index 000000000000..b868caa991b2
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -0,0 +1,84 @@
+//===-- cpu_model/aarch64.c - Support for __cpu_model builtin ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is based on LLVM's lib/Support/Host.cpp.
+// It implements __aarch64_have_lse_atomics, __aarch64_cpu_features for
+// AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "aarch64.h"
+
+#if !defined(__aarch64__)
+#error This file is intended only for aarch64-based targets
+#endif
+
+#if __has_include(<sys/ifunc.h>)
+#include <sys/ifunc.h>
+#else
+typedef struct __ifunc_arg_t {
+ unsigned long _size;
+ unsigned long _hwcap;
+ unsigned long _hwcap2;
+} __ifunc_arg_t;
+#endif // __has_include(<sys/ifunc.h>)
+
+// LSE support detection for out-of-line atomics
+// using HWCAP and Auxiliary vector
+_Bool __aarch64_have_lse_atomics
+ __attribute__((visibility("hidden"), nocommon)) = false;
+
+#if defined(__FreeBSD__)
+// clang-format off: should not reorder sys/auxv.h alphabetically
+#include <sys/auxv.h>
+// clang-format on
+#include "aarch64/hwcap.inc"
+#include "aarch64/lse_atomics/freebsd.inc"
+#elif defined(__Fuchsia__)
+#include "aarch64/hwcap.inc"
+#include "aarch64/lse_atomics/fuchsia.inc"
+#elif defined(__ANDROID__)
+#include "aarch64/hwcap.inc"
+#include "aarch64/lse_atomics/android.inc"
+#elif __has_include(<sys/auxv.h>)
+#include "aarch64/hwcap.inc"
+#include "aarch64/lse_atomics/sysauxv.inc"
+#else
+// When unimplemented, we leave __aarch64_have_lse_atomics initialized to false.
+#endif
+
+#if !defined(DISABLE_AARCH64_FMV)
+
+// Architecture features used
+// in Function Multi Versioning
+struct {
+ unsigned long long features;
+  // As the set of features grows, new fields could be added
+} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
+
+// The formatter wants to re-order these includes, but doing so is incorrect:
+// clang-format off
+#if defined(__APPLE__)
+#include "aarch64/fmv/apple.inc"
+#elif defined(__FreeBSD__)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/freebsd.inc"
+#elif defined(__Fuchsia__)
+#include "aarch64/fmv/fuchsia.inc"
+#elif defined(__ANDROID__)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/android.inc"
+#elif __has_include(<sys/auxv.h>)
+#include "aarch64/fmv/mrs.inc"
+#include "aarch64/fmv/sysauxv.inc"
+#else
+#include "aarch64/fmv/unimplemented.inc"
+#endif
+// clang-format on
+
+#endif // !defined(DISABLE_AARCH64_FMV)
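The FMV entry points included above are what compiler-generated ifunc resolvers call before picking a function version. A hand-written sketch of that wiring follows; it is illustrative only, the dotprod function names are invented, and it assumes the ELF/Linux-style resolver signature and the declarations visible in this file:

extern void __init_cpu_features_resolver(unsigned long hwcap,
                                          const __ifunc_arg_t *arg);

static int dotprod_default(int x) { return x; } // placeholder implementations
static int dotprod_fast(int x) { return x; }

static void *resolve_dotprod(unsigned long hwcap, const __ifunc_arg_t *arg) {
  __init_cpu_features_resolver(hwcap, arg); // fills __aarch64_cpu_features
  if (__aarch64_cpu_features.features & (1ULL << FEAT_DOTPROD))
    return (void *)dotprod_fast;
  return (void *)dotprod_default;
}

// Normally emitted by the compiler; shown here only to illustrate the wiring.
int dotprod(int x) __attribute__((ifunc("resolve_dotprod")));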
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.h b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.h
new file mode 100644
index 000000000000..f6cbf75d582f
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64.h
@@ -0,0 +1,21 @@
+//===-- cpu_model/aarch64.h ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "cpu_model.h"
+
+#if !defined(__aarch64__)
+#error This file is intended only for aarch64-based targets
+#endif
+
+#if !defined(DISABLE_AARCH64_FMV)
+
+#include "AArch64CPUFeatures.inc"
+
+void __init_cpu_features(void);
+
+#endif // !defined(DISABLE_AARCH64_FMV)
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
new file mode 100644
index 000000000000..a9e3594e93c2
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
@@ -0,0 +1,36 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+  // ifunc resolvers are not passed hwcaps in their arguments on Android API
+  // levels lower than 30. If that is the case, treat feature detection as
+  // done and keep all CPU features unsupported (zeros). To detect this case
+  // at runtime we check for the existence of the memfd_create function from
+  // the standard C library, which was introduced in Android API 30.
+ int memfd_create(const char *, unsigned int) __attribute__((weak));
+ if (!memfd_create)
+ return;
+
+ __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+ // CPU features already initialized.
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+  // Don't set any CPU features; feature detection could be wrong on the
+  // Exynos 9810.
+ if (__isExynos9810())
+ return;
+
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+ __ifunc_arg_t arg;
+ arg._size = sizeof(__ifunc_arg_t);
+ arg._hwcap = hwcap;
+ arg._hwcap2 = hwcap2;
+ __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
new file mode 100644
index 000000000000..f0694900f231
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -0,0 +1,159 @@
+#include <TargetConditionals.h>
+#if TARGET_OS_OSX || TARGET_OS_IPHONE
+#include <sys/sysctl.h>
+
+#if __has_include(<arm/cpu_capabilities_public.h>)
+#include <arm/cpu_capabilities_public.h>
+#define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+
+// FB13964283 - A few of these didn't make it into the public SDK yet.
+#ifndef CAP_BIT_FEAT_SME
+#define CAP_BIT_FEAT_SME 40
+#endif
+#ifndef CAP_BIT_FEAT_SME2
+#define CAP_BIT_FEAT_SME2 41
+#endif
+#ifndef CAP_BIT_FEAT_SME_F64F64
+#define CAP_BIT_FEAT_SME_F64F64 42
+#endif
+#ifndef CAP_BIT_FEAT_SME_I16I64
+#define CAP_BIT_FEAT_SME_I16I64 43
+#endif
+
+#endif
+
+static bool isKnownAndSupported(const char *name) {
+ int32_t val = 0;
+ size_t size = sizeof(val);
+ if (sysctlbyname(name, &val, &size, NULL, 0))
+ return false;
+ return val;
+}
+
+static uint64_t deriveImplicitFeatures(uint64_t features) {
+ // FEAT_SSBS2 implies FEAT_SSBS
+ if ((1ULL << FEAT_SSBS2) & features)
+ features |= (1ULL << FEAT_SSBS);
+
+ // FEAT_FP is always enabled
+ features |= (1ULL << FEAT_FP);
+
+ features |= (1ULL << FEAT_INIT);
+
+ return features;
+}
+
+void __init_cpu_features_resolver(void) {
+ // On Darwin platforms, this may be called concurrently by multiple threads
+ // because the resolvers that use it are called lazily at runtime (unlike on
+ // ELF platforms, where IFuncs are resolved serially at load time). This
+ // function's effect on __aarch64_cpu_features must be idempotent.
+
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ uint64_t features = 0;
+
+#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H
+ uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0};
+ size_t len = sizeof(feats_bitvec);
+  // When hw.optional.arm.caps is available (macOS 15.0+, iOS 18.0+), use the
+  // fast path to get all the feature bits at once; otherwise fall back to the
+  // slow path of ~20-something individual sysctls.
+ if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
+
+#define CHECK_BIT(FROM, TO) \
+ do { \
+ if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
+ features |= (1ULL << TO); \
+ } \
+ } while (0)
+
+ CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
+ CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
+ CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML);
+ CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD);
+ CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3);
+ CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM);
+ CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE);
+ CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2);
+ CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1);
+ CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES);
+ CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL);
+ CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES);
+ CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB);
+ CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2);
+ CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA);
+ CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT);
+ CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB);
+ CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2);
+ CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16);
+ CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM);
+ CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT);
+ CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16);
+ CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2);
+ CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI);
+ CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD);
+ CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC);
+ CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME);
+ CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2);
+ CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64);
+ CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64);
+
+ features = deriveImplicitFeatures(features);
+
+ __atomic_store(&__aarch64_cpu_features.features, &features,
+ __ATOMIC_RELAXED);
+ return;
+ }
+#endif
+
+ // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+ static const struct {
+ const char *sysctl_name;
+ enum CPUFeatures feature;
+ } feature_checks[] = {
+ {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM},
+ {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2},
+ {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML},
+ {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD},
+ {"hw.optional.arm.FEAT_RDM", FEAT_RDM},
+ {"hw.optional.arm.FEAT_LSE", FEAT_LSE},
+ {"hw.optional.AdvSIMD", FEAT_SIMD},
+ {"hw.optional.armv8_crc32", FEAT_CRC},
+ {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1},
+ {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2},
+ {"hw.optional.arm.FEAT_SHA3", FEAT_SHA3},
+ {"hw.optional.arm.FEAT_AES", FEAT_AES},
+ {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL},
+ {"hw.optional.arm.FEAT_FP16", FEAT_FP16},
+ {"hw.optional.arm.FEAT_DIT", FEAT_DIT},
+ {"hw.optional.arm.FEAT_DPB", FEAT_DPB},
+ {"hw.optional.arm.FEAT_DPB2", FEAT_DPB2},
+ {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT},
+ {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA},
+ {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC},
+ {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2},
+ {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS},
+ {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM},
+ {"hw.optional.arm.FEAT_BF16", FEAT_BF16},
+ {"hw.optional.arm.FEAT_SB", FEAT_SB},
+ {"hw.optional.arm.FEAT_SPECRES", FEAT_PREDRES},
+ {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2},
+ {"hw.optional.arm.FEAT_BTI", FEAT_BTI},
+ };
+
+ for (size_t I = 0, E = sizeof(feature_checks) / sizeof(feature_checks[0]);
+ I != E; ++I)
+ if (isKnownAndSupported(feature_checks[I].sysctl_name))
+ features |= (1ULL << feature_checks[I].feature);
+
+ features = deriveImplicitFeatures(features);
+
+ __atomic_store(&__aarch64_cpu_features.features, &features,
+ __ATOMIC_RELAXED);
+}
+
+#endif // TARGET_OS_OSX || TARGET_OS_IPHONE
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
new file mode 100644
index 000000000000..aa975dc854f9
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
@@ -0,0 +1,27 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+ unsigned long hwcap = 0;
+ unsigned long hwcap2 = 0;
+ // CPU features already initialized.
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ int res = 0;
+ res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+ res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
+ if (res)
+ return;
+
+ __ifunc_arg_t arg;
+ arg._size = sizeof(__ifunc_arg_t);
+ arg._hwcap = hwcap;
+ arg._hwcap2 = hwcap2;
+ __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
new file mode 100644
index 000000000000..1ae4780e4978
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
@@ -0,0 +1,53 @@
+#include <zircon/features.h>
+#include <zircon/syscalls.h>
+
+void __init_cpu_features_resolver() {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ // This ensures the vDSO is a direct link-time dependency of anything that
+ // needs this initializer code.
+#pragma comment(lib, "zircon")
+ uint32_t features;
+ zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
+ if (status != ZX_OK)
+ return;
+
+ unsigned long long feat = 0;
+#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature
+
+ if (features & ZX_ARM64_FEATURE_ISA_FP)
+ setCPUFeature(FEAT_FP);
+ if (features & ZX_ARM64_FEATURE_ISA_ASIMD)
+ setCPUFeature(FEAT_SIMD);
+ if (features & ZX_ARM64_FEATURE_ISA_AES)
+ setCPUFeature(FEAT_AES);
+ if (features & ZX_ARM64_FEATURE_ISA_PMULL)
+ setCPUFeature(FEAT_PMULL);
+ if (features & ZX_ARM64_FEATURE_ISA_SHA1)
+ setCPUFeature(FEAT_SHA1);
+ if (features & ZX_ARM64_FEATURE_ISA_SHA256)
+ setCPUFeature(FEAT_SHA2);
+ if (features & ZX_ARM64_FEATURE_ISA_CRC32)
+ setCPUFeature(FEAT_CRC);
+ if (features & ZX_ARM64_FEATURE_ISA_RDM)
+ setCPUFeature(FEAT_RDM);
+ if (features & ZX_ARM64_FEATURE_ISA_SHA3)
+ setCPUFeature(FEAT_SHA3);
+ if (features & ZX_ARM64_FEATURE_ISA_SM4)
+ setCPUFeature(FEAT_SM4);
+ if (features & ZX_ARM64_FEATURE_ISA_DP)
+ setCPUFeature(FEAT_DOTPROD);
+ if (features & ZX_ARM64_FEATURE_ISA_FHM)
+ setCPUFeature(FEAT_FP16FML);
+ if (features & ZX_ARM64_FEATURE_ISA_SHA512)
+ setCPUFeature(FEAT_SHA3);
+ if (features & ZX_ARM64_FEATURE_ISA_I8MM)
+ setCPUFeature(FEAT_I8MM);
+ if (features & ZX_ARM64_FEATURE_ISA_SVE)
+ setCPUFeature(FEAT_SVE);
+
+ setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
new file mode 100644
index 000000000000..e4d5e7f2bd7e
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -0,0 +1,149 @@
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#define HAVE_SYS_AUXV_H
+#endif
+
+static void __init_cpu_features_constructor(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {
+ unsigned long long feat = 0;
+#define setCPUFeature(F) feat |= 1ULL << F
+#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
+#define extractBits(val, start, number) \
+ (val & ((1ULL << number) - 1ULL) << start) >> start
+ unsigned long hwcap2 = 0;
+ if (hwcap & _IFUNC_ARG_HWCAP)
+ hwcap2 = arg->_hwcap2;
+ if (hwcap & HWCAP_CRC32)
+ setCPUFeature(FEAT_CRC);
+ if (hwcap & HWCAP_PMULL)
+ setCPUFeature(FEAT_PMULL);
+ if (hwcap & HWCAP_FLAGM)
+ setCPUFeature(FEAT_FLAGM);
+ if (hwcap2 & HWCAP2_FLAGM2)
+ setCPUFeature(FEAT_FLAGM2);
+ if (hwcap & HWCAP_SM4)
+ setCPUFeature(FEAT_SM4);
+ if (hwcap & HWCAP_ASIMDDP)
+ setCPUFeature(FEAT_DOTPROD);
+ if (hwcap & HWCAP_ASIMDFHM)
+ setCPUFeature(FEAT_FP16FML);
+ if (hwcap & HWCAP_FPHP)
+ setCPUFeature(FEAT_FP16);
+ if (hwcap & HWCAP_DIT)
+ setCPUFeature(FEAT_DIT);
+ if (hwcap & HWCAP_ASIMDRDM)
+ setCPUFeature(FEAT_RDM);
+ if (hwcap & HWCAP_AES)
+ setCPUFeature(FEAT_AES);
+ if (hwcap & HWCAP_SHA1)
+ setCPUFeature(FEAT_SHA1);
+ if (hwcap & HWCAP_SHA2)
+ setCPUFeature(FEAT_SHA2);
+ if (hwcap & HWCAP_JSCVT)
+ setCPUFeature(FEAT_JSCVT);
+ if (hwcap & HWCAP_FCMA)
+ setCPUFeature(FEAT_FCMA);
+ if (hwcap & HWCAP_SB)
+ setCPUFeature(FEAT_SB);
+ if (hwcap & HWCAP_SSBS) {
+ setCPUFeature(FEAT_SSBS);
+ setCPUFeature(FEAT_SSBS2);
+ }
+ if (hwcap2 & HWCAP2_MTE) {
+ setCPUFeature(FEAT_MEMTAG);
+ setCPUFeature(FEAT_MEMTAG2);
+ }
+ if (hwcap2 & HWCAP2_MTE3)
+ setCPUFeature(FEAT_MEMTAG3);
+ if (hwcap2 & HWCAP2_SVEAES)
+ setCPUFeature(FEAT_SVE_AES);
+ if (hwcap2 & HWCAP2_SVEPMULL)
+ setCPUFeature(FEAT_SVE_PMULL128);
+ if (hwcap2 & HWCAP2_SVEBITPERM)
+ setCPUFeature(FEAT_SVE_BITPERM);
+ if (hwcap2 & HWCAP2_SVESHA3)
+ setCPUFeature(FEAT_SVE_SHA3);
+ if (hwcap2 & HWCAP2_SVESM4)
+ setCPUFeature(FEAT_SVE_SM4);
+ if (hwcap2 & HWCAP2_DCPODP)
+ setCPUFeature(FEAT_DPB2);
+ if (hwcap & HWCAP_ATOMICS)
+ setCPUFeature(FEAT_LSE);
+ if (hwcap2 & HWCAP2_RNG)
+ setCPUFeature(FEAT_RNG);
+ if (hwcap2 & HWCAP2_I8MM)
+ setCPUFeature(FEAT_I8MM);
+ if (hwcap2 & HWCAP2_EBF16)
+ setCPUFeature(FEAT_EBF16);
+ if (hwcap2 & HWCAP2_SVE_EBF16)
+ setCPUFeature(FEAT_SVE_EBF16);
+ if (hwcap2 & HWCAP2_DGH)
+ setCPUFeature(FEAT_DGH);
+ if (hwcap2 & HWCAP2_FRINT)
+ setCPUFeature(FEAT_FRINTTS);
+ if (hwcap2 & HWCAP2_SVEI8MM)
+ setCPUFeature(FEAT_SVE_I8MM);
+ if (hwcap2 & HWCAP2_SVEF32MM)
+ setCPUFeature(FEAT_SVE_F32MM);
+ if (hwcap2 & HWCAP2_SVEF64MM)
+ setCPUFeature(FEAT_SVE_F64MM);
+ if (hwcap2 & HWCAP2_BTI)
+ setCPUFeature(FEAT_BTI);
+ if (hwcap2 & HWCAP2_RPRES)
+ setCPUFeature(FEAT_RPRES);
+ if (hwcap2 & HWCAP2_WFXT)
+ setCPUFeature(FEAT_WFXT);
+ if (hwcap2 & HWCAP2_SME)
+ setCPUFeature(FEAT_SME);
+ if (hwcap2 & HWCAP2_SME2)
+ setCPUFeature(FEAT_SME2);
+ if (hwcap2 & HWCAP2_SME_I16I64)
+ setCPUFeature(FEAT_SME_I64);
+ if (hwcap2 & HWCAP2_SME_F64F64)
+ setCPUFeature(FEAT_SME_F64);
+ if (hwcap2 & HWCAP2_MOPS)
+ setCPUFeature(FEAT_MOPS);
+ if (hwcap & HWCAP_CPUID) {
+ unsigned long ftr;
+
+ getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+ /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
+ if (extractBits(ftr, 40, 4) >= 0x1)
+ setCPUFeature(FEAT_PREDRES);
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
+ if (extractBits(ftr, 60, 4) >= 0x1)
+ setCPUFeature(FEAT_LS64);
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
+ if (extractBits(ftr, 60, 4) >= 0x2)
+ setCPUFeature(FEAT_LS64_V);
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
+ if (extractBits(ftr, 60, 4) >= 0x3)
+ setCPUFeature(FEAT_LS64_ACCDATA);
+ }
+ if (hwcap & HWCAP_FP) {
+ setCPUFeature(FEAT_FP);
+ // FP and AdvSIMD fields have the same value
+ setCPUFeature(FEAT_SIMD);
+ }
+ if (hwcap & HWCAP_DCPOP)
+ setCPUFeature(FEAT_DPB);
+ if (hwcap & HWCAP_LRCPC)
+ setCPUFeature(FEAT_RCPC);
+ if (hwcap & HWCAP_ILRCPC)
+ setCPUFeature(FEAT_RCPC2);
+ if (hwcap2 & HWCAP2_LRCPC3)
+ setCPUFeature(FEAT_RCPC3);
+ if (hwcap2 & HWCAP2_BF16)
+ setCPUFeature(FEAT_BF16);
+ if (hwcap2 & HWCAP2_SVEBF16)
+ setCPUFeature(FEAT_SVE_BF16);
+ if (hwcap & HWCAP_SVE)
+ setCPUFeature(FEAT_SVE);
+ if (hwcap2 & HWCAP2_SVE2)
+ setCPUFeature(FEAT_SVE2);
+ if (hwcap & HWCAP_SHA3)
+ setCPUFeature(FEAT_SHA3);
+ setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
+}
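The extractBits macro above pulls a contiguous field out of an ID register value read with mrs; for instance, ID_AA64ISAR1_EL1.LS64 occupies bits [63:60] and SPECRES bits [43:40]. A small standalone check of the macro's arithmetic (the register value below is fabricated purely for illustration):

#include <assert.h>
#include <stdint.h>

#define extractBits(val, start, number)                                        \
  (val & ((1ULL << number) - 1ULL) << start) >> start

int main(void) {
  // Fabricated ID_AA64ISAR1_EL1 value whose LS64 field (bits 63:60) is 0b0011.
  uint64_t isar1 = 0x3ULL << 60;
  assert(extractBits(isar1, 60, 4) == 0x3); // >= 0x3 would set FEAT_LS64_ACCDATA
  assert(extractBits(isar1, 40, 4) == 0x0); // SPECRES field reads as zero here
  return 0;
}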
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
new file mode 100644
index 000000000000..486f77a1e4d2
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
@@ -0,0 +1,21 @@
+void __init_cpu_features_resolver(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+ __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+ // CPU features already initialized.
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+ __ifunc_arg_t arg;
+ arg._size = sizeof(__ifunc_arg_t);
+ arg._hwcap = hwcap;
+ arg._hwcap2 = hwcap2;
+ __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc
new file mode 100644
index 000000000000..dc34624807b7
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/unimplemented.inc
@@ -0,0 +1,8 @@
+// On platforms that have not implemented this yet, we provide an implementation
+// that does not claim support for any features by leaving
+// __aarch64_cpu_features.features initialized to 0.
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {}
+
+void __init_cpu_features(void) {}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
new file mode 100644
index 000000000000..41aba82ef952
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
@@ -0,0 +1,189 @@
+#if __has_include(<sys/hwcap.h>)
+#include <sys/hwcap.h>
+#define HAVE_SYS_HWCAP_H
+#endif
+
+#ifndef _IFUNC_ARG_HWCAP
+#define _IFUNC_ARG_HWCAP (1ULL << 62)
+#endif
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+#ifndef HWCAP_CPUID
+#define HWCAP_CPUID (1 << 11)
+#endif
+#ifndef HWCAP_FP
+#define HWCAP_FP (1 << 0)
+#endif
+#ifndef HWCAP_ASIMD
+#define HWCAP_ASIMD (1 << 1)
+#endif
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#ifndef HWCAP_SHA1
+#define HWCAP_SHA1 (1 << 5)
+#endif
+#ifndef HWCAP_SHA2
+#define HWCAP_SHA2 (1 << 6)
+#endif
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif
+#ifndef HWCAP_FPHP
+#define HWCAP_FPHP (1 << 9)
+#endif
+#ifndef HWCAP_ASIMDHP
+#define HWCAP_ASIMDHP (1 << 10)
+#endif
+#ifndef HWCAP_ASIMDRDM
+#define HWCAP_ASIMDRDM (1 << 12)
+#endif
+#ifndef HWCAP_JSCVT
+#define HWCAP_JSCVT (1 << 13)
+#endif
+#ifndef HWCAP_FCMA
+#define HWCAP_FCMA (1 << 14)
+#endif
+#ifndef HWCAP_LRCPC
+#define HWCAP_LRCPC (1 << 15)
+#endif
+#ifndef HWCAP_DCPOP
+#define HWCAP_DCPOP (1 << 16)
+#endif
+#ifndef HWCAP_SHA3
+#define HWCAP_SHA3 (1 << 17)
+#endif
+#ifndef HWCAP_SM3
+#define HWCAP_SM3 (1 << 18)
+#endif
+#ifndef HWCAP_SM4
+#define HWCAP_SM4 (1 << 19)
+#endif
+#ifndef HWCAP_ASIMDDP
+#define HWCAP_ASIMDDP (1 << 20)
+#endif
+#ifndef HWCAP_SHA512
+#define HWCAP_SHA512 (1 << 21)
+#endif
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+#ifndef HWCAP_ASIMDFHM
+#define HWCAP_ASIMDFHM (1 << 23)
+#endif
+#ifndef HWCAP_DIT
+#define HWCAP_DIT (1 << 24)
+#endif
+#ifndef HWCAP_ILRCPC
+#define HWCAP_ILRCPC (1 << 26)
+#endif
+#ifndef HWCAP_FLAGM
+#define HWCAP_FLAGM (1 << 27)
+#endif
+#ifndef HWCAP_SSBS
+#define HWCAP_SSBS (1 << 28)
+#endif
+#ifndef HWCAP_SB
+#define HWCAP_SB (1 << 29)
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+#ifndef HWCAP2_DCPODP
+#define HWCAP2_DCPODP (1 << 0)
+#endif
+#ifndef HWCAP2_SVE2
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+#ifndef HWCAP2_SVEAES
+#define HWCAP2_SVEAES (1 << 2)
+#endif
+#ifndef HWCAP2_SVEPMULL
+#define HWCAP2_SVEPMULL (1 << 3)
+#endif
+#ifndef HWCAP2_SVEBITPERM
+#define HWCAP2_SVEBITPERM (1 << 4)
+#endif
+#ifndef HWCAP2_SVESHA3
+#define HWCAP2_SVESHA3 (1 << 5)
+#endif
+#ifndef HWCAP2_SVESM4
+#define HWCAP2_SVESM4 (1 << 6)
+#endif
+#ifndef HWCAP2_FLAGM2
+#define HWCAP2_FLAGM2 (1 << 7)
+#endif
+#ifndef HWCAP2_FRINT
+#define HWCAP2_FRINT (1 << 8)
+#endif
+#ifndef HWCAP2_SVEI8MM
+#define HWCAP2_SVEI8MM (1 << 9)
+#endif
+#ifndef HWCAP2_SVEF32MM
+#define HWCAP2_SVEF32MM (1 << 10)
+#endif
+#ifndef HWCAP2_SVEF64MM
+#define HWCAP2_SVEF64MM (1 << 11)
+#endif
+#ifndef HWCAP2_SVEBF16
+#define HWCAP2_SVEBF16 (1 << 12)
+#endif
+#ifndef HWCAP2_I8MM
+#define HWCAP2_I8MM (1 << 13)
+#endif
+#ifndef HWCAP2_BF16
+#define HWCAP2_BF16 (1 << 14)
+#endif
+#ifndef HWCAP2_DGH
+#define HWCAP2_DGH (1 << 15)
+#endif
+#ifndef HWCAP2_RNG
+#define HWCAP2_RNG (1 << 16)
+#endif
+#ifndef HWCAP2_BTI
+#define HWCAP2_BTI (1 << 17)
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+#ifndef HWCAP2_RPRES
+#define HWCAP2_RPRES (1 << 21)
+#endif
+#ifndef HWCAP2_MTE3
+#define HWCAP2_MTE3 (1 << 22)
+#endif
+#ifndef HWCAP2_SME
+#define HWCAP2_SME (1 << 23)
+#endif
+#ifndef HWCAP2_SME_I16I64
+#define HWCAP2_SME_I16I64 (1 << 24)
+#endif
+#ifndef HWCAP2_SME_F64F64
+#define HWCAP2_SME_F64F64 (1 << 25)
+#endif
+#ifndef HWCAP2_WFXT
+#define HWCAP2_WFXT (1UL << 31)
+#endif
+#ifndef HWCAP2_EBF16
+#define HWCAP2_EBF16 (1ULL << 32)
+#endif
+#ifndef HWCAP2_SVE_EBF16
+#define HWCAP2_SVE_EBF16 (1ULL << 33)
+#endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
+#ifndef HWCAP2_MOPS
+#define HWCAP2_MOPS (1ULL << 43)
+#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3 (1UL << 46)
+#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc
new file mode 100644
index 000000000000..94bf64a5b0b0
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/android.inc
@@ -0,0 +1,28 @@
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/system_properties.h>
+
+static bool __isExynos9810(void) {
+ char arch[PROP_VALUE_MAX];
+ return __system_property_get("ro.arch", arch) > 0 &&
+ strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0;
+}
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
+ if (result) {
+ // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
+ // only the former support LSE atomics. However, the kernel in the
+ // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
+ // reported the feature as being supported.
+ //
+ // The kernel appears to have been corrected to mark it unsupported as of
+ // the Android 9.0 release on those devices, and this issue has not been
+ // observed anywhere else. Thus, this workaround may be removed if
+ // compiler-rt ever drops support for Android 8.0.
+ if (__isExynos9810())
+ result = false;
+ }
+ __aarch64_have_lse_atomics = result;
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc
new file mode 100644
index 000000000000..4a1f9c2c27c8
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/freebsd.inc
@@ -0,0 +1,5 @@
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+ unsigned long hwcap;
+ int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+ __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc
new file mode 100644
index 000000000000..91eac70ae6c5
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/fuchsia.inc
@@ -0,0 +1,12 @@
+#include <zircon/features.h>
+#include <zircon/syscalls.h>
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+ // This ensures the vDSO is a direct link-time dependency of anything that
+ // needs this initializer code.
+#pragma comment(lib, "zircon")
+ uint32_t features;
+ zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
+ __aarch64_have_lse_atomics =
+ status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc
new file mode 100644
index 000000000000..6642c1f5b60b
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/aarch64/lse_atomics/sysauxv.inc
@@ -0,0 +1,6 @@
+#include <sys/auxv.h>
+
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/cpu_model.h
new file mode 100644
index 000000000000..924ca89cf60f
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -0,0 +1,41 @@
+//===-- cpu_model/cpu_model.h - Utilities for cpu model detection -*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities for runtime cpu model detection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
+#define COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H
+
+#define bool int
+#define true 1
+#define false 0
+
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
+#if __has_attribute(constructor)
+#if __GNUC__ >= 9
+// Ordinarily init priorities below 101 are disallowed as they are reserved for
+// the implementation. However, we are the implementation, so silence the
+// diagnostic, since it doesn't apply to us.
+#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
+#endif
+// We're choosing init priority 90 to force our constructors to run before any
+// constructors in the end user application (starting at priority 101). This
+// value matches the libgcc choice for the same functions.
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
+#else
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
+#endif
+
+#endif
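CONSTRUCTOR_ATTRIBUTE is what makes the per-OS init_have_lse_atomics and __init_cpu_features definitions in this change run before user constructors. A tiny standalone illustration of that ordering, assuming a GCC or Clang toolchain (GCC may emit the -Wprio-ctor-dtor warning mentioned above):

#include <stdio.h>

// Priority 90, as CONSTRUCTOR_ATTRIBUTE uses, runs before the default user
// constructor range, which starts at 101.
__attribute__((constructor(90))) static void detect_features(void) {
  puts("feature detection (priority 90)"); // prints first
}

__attribute__((constructor)) static void user_ctor(void) {
  puts("user constructor (default priority)"); // prints second
}

int main(void) { return 0; }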
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c
new file mode 100644
index 000000000000..b1c4abd9d11d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -0,0 +1,1205 @@
+//===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is based on LLVM's lib/Support/Host.cpp.
+// It implements the operating system Host concept and the __cpu_model
+// builtin for the compiler-rt library on x86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "cpu_model.h"
+
+#if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+ defined(_M_X64))
+#error This file is intended only for x86-based targets
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
+
+#include <assert.h>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+enum VendorSignatures {
+ SIG_INTEL = 0x756e6547, // Genu
+ SIG_AMD = 0x68747541, // Auth
+};
+
+enum ProcessorVendors {
+ VENDOR_INTEL = 1,
+ VENDOR_AMD,
+ VENDOR_OTHER,
+ VENDOR_MAX
+};
+
+enum ProcessorTypes {
+ INTEL_BONNELL = 1,
+ INTEL_CORE2,
+ INTEL_COREI7,
+ AMDFAM10H,
+ AMDFAM15H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
+ INTEL_KNM,
+ INTEL_GOLDMONT,
+ INTEL_GOLDMONT_PLUS,
+ INTEL_TREMONT,
+ AMDFAM19H,
+ ZHAOXIN_FAM7H,
+ INTEL_SIERRAFOREST,
+ INTEL_GRANDRIDGE,
+ INTEL_CLEARWATERFOREST,
+ AMDFAM1AH,
+ CPU_TYPE_MAX
+};
+
+enum ProcessorSubtypes {
+ INTEL_COREI7_NEHALEM = 1,
+ INTEL_COREI7_WESTMERE,
+ INTEL_COREI7_SANDYBRIDGE,
+ AMDFAM10H_BARCELONA,
+ AMDFAM10H_SHANGHAI,
+ AMDFAM10H_ISTANBUL,
+ AMDFAM15H_BDVER1,
+ AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
+ INTEL_COREI7_IVYBRIDGE,
+ INTEL_COREI7_HASWELL,
+ INTEL_COREI7_BROADWELL,
+ INTEL_COREI7_SKYLAKE,
+ INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_CANNONLAKE,
+ INTEL_COREI7_ICELAKE_CLIENT,
+ INTEL_COREI7_ICELAKE_SERVER,
+ AMDFAM17H_ZNVER2,
+ INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_COOPERLAKE,
+ INTEL_COREI7_SAPPHIRERAPIDS,
+ INTEL_COREI7_ALDERLAKE,
+ AMDFAM19H_ZNVER3,
+ INTEL_COREI7_ROCKETLAKE,
+ ZHAOXIN_FAM7H_LUJIAZUI,
+ AMDFAM19H_ZNVER4,
+ INTEL_COREI7_GRANITERAPIDS,
+ INTEL_COREI7_GRANITERAPIDS_D,
+ INTEL_COREI7_ARROWLAKE,
+ INTEL_COREI7_ARROWLAKE_S,
+ INTEL_COREI7_PANTHERLAKE,
+ AMDFAM1AH_ZNVER5,
+ CPU_SUBTYPE_MAX
+};
+
+enum ProcessorFeatures {
+ FEATURE_CMOV = 0,
+ FEATURE_MMX,
+ FEATURE_POPCNT,
+ FEATURE_SSE,
+ FEATURE_SSE2,
+ FEATURE_SSE3,
+ FEATURE_SSSE3,
+ FEATURE_SSE4_1,
+ FEATURE_SSE4_2,
+ FEATURE_AVX,
+ FEATURE_AVX2,
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ,
+ FEATURE_AVX512VBMI2,
+ FEATURE_GFNI,
+ FEATURE_VPCLMULQDQ,
+ FEATURE_AVX512VNNI,
+ FEATURE_AVX512BITALG,
+ FEATURE_AVX512BF16,
+ FEATURE_AVX512VP2INTERSECT,
+  // FIXME: The features below are missing some entries compared to GCC
+  // because some GCC features do not map one-to-one onto LLVM features.
+ // FEATURE_3DNOW,
+ // FEATURE_3DNOWP,
+ FEATURE_ADX = 40,
+ // FEATURE_ABM,
+ FEATURE_CLDEMOTE = 42,
+ FEATURE_CLFLUSHOPT,
+ FEATURE_CLWB,
+ FEATURE_CLZERO,
+ FEATURE_CMPXCHG16B,
+  // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' a
+  // cpu string with no X86_FEATURE_COMPAT features, which is required by the
+  // current implementation of the cpu_specific/cpu_dispatch FMV feature.
+ // FEATURE_CMPXCHG8B,
+ FEATURE_ENQCMD = 48,
+ FEATURE_F16C,
+ FEATURE_FSGSBASE,
+ // FEATURE_FXSAVE,
+ // FEATURE_HLE,
+ // FEATURE_IBT,
+ FEATURE_LAHF_LM = 54,
+ FEATURE_LM,
+ FEATURE_LWP,
+ FEATURE_LZCNT,
+ FEATURE_MOVBE,
+ FEATURE_MOVDIR64B,
+ FEATURE_MOVDIRI,
+ FEATURE_MWAITX,
+ // FEATURE_OSXSAVE,
+ FEATURE_PCONFIG = 63,
+ FEATURE_PKU,
+ FEATURE_PREFETCHWT1,
+ FEATURE_PRFCHW,
+ FEATURE_PTWRITE,
+ FEATURE_RDPID,
+ FEATURE_RDRND,
+ FEATURE_RDSEED,
+ FEATURE_RTM,
+ FEATURE_SERIALIZE,
+ FEATURE_SGX,
+ FEATURE_SHA,
+ FEATURE_SHSTK,
+ FEATURE_TBM,
+ FEATURE_TSXLDTRK,
+ FEATURE_VAES,
+ FEATURE_WAITPKG,
+ FEATURE_WBNOINVD,
+ FEATURE_XSAVE,
+ FEATURE_XSAVEC,
+ FEATURE_XSAVEOPT,
+ FEATURE_XSAVES,
+ FEATURE_AMX_TILE,
+ FEATURE_AMX_INT8,
+ FEATURE_AMX_BF16,
+ FEATURE_UINTR,
+ FEATURE_HRESET,
+ FEATURE_KL,
+ // FEATURE_AESKLE,
+ FEATURE_WIDEKL = 92,
+ FEATURE_AVXVNNI,
+ FEATURE_AVX512FP16,
+ FEATURE_X86_64_BASELINE,
+ FEATURE_X86_64_V2,
+ FEATURE_X86_64_V3,
+ FEATURE_X86_64_V4,
+ FEATURE_AVXIFMA,
+ FEATURE_AVXVNNIINT8,
+ FEATURE_AVXNECONVERT,
+ FEATURE_CMPCCXADD,
+ FEATURE_AMX_FP16,
+ FEATURE_PREFETCHI,
+ FEATURE_RAOINT,
+ FEATURE_AMX_COMPLEX,
+ FEATURE_AVXVNNIINT16,
+ FEATURE_SM3,
+ FEATURE_SHA512,
+ FEATURE_SM4,
+ FEATURE_APXF,
+ FEATURE_USERMSR,
+ FEATURE_AVX10_1_256,
+ FEATURE_AVX10_1_512,
+ CPU_FEATURE_MAX
+};
+
+// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
+// It was motivated by bug reports of OpenSSL crashing on CPUs without CPUID
+// support. Consequently, for i386, the presence of CPUID is checked first via
+// the corresponding eflags bit.
+static bool isCpuIdSupported(void) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__i386__)
+ int __cpuid_supported;
+ __asm__(" pushfl\n"
+ " popl %%eax\n"
+ " movl %%eax,%%ecx\n"
+ " xorl $0x00200000,%%eax\n"
+ " pushl %%eax\n"
+ " popfl\n"
+ " pushfl\n"
+ " popl %%eax\n"
+ " movl $0,%0\n"
+ " cmpl %%eax,%%ecx\n"
+ " je 1f\n"
+ " movl $1,%0\n"
+ "1:"
+ : "=r"(__cpuid_supported)
+ :
+ : "eax", "ecx");
+ if (!__cpuid_supported)
+ return false;
+#endif
+ return true;
+#endif
+ return true;
+}
+
+// This code is copied from lib/Support/Host.cpp.
+// Changes to either file should be mirrored in the other.
+
+/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+/// the specified arguments. If we can't run cpuid on the host, return true.
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
+ return false;
+#elif defined(__i386__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
+ return false;
+#else
+ return true;
+#endif
+#elif defined(_MSC_VER)
+ // The MSVC intrinsic is portable across x86 and x64.
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+/// the 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+ unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
+ unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
+ return false;
+#elif defined(__i386__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
+ return false;
+#else
+ return true;
+#endif
+#elif defined(_MSC_VER)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+// Read control register 0 (XCR0). Used to detect features such as AVX.
+static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
+ return false;
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ *rEAX = Result;
+ *rEDX = Result >> 32;
+ return false;
+#else
+ return true;
+#endif
+}
+
+static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
+ unsigned *Model) {
+ *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (*Family == 6 || *Family == 0xf) {
+ if (*Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
+
+static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
+ unsigned Model,
+ const unsigned *Features,
+ unsigned *Type,
+ unsigned *Subtype) {
+ // We select CPU strings to match the code in Host.cpp, but we don't use them
+ // in compiler-rt.
+ const char *CPU = 0;
+
+ switch (Family) {
+ case 6:
+ switch (Model) {
+ case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 0x16: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ CPU = "core2";
+ *Type = INTEL_CORE2;
+ break;
+ case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+             // 45nm: Penryn, Wolfdale, Yorkfield (XE)
+ case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ CPU = "penryn";
+ *Type = INTEL_CORE2;
+ break;
+ case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 0x1f:
+ case 0x2e: // Nehalem EX
+ CPU = "nehalem";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_NEHALEM;
+ break;
+ case 0x25: // Intel Core i7, laptop version.
+ case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
+ case 0x2f: // Westmere EX
+ CPU = "westmere";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_WESTMERE;
+ break;
+ case 0x2a: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 0x2d:
+ CPU = "sandybridge";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_SANDYBRIDGE;
+ break;
+ case 0x3a:
+ case 0x3e: // Ivy Bridge EP
+ CPU = "ivybridge";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_IVYBRIDGE;
+ break;
+
+ // Haswell:
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+ CPU = "haswell";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_HASWELL;
+ break;
+
+ // Broadwell:
+ case 0x3d:
+ case 0x47:
+ case 0x4f:
+ case 0x56:
+ CPU = "broadwell";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_BROADWELL;
+ break;
+
+ // Skylake:
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
+ case 0xa5: // Comet Lake-H/S
+ case 0xa6: // Comet Lake-U
+ CPU = "skylake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_SKYLAKE;
+ break;
+
+ // Rocketlake:
+ case 0xa7:
+ CPU = "rocketlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ROCKETLAKE;
+ break;
+
+ // Skylake Xeon:
+ case 0x55:
+ *Type = INTEL_COREI7;
+ if (testFeature(FEATURE_AVX512BF16)) {
+ CPU = "cooperlake";
+ *Subtype = INTEL_COREI7_COOPERLAKE;
+ } else if (testFeature(FEATURE_AVX512VNNI)) {
+ CPU = "cascadelake";
+ *Subtype = INTEL_COREI7_CASCADELAKE;
+ } else {
+ CPU = "skylake-avx512";
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ }
+ break;
+
+ // Cannonlake:
+ case 0x66:
+ CPU = "cannonlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_CANNONLAKE;
+ break;
+
+ // Icelake:
+ case 0x7d:
+ case 0x7e:
+ CPU = "icelake-client";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
+ break;
+
+ // Tigerlake:
+ case 0x8c:
+ case 0x8d:
+ CPU = "tigerlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_TIGERLAKE;
+ break;
+
+ // Alderlake:
+ case 0x97:
+ case 0x9a:
+ // Raptorlake:
+ case 0xb7:
+ case 0xba:
+ case 0xbf:
+ // Meteorlake:
+ case 0xaa:
+ case 0xac:
+ // Gracemont:
+ case 0xbe:
+ CPU = "alderlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ALDERLAKE;
+ break;
+
+ // Arrowlake:
+ case 0xc5:
+ CPU = "arrowlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ARROWLAKE;
+ break;
+
+ // Arrowlake S:
+ case 0xc6:
+ // Lunarlake:
+ case 0xbd:
+ CPU = "arrowlake-s";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ARROWLAKE_S;
+ break;
+
+ // Pantherlake:
+ case 0xcc:
+ CPU = "pantherlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_PANTHERLAKE;
+ break;
+
+ // Icelake Xeon:
+ case 0x6a:
+ case 0x6c:
+ CPU = "icelake-server";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ICELAKE_SERVER;
+ break;
+
+ // Emerald Rapids:
+ case 0xcf:
+ // Sapphire Rapids:
+ case 0x8f:
+ CPU = "sapphirerapids";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+ break;
+
+ // Granite Rapids:
+ case 0xad:
+ CPU = "graniterapids";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_GRANITERAPIDS;
+ break;
+
+ // Granite Rapids D:
+ case 0xae:
+ CPU = "graniterapids-d";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
+ break;
+
+ case 0x1c: // Most 45 nm Intel Atom processors
+ case 0x26: // 45 nm Atom Lincroft
+ case 0x27: // 32 nm Atom Medfield
+ case 0x35: // 32 nm Atom Midview
+ case 0x36: // 32 nm Atom Midview
+ CPU = "bonnell";
+ *Type = INTEL_BONNELL;
+ break;
+
+ // Atom Silvermont codes from the Intel software optimization guide.
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5a:
+ case 0x5d:
+ case 0x4c: // really airmont
+ CPU = "silvermont";
+ *Type = INTEL_SILVERMONT;
+ break;
+ // Goldmont:
+ case 0x5c: // Apollo Lake
+ case 0x5f: // Denverton
+ CPU = "goldmont";
+ *Type = INTEL_GOLDMONT;
+ break; // "goldmont"
+ case 0x7a:
+ CPU = "goldmont-plus";
+ *Type = INTEL_GOLDMONT_PLUS;
+ break;
+ case 0x86:
+ case 0x8a: // Lakefield
+ case 0x96: // Elkhart Lake
+ case 0x9c: // Jasper Lake
+ CPU = "tremont";
+ *Type = INTEL_TREMONT;
+ break;
+
+ // Sierraforest:
+ case 0xaf:
+ CPU = "sierraforest";
+ *Type = INTEL_SIERRAFOREST;
+ break;
+
+ // Grandridge:
+ case 0xb6:
+ CPU = "grandridge";
+ *Type = INTEL_GRANDRIDGE;
+ break;
+
+ // Clearwaterforest:
+ case 0xdd:
+ CPU = "clearwaterforest";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_CLEARWATERFOREST;
+ break;
+
+ case 0x57:
+ CPU = "knl";
+ *Type = INTEL_KNL;
+ break;
+
+ case 0x85:
+ CPU = "knm";
+ *Type = INTEL_KNM;
+ break;
+
+ default: // Unknown family 6 CPU.
+ break;
+ }
+ break;
+ default:
+ break; // Unknown.
+ }
+
+ return CPU;
+}
+
+static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
+ unsigned Model,
+ const unsigned *Features,
+ unsigned *Type,
+ unsigned *Subtype) {
+ const char *CPU = 0;
+
+ switch (Family) {
+ case 4:
+ CPU = "i486";
+ break;
+ case 5:
+ CPU = "pentium";
+ switch (Model) {
+ case 6:
+ case 7:
+ CPU = "k6";
+ break;
+ case 8:
+ CPU = "k6-2";
+ break;
+ case 9:
+ case 13:
+ CPU = "k6-3";
+ break;
+ case 10:
+ CPU = "geode";
+ break;
+ }
+ break;
+ case 6:
+ if (testFeature(FEATURE_SSE)) {
+ CPU = "athlon-xp";
+ break;
+ }
+ CPU = "athlon";
+ break;
+ case 15:
+ if (testFeature(FEATURE_SSE3)) {
+ CPU = "k8-sse3";
+ break;
+ }
+ CPU = "k8";
+ break;
+ case 16:
+ CPU = "amdfam10";
+ *Type = AMDFAM10H; // "amdfam10"
+ switch (Model) {
+ case 2:
+ *Subtype = AMDFAM10H_BARCELONA;
+ break;
+ case 4:
+ *Subtype = AMDFAM10H_SHANGHAI;
+ break;
+ case 8:
+ *Subtype = AMDFAM10H_ISTANBUL;
+ break;
+ }
+ break;
+ case 20:
+ CPU = "btver1";
+ *Type = AMD_BTVER1;
+ break;
+ case 21:
+ CPU = "bdver1";
+ *Type = AMDFAM15H;
+ if (Model >= 0x60 && Model <= 0x7f) {
+ CPU = "bdver4";
+ *Subtype = AMDFAM15H_BDVER4;
+ break; // 60h-7Fh: Excavator
+ }
+ if (Model >= 0x30 && Model <= 0x3f) {
+ CPU = "bdver3";
+ *Subtype = AMDFAM15H_BDVER3;
+ break; // 30h-3Fh: Steamroller
+ }
+ if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+ CPU = "bdver2";
+ *Subtype = AMDFAM15H_BDVER2;
+ break; // 02h, 10h-1Fh: Piledriver
+ }
+ if (Model <= 0x0f) {
+ *Subtype = AMDFAM15H_BDVER1;
+ break; // 00h-0Fh: Bulldozer
+ }
+ break;
+ case 22:
+ CPU = "btver2";
+ *Type = AMD_BTVER2;
+ break;
+ case 23:
+ CPU = "znver1";
+ *Type = AMDFAM17H;
+ if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
+ (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
+ (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
+ (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
+ (Model >= 0xa0 && Model <= 0xaf)) {
+ // Family 17h Models 30h-3Fh (Starship) Zen 2
+ // Family 17h Models 47h (Cardinal) Zen 2
+ // Family 17h Models 60h-67h (Renoir) Zen 2
+ // Family 17h Models 68h-6Fh (Lucienne) Zen 2
+ // Family 17h Models 70h-7Fh (Matisse) Zen 2
+ // Family 17h Models 84h-87h (ProjectX) Zen 2
+ // Family 17h Models 90h-97h (VanGogh) Zen 2
+ // Family 17h Models 98h-9Fh (Mero) Zen 2
+ // Family 17h Models A0h-AFh (Mendocino) Zen 2
+ CPU = "znver2";
+ *Subtype = AMDFAM17H_ZNVER2;
+ break;
+ }
+ if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
+ // Family 17h Models 10h-1Fh (Raven1) Zen
+ // Family 17h Models 10h-1Fh (Picasso) Zen+
+ // Family 17h Models 20h-2Fh (Raven2 x86) Zen
+ *Subtype = AMDFAM17H_ZNVER1;
+ break;
+ }
+ break;
+ case 25:
+ CPU = "znver3";
+ *Type = AMDFAM19H;
+ if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
+ (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
+ (Model >= 0x50 && Model <= 0x5f)) {
+ // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
+ // Family 19h Models 20h-2Fh (Vermeer) Zen 3
+ // Family 19h Models 30h-3Fh (Badami) Zen 3
+ // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
+ // Family 19h Models 50h-5Fh (Cezanne) Zen 3
+ *Subtype = AMDFAM19H_ZNVER3;
+ break;
+ }
+ if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
+ (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
+ (Model >= 0xa0 && Model <= 0xaf)) {
+ // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
+ // Family 19h Models 60h-6Fh (Raphael) Zen 4
+ // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
+ // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
+ // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
+ CPU = "znver4";
+ *Subtype = AMDFAM19H_ZNVER4;
+ break; // "znver4"
+ }
+ break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
+ default:
+ break; // Unknown AMD CPU.
+ }
+
+ return CPU;
+}
+
+#undef testFeature
+
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+ unsigned *Features) {
+ unsigned EAX = 0, EBX = 0;
+
+#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
+#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
+
+ if ((EDX >> 15) & 1)
+ setFeature(FEATURE_CMOV);
+ if ((EDX >> 23) & 1)
+ setFeature(FEATURE_MMX);
+ if ((EDX >> 25) & 1)
+ setFeature(FEATURE_SSE);
+ if ((EDX >> 26) & 1)
+ setFeature(FEATURE_SSE2);
+
+ if ((ECX >> 0) & 1)
+ setFeature(FEATURE_SSE3);
+ if ((ECX >> 1) & 1)
+ setFeature(FEATURE_PCLMUL);
+ if ((ECX >> 9) & 1)
+ setFeature(FEATURE_SSSE3);
+ if ((ECX >> 12) & 1)
+ setFeature(FEATURE_FMA);
+ if ((ECX >> 13) & 1)
+ setFeature(FEATURE_CMPXCHG16B);
+ if ((ECX >> 19) & 1)
+ setFeature(FEATURE_SSE4_1);
+ if ((ECX >> 20) & 1)
+ setFeature(FEATURE_SSE4_2);
+ if ((ECX >> 22) & 1)
+ setFeature(FEATURE_MOVBE);
+ if ((ECX >> 23) & 1)
+ setFeature(FEATURE_POPCNT);
+ if ((ECX >> 25) & 1)
+ setFeature(FEATURE_AES);
+ if ((ECX >> 29) & 1)
+ setFeature(FEATURE_F16C);
+ if ((ECX >> 30) & 1)
+ setFeature(FEATURE_RDRND);
+
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ const unsigned AVXBits = (1 << 27) | (1 << 28);
+ bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
+#if defined(__APPLE__)
+  // Darwin lazily saves the AVX512 context on first use: trust that the OS
+  // will save the AVX512 context if we use AVX512 instructions, even if the
+  // bit is not set right now.
+ bool HasAVX512Save = true;
+#else
+ // AVX512 requires additional context to be saved by the OS.
+ bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
+#endif
+ // AMX requires additional context to be saved by the OS.
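+ // Bits 17 (XTILECFG) and 18 (XTILEDATA) of XCR0 cover the AMX tile state.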
+ const unsigned AMXBits = (1 << 17) | (1 << 18);
+ bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
+ bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
+
+ if (HasAVXSave)
+ setFeature(FEATURE_AVX);
+
+ if (((ECX >> 26) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVE);
+
+ bool HasLeaf7 =
+ MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+ if (HasLeaf7 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_FSGSBASE);
+ if (HasLeaf7 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_SGX);
+ if (HasLeaf7 && ((EBX >> 3) & 1))
+ setFeature(FEATURE_BMI);
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVX2);
+ if (HasLeaf7 && ((EBX >> 8) & 1))
+ setFeature(FEATURE_BMI2);
+ if (HasLeaf7 && ((EBX >> 11) & 1))
+ setFeature(FEATURE_RTM);
+ if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512F);
+ if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512DQ);
+ if (HasLeaf7 && ((EBX >> 18) & 1))
+ setFeature(FEATURE_RDSEED);
+ if (HasLeaf7 && ((EBX >> 19) & 1))
+ setFeature(FEATURE_ADX);
+ if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512IFMA);
+ if (HasLeaf7 && ((EBX >> 24) & 1))
+ setFeature(FEATURE_CLWB);
+ if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512PF);
+ if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512ER);
+ if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512CD);
+ if (HasLeaf7 && ((EBX >> 29) & 1))
+ setFeature(FEATURE_SHA);
+ if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512BW);
+ if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VL);
+
+ if (HasLeaf7 && ((ECX >> 0) & 1))
+ setFeature(FEATURE_PREFETCHWT1);
+ if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 4) & 1))
+ setFeature(FEATURE_PKU);
+ if (HasLeaf7 && ((ECX >> 5) & 1))
+ setFeature(FEATURE_WAITPKG);
+ if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 7) & 1))
+ setFeature(FEATURE_SHSTK);
+ if (HasLeaf7 && ((ECX >> 8) & 1))
+ setFeature(FEATURE_GFNI);
+ if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
+ setFeature(FEATURE_VAES);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
+ setFeature(FEATURE_VPCLMULQDQ);
+ if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VNNI);
+ if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512BITALG);
+ if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VPOPCNTDQ);
+ if (HasLeaf7 && ((ECX >> 22) & 1))
+ setFeature(FEATURE_RDPID);
+ if (HasLeaf7 && ((ECX >> 23) & 1))
+ setFeature(FEATURE_KL);
+ if (HasLeaf7 && ((ECX >> 25) & 1))
+ setFeature(FEATURE_CLDEMOTE);
+ if (HasLeaf7 && ((ECX >> 27) & 1))
+ setFeature(FEATURE_MOVDIRI);
+ if (HasLeaf7 && ((ECX >> 28) & 1))
+ setFeature(FEATURE_MOVDIR64B);
+ if (HasLeaf7 && ((ECX >> 29) & 1))
+ setFeature(FEATURE_ENQCMD);
+
+ if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX5124VNNIW);
+ if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 5) & 1))
+ setFeature(FEATURE_UINTR);
+ if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VP2INTERSECT);
+ if (HasLeaf7 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_SERIALIZE);
+ if (HasLeaf7 && ((EDX >> 16) & 1))
+ setFeature(FEATURE_TSXLDTRK);
+ if (HasLeaf7 && ((EDX >> 18) & 1))
+ setFeature(FEATURE_PCONFIG);
+ if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_BF16);
+ if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512FP16);
+ if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_TILE);
+ if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_INT8);
+
+ // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
+ // return all 0s for invalid subleaves, so check the limit.
+ bool HasLeaf7Subleaf1 =
+ HasLeaf7 && EAX >= 1 &&
+ !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
+ setFeature(FEATURE_SHA512);
+ if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
+ setFeature(FEATURE_SM3);
+ if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
+ setFeature(FEATURE_SM4);
+ if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
+ setFeature(FEATURE_RAOINT);
+ if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNI);
+ if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512BF16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
+ setFeature(FEATURE_CMPCCXADD);
+ if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_FP16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
+ setFeature(FEATURE_HRESET);
+ if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXIFMA);
+
+ if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT8);
+ if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXNECONVERT);
+ if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_COMPLEX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT16);
+ if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_PREFETCHI);
+ if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
+ setFeature(FEATURE_USERMSR);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
+ setFeature(FEATURE_AVX10_1_256);
+ if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
+ setFeature(FEATURE_APXF);
+
+ unsigned MaxLevel;
+ getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
+ bool HasLeafD = MaxLevel >= 0xd &&
+ !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEOPT);
+ if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEC);
+ if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVES);
+
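+ // Leaf 0x24 is the AVX10 enumeration leaf; EBX bit 18 indicates that
+ // 512-bit vector lengths are supported.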
+ bool HasLeaf24 =
+ MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
+ setFeature(FEATURE_AVX10_1_512);
+
+ unsigned MaxExtLevel;
+ getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf1) {
+ if (ECX & 1)
+ setFeature(FEATURE_LAHF_LM);
+ if ((ECX >> 5) & 1)
+ setFeature(FEATURE_LZCNT);
+ if (((ECX >> 6) & 1))
+ setFeature(FEATURE_SSE4_A);
+ if (((ECX >> 8) & 1))
+ setFeature(FEATURE_PRFCHW);
+ if (((ECX >> 11) & 1))
+ setFeature(FEATURE_XOP);
+ if (((ECX >> 15) & 1))
+ setFeature(FEATURE_LWP);
+ if (((ECX >> 16) & 1))
+ setFeature(FEATURE_FMA4);
+ if (((ECX >> 21) & 1))
+ setFeature(FEATURE_TBM);
+ if (((ECX >> 29) & 1))
+ setFeature(FEATURE_MWAITX);
+
+ if (((EDX >> 29) & 1))
+ setFeature(FEATURE_LM);
+ }
+
+ bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+ !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf8 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_CLZERO);
+ if (HasExtLeaf8 && ((EBX >> 9) & 1))
+ setFeature(FEATURE_WBNOINVD);
+
+ bool HasLeaf14 = MaxLevel >= 0x14 &&
+ !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf14 && ((EBX >> 4) & 1))
+ setFeature(FEATURE_PTWRITE);
+
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_WIDEKL);
+
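+ // Derive the x86-64 psABI microarchitecture levels (x86-64, v2, v3, v4)
+ // from the detected features; each level requires everything from the
+ // previous one.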
+ if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
+ setFeature(FEATURE_X86_64_BASELINE);
+ if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
+ hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
+ setFeature(FEATURE_X86_64_V2);
+ if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
+ hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
+ hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
+ hasFeature(FEATURE_MOVBE)) {
+ setFeature(FEATURE_X86_64_V3);
+ if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
+ hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
+ setFeature(FEATURE_X86_64_V4);
+ }
+ }
+ }
+
+#undef hasFeature
+#undef setFeature
+}
+
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
+
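+// __cpu_model and __cpu_features2 below use the same layout that GCC's
+// libgcc exposes, so code built with either compiler can query them through
+// the __builtin_cpu_is / __builtin_cpu_supports builtins.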
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+struct __processor_model {
+ unsigned int __cpu_vendor;
+ unsigned int __cpu_type;
+ unsigned int __cpu_subtype;
+ unsigned int __cpu_features[1];
+} __cpu_model = {0, 0, 0, {0}};
+
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
+
+// A constructor function that sets __cpu_model and __cpu_features2 to the
+// right values. This needs to run only once. This constructor is given the
+// highest priority, so it should run before constructors without a priority
+// set. However, it still runs after ifunc initializers and needs to be
+// called explicitly there.
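+//
+// As an illustrative sketch (not part of the original sources), an ifunc
+// resolver therefore initializes the model itself before querying it, e.g.:
+//
+//   static void impl_avx2(void);
+//   static void impl_generic(void);
+//   static void (*resolve_impl(void))(void) {
+//     __builtin_cpu_init(); // lowers to a call to __cpu_indicator_init
+//     return __builtin_cpu_supports("avx2") ? impl_avx2 : impl_generic;
+//   }
+//   void impl(void) __attribute__((ifunc("resolve_impl")));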
+
+int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
+ unsigned EAX, EBX, ECX, EDX;
+ unsigned MaxLeaf = 5;
+ unsigned Vendor;
+ unsigned Model, Family;
+ unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
+ static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
+ static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
+
+ // This function needs to run just once.
+ if (__cpu_model.__cpu_vendor)
+ return 0;
+
+ if (!isCpuIdSupported() ||
+ getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+ __cpu_model.__cpu_vendor = VENDOR_OTHER;
+ return -1;
+ }
+
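+ // Leaf 1 EAX encodes the family in bits 8-11 (plus the extended family in
+ // bits 20-27 when the base family is 0xF) and the model in bits 4-7 (plus
+ // the extended model in bits 16-19 for families 0x6 and 0xF);
+ // detectX86FamilyModel performs that decoding.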
+ getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+ detectX86FamilyModel(EAX, &Family, &Model);
+
+ // Find available features.
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
+
+ __cpu_model.__cpu_features[0] = Features[0];
+ __cpu_features2[0] = Features[1];
+ __cpu_features2[1] = Features[2];
+ __cpu_features2[2] = Features[3];
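+ // Feature bits 0-31 are exported through __cpu_model.__cpu_features[0];
+ // bits 32 and up land in __cpu_features2, 32 features per element.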
+
+ if (Vendor == SIG_INTEL) {
+ // Get CPU type.
+ getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
+ &(__cpu_model.__cpu_type),
+ &(__cpu_model.__cpu_subtype));
+ __cpu_model.__cpu_vendor = VENDOR_INTEL;
+ } else if (Vendor == SIG_AMD) {
+ // Get CPU type.
+ getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
+ &(__cpu_model.__cpu_type),
+ &(__cpu_model.__cpu_subtype));
+ __cpu_model.__cpu_vendor = VENDOR_AMD;
+ } else
+ __cpu_model.__cpu_vendor = VENDOR_OTHER;
+
+ assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
+ assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
+ assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
+
+ return 0;
+}
+#endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)