summaryrefslogtreecommitdiff
path: root/compiler-rt/lib/builtins
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
committerDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
commitcfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree209fb2a2d68f8f277793fc8df46c753d31bc853b /compiler-rt/lib/builtins
parent706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'compiler-rt/lib/builtins')
-rw-r--r--compiler-rt/lib/builtins/README.txt51
-rw-r--r--compiler-rt/lib/builtins/absvsi2.c2
-rw-r--r--compiler-rt/lib/builtins/ashldi3.c2
-rw-r--r--compiler-rt/lib/builtins/ashrdi3.c2
-rw-r--r--compiler-rt/lib/builtins/atomic.c9
-rw-r--r--compiler-rt/lib/builtins/clear_cache.c10
-rw-r--r--compiler-rt/lib/builtins/clzdi2.c6
-rw-r--r--compiler-rt/lib/builtins/clzsi2.c2
-rw-r--r--compiler-rt/lib/builtins/clzti2.c2
-rw-r--r--compiler-rt/lib/builtins/cpu_model.c185
-rw-r--r--compiler-rt/lib/builtins/ctzdi2.c6
-rw-r--r--compiler-rt/lib/builtins/ctzsi2.c2
-rw-r--r--compiler-rt/lib/builtins/ctzti2.c2
-rw-r--r--compiler-rt/lib/builtins/ffsdi2.c6
-rw-r--r--compiler-rt/lib/builtins/ffssi2.c4
-rw-r--r--compiler-rt/lib/builtins/ffsti2.c2
-rw-r--r--compiler-rt/lib/builtins/floatdidf.c2
-rw-r--r--compiler-rt/lib/builtins/floatdisf.c2
-rw-r--r--compiler-rt/lib/builtins/floatsidf.c8
-rw-r--r--compiler-rt/lib/builtins/floatundidf.c2
-rw-r--r--compiler-rt/lib/builtins/floatundisf.c2
-rw-r--r--compiler-rt/lib/builtins/floatunsidf.c6
-rw-r--r--compiler-rt/lib/builtins/fp_extend.h2
-rw-r--r--compiler-rt/lib/builtins/fp_lib.h6
-rw-r--r--compiler-rt/lib/builtins/fp_mode.h4
-rw-r--r--compiler-rt/lib/builtins/hexagon/dffma.S8
-rw-r--r--compiler-rt/lib/builtins/hexagon/fabs_opt.S36
-rw-r--r--compiler-rt/lib/builtins/hexagon/fma_opt.S30
-rw-r--r--compiler-rt/lib/builtins/hexagon/fmax_opt.S29
-rw-r--r--compiler-rt/lib/builtins/hexagon/fmin_opt.S29
-rw-r--r--compiler-rt/lib/builtins/i386/floatdidf.S2
-rw-r--r--compiler-rt/lib/builtins/i386/floatdixf.S2
-rw-r--r--compiler-rt/lib/builtins/int_div_impl.inc70
-rw-r--r--compiler-rt/lib/builtins/int_lib.h24
-rw-r--r--compiler-rt/lib/builtins/int_types.h26
-rw-r--r--compiler-rt/lib/builtins/lshrdi3.c2
-rw-r--r--compiler-rt/lib/builtins/paritydi2.c2
-rw-r--r--compiler-rt/lib/builtins/paritysi2.c2
-rw-r--r--compiler-rt/lib/builtins/parityti2.c2
-rw-r--r--compiler-rt/lib/builtins/popcountdi2.c2
-rw-r--r--compiler-rt/lib/builtins/popcountsi2.c2
-rw-r--r--compiler-rt/lib/builtins/popcountti2.c2
-rw-r--r--compiler-rt/lib/builtins/powidf2.c2
-rw-r--r--compiler-rt/lib/builtins/powisf2.c2
-rw-r--r--compiler-rt/lib/builtins/powitf2.c7
-rw-r--r--compiler-rt/lib/builtins/powixf2.c2
-rw-r--r--compiler-rt/lib/builtins/riscv/int_mul_impl.inc31
-rw-r--r--compiler-rt/lib/builtins/riscv/muldi3.S11
-rw-r--r--compiler-rt/lib/builtins/riscv/mulsi3.S23
-rw-r--r--compiler-rt/lib/builtins/udivdi3.c6
-rw-r--r--compiler-rt/lib/builtins/udivmoddi4.c6
-rw-r--r--compiler-rt/lib/builtins/udivmodti4.c285
-rw-r--r--compiler-rt/lib/builtins/udivsi3.c47
-rw-r--r--compiler-rt/lib/builtins/umoddi3.c8
-rw-r--r--compiler-rt/lib/builtins/umodsi3.c6
-rw-r--r--compiler-rt/lib/builtins/ve/grow_stack.S31
-rw-r--r--compiler-rt/lib/builtins/ve/grow_stack_align.S31
57 files changed, 572 insertions, 523 deletions
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index e603dfa05356..f9e1bc805092 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -20,13 +20,18 @@ Here is the specification for this library:
http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
+Please note that the libgcc specification explicitly mentions actual types of
+arguments and returned values being expressed with machine modes.
+In some cases particular types such as "int", "unsigned", "long long", etc.
+may be specified just as examples there.
+
Here is a synopsis of the contents of this library:
-typedef int si_int;
-typedef unsigned su_int;
+typedef int32_t si_int;
+typedef uint32_t su_int;
-typedef long long di_int;
-typedef unsigned long long du_int;
+typedef int64_t di_int;
+typedef uint64_t du_int;
// Integral bit manipulation
@@ -38,24 +43,24 @@ ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill)
di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill)
ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill)
-si_int __clzsi2(si_int a); // count leading zeros
-si_int __clzdi2(di_int a); // count leading zeros
-si_int __clzti2(ti_int a); // count leading zeros
-si_int __ctzsi2(si_int a); // count trailing zeros
-si_int __ctzdi2(di_int a); // count trailing zeros
-si_int __ctzti2(ti_int a); // count trailing zeros
+int __clzsi2(si_int a); // count leading zeros
+int __clzdi2(di_int a); // count leading zeros
+int __clzti2(ti_int a); // count leading zeros
+int __ctzsi2(si_int a); // count trailing zeros
+int __ctzdi2(di_int a); // count trailing zeros
+int __ctzti2(ti_int a); // count trailing zeros
-si_int __ffssi2(si_int a); // find least significant 1 bit
-si_int __ffsdi2(di_int a); // find least significant 1 bit
-si_int __ffsti2(ti_int a); // find least significant 1 bit
+int __ffssi2(si_int a); // find least significant 1 bit
+int __ffsdi2(di_int a); // find least significant 1 bit
+int __ffsti2(ti_int a); // find least significant 1 bit
-si_int __paritysi2(si_int a); // bit parity
-si_int __paritydi2(di_int a); // bit parity
-si_int __parityti2(ti_int a); // bit parity
+int __paritysi2(si_int a); // bit parity
+int __paritydi2(di_int a); // bit parity
+int __parityti2(ti_int a); // bit parity
-si_int __popcountsi2(si_int a); // bit population
-si_int __popcountdi2(di_int a); // bit population
-si_int __popcountti2(ti_int a); // bit population
+int __popcountsi2(si_int a); // bit population
+int __popcountdi2(di_int a); // bit population
+int __popcountti2(ti_int a); // bit population
uint32_t __bswapsi2(uint32_t a); // a byteswapped
uint64_t __bswapdi2(uint64_t a); // a byteswapped
@@ -169,10 +174,10 @@ long double __floatuntixf(tu_int a);
// Floating point raised to integer power
-float __powisf2( float a, si_int b); // a ^ b
-double __powidf2( double a, si_int b); // a ^ b
-long double __powixf2(long double a, si_int b); // a ^ b
-long double __powitf2(long double a, si_int b); // ppc only, a ^ b
+float __powisf2( float a, int b); // a ^ b
+double __powidf2( double a, int b); // a ^ b
+long double __powixf2(long double a, int b); // a ^ b
+long double __powitf2(long double a, int b); // ppc only, a ^ b
// Complex arithmetic
diff --git a/compiler-rt/lib/builtins/absvsi2.c b/compiler-rt/lib/builtins/absvsi2.c
index 44ada169e7e6..9d5de7e8a3f2 100644
--- a/compiler-rt/lib/builtins/absvsi2.c
+++ b/compiler-rt/lib/builtins/absvsi2.c
@@ -18,7 +18,7 @@
COMPILER_RT_ABI si_int __absvsi2(si_int a) {
const int N = (int)(sizeof(si_int) * CHAR_BIT);
- if (a == (1 << (N - 1)))
+ if (a == ((si_int)1 << (N - 1)))
compilerrt_abort();
const si_int t = a >> (N - 1);
return (a ^ t) - t;
diff --git a/compiler-rt/lib/builtins/ashldi3.c b/compiler-rt/lib/builtins/ashldi3.c
index 7c81057a2284..04f22228f11d 100644
--- a/compiler-rt/lib/builtins/ashldi3.c
+++ b/compiler-rt/lib/builtins/ashldi3.c
@@ -16,7 +16,7 @@
// Precondition: 0 <= b < bits_in_dword
-COMPILER_RT_ABI di_int __ashldi3(di_int a, si_int b) {
+COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) {
const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
dwords input;
dwords result;
diff --git a/compiler-rt/lib/builtins/ashrdi3.c b/compiler-rt/lib/builtins/ashrdi3.c
index b9939132205c..934a5c47fd69 100644
--- a/compiler-rt/lib/builtins/ashrdi3.c
+++ b/compiler-rt/lib/builtins/ashrdi3.c
@@ -16,7 +16,7 @@
// Precondition: 0 <= b < bits_in_dword
-COMPILER_RT_ABI di_int __ashrdi3(di_int a, si_int b) {
+COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) {
const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
dwords input;
dwords result;
diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
index 32b3a0f9ad23..8634a72e77d1 100644
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -23,6 +23,7 @@
//
//===----------------------------------------------------------------------===//
+#include <stdbool.h>
#include <stdint.h>
#include <string.h>
@@ -293,8 +294,8 @@ OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) \
- int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \
- int success, int failure) { \
+ bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \
+ int success, int failure) { \
if (lockfree) \
return __c11_atomic_compare_exchange_strong( \
(_Atomic(type) *)ptr, expected, desired, success, failure); \
@@ -303,11 +304,11 @@ OPTIMISED_CASES
if (*ptr == *expected) { \
*ptr = desired; \
unlock(l); \
- return 1; \
+ return true; \
} \
*expected = *ptr; \
unlock(l); \
- return 0; \
+ return false; \
}
OPTIMISED_CASES
#undef OPTIMISED_CASE
diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c
index e83e21254e85..72e02e613de5 100644
--- a/compiler-rt/lib/builtins/clear_cache.c
+++ b/compiler-rt/lib/builtins/clear_cache.c
@@ -147,6 +147,16 @@ void __clear_cache(void *start, void *end) {
for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
__asm__ volatile("flush %0" : : "r"(dword));
+#elif defined(__riscv) && defined(__linux__)
+#define __NR_riscv_flush_icache (244 + 15)
+ register void *start_reg __asm("a0") = start;
+ const register void *end_reg __asm("a1") = end;
+ const register long flags __asm("a2") = 0;
+ const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
+ __asm __volatile("ecall"
+ : "=r"(start_reg)
+ : "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
+ assert(start_reg == 0 && "Cache flush syscall failed.");
#else
#if __APPLE__
// On Darwin, sys_icache_invalidate() provides this functionality
diff --git a/compiler-rt/lib/builtins/clzdi2.c b/compiler-rt/lib/builtins/clzdi2.c
index a0bacb2ae39e..12c17982a5cb 100644
--- a/compiler-rt/lib/builtins/clzdi2.c
+++ b/compiler-rt/lib/builtins/clzdi2.c
@@ -21,15 +21,15 @@
// ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than
// __clzsi2, leading to infinite recursion.
#define __builtin_clz(a) __clzsi2(a)
-extern si_int __clzsi2(si_int);
+extern int __clzsi2(si_int);
#endif
// Precondition: a != 0
-COMPILER_RT_ABI si_int __clzdi2(di_int a) {
+COMPILER_RT_ABI int __clzdi2(di_int a) {
dwords x;
x.all = a;
const si_int f = -(x.s.high == 0);
- return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) +
+ return clzsi((x.s.high & ~f) | (x.s.low & f)) +
(f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}
diff --git a/compiler-rt/lib/builtins/clzsi2.c b/compiler-rt/lib/builtins/clzsi2.c
index 3f9f27f41331..d75f56d937b0 100644
--- a/compiler-rt/lib/builtins/clzsi2.c
+++ b/compiler-rt/lib/builtins/clzsi2.c
@@ -16,7 +16,7 @@
// Precondition: a != 0
-COMPILER_RT_ABI si_int __clzsi2(si_int a) {
+COMPILER_RT_ABI int __clzsi2(si_int a) {
su_int x = (su_int)a;
si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
x >>= 16 - t; // x = [0 - 0xFFFF]
diff --git a/compiler-rt/lib/builtins/clzti2.c b/compiler-rt/lib/builtins/clzti2.c
index 0c787104caa2..25d30119f271 100644
--- a/compiler-rt/lib/builtins/clzti2.c
+++ b/compiler-rt/lib/builtins/clzti2.c
@@ -18,7 +18,7 @@
// Precondition: a != 0
-COMPILER_RT_ABI si_int __clzti2(ti_int a) {
+COMPILER_RT_ABI int __clzti2(ti_int a) {
twords x;
x.all = a;
const di_int f = -(x.s.high == 0);
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index fb619037d398..8346bb62dcfb 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -82,6 +82,8 @@ enum ProcessorSubtypes {
INTEL_COREI7_ICELAKE_SERVER,
AMDFAM17H_ZNVER2,
INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_COOPERLAKE,
CPU_SUBTYPE_MAX
};
@@ -122,7 +124,9 @@ enum ProcessorFeatures {
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG,
- FEATURE_AVX512BF16
+ FEATURE_AVX512BF16,
+ FEATURE_AVX512VP2INTERSECT,
+ CPU_FEATURE_MAX
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -268,13 +272,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
}
-static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
- unsigned Brand_id,
- unsigned Features,
- unsigned Features2, unsigned *Type,
- unsigned *Subtype) {
- if (Brand_id != 0)
- return;
+static const char *
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ unsigned *Type, unsigned *Subtype) {
+#define testFeature(F) \
+ (Features[F / 32] & (F % 32)) != 0
+
+ // We select CPU strings to match the code in Host.cpp, but we don't use them
+ // in compiler-rt.
+ const char *CPU = 0;
+
switch (Family) {
case 6:
switch (Model) {
@@ -285,13 +293,17 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
+ CPU = "core2";
+ *Type = INTEL_CORE2;
+ break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
- *Type = INTEL_CORE2; // "penryn"
+ CPU = "penryn";
+ *Type = INTEL_CORE2;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
@@ -299,25 +311,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
- *Type = INTEL_COREI7; // "nehalem"
+ CPU = "nehalem";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
break;
case 0x25: // Intel Core i7, laptop version.
case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
case 0x2f: // Westmere EX
- *Type = INTEL_COREI7; // "westmere"
+ CPU = "westmere";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_WESTMERE;
break;
case 0x2a: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 0x2d:
- *Type = INTEL_COREI7; //"sandybridge"
+ CPU = "sandybridge";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
- *Type = INTEL_COREI7; // "ivybridge"
+ CPU = "ivybridge";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_IVYBRIDGE;
break;
@@ -326,7 +342,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x3f:
case 0x45:
case 0x46:
- *Type = INTEL_COREI7; // "haswell"
+ CPU = "haswell";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_HASWELL;
break;
@@ -335,7 +352,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x47:
case 0x4f:
case 0x56:
- *Type = INTEL_COREI7; // "broadwell"
+ CPU = "broadwell";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_BROADWELL;
break;
@@ -344,37 +362,49 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
- *Type = INTEL_COREI7; // "skylake"
+ case 0xa5: // Comet Lake-H/S
+ case 0xa6: // Comet Lake-U
+ CPU = "skylake";
+ *Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SKYLAKE;
break;
// Skylake Xeon:
case 0x55:
*Type = INTEL_COREI7;
- if (Features2 & (1 << (FEATURE_AVX512VNNI - 32)))
- *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake"
- else
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ if (testFeature(FEATURE_AVX512BF16)) {
+ CPU = "cooperlake";
+ *Subtype = INTEL_COREI7_COOPERLAKE;
+ } else if (testFeature(FEATURE_AVX512VNNI)) {
+ CPU = "cascadelake";
+ *Subtype = INTEL_COREI7_CASCADELAKE;
+ } else {
+ CPU = "skylake-avx512";
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ }
break;
// Cannonlake:
case 0x66:
+ CPU = "cannonlake";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
+ *Subtype = INTEL_COREI7_CANNONLAKE;
break;
// Icelake:
case 0x7d:
case 0x7e:
+ CPU = "icelake-client";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
+ *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
break;
// Icelake Xeon:
case 0x6a:
case 0x6c:
+ CPU = "icelake-server";
*Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
+ *Subtype = INTEL_COREI7_ICELAKE_SERVER;
break;
case 0x1c: // Most 45 nm Intel Atom processors
@@ -382,8 +412,9 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
+ CPU = "bonnell";
*Type = INTEL_BONNELL;
- break; // "bonnell"
+ break;
// Atom Silvermont codes from the Intel software optimization guide.
case 0x37:
@@ -392,26 +423,32 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
+ CPU = "silvermont";
*Type = INTEL_SILVERMONT;
- break; // "silvermont"
+ break;
// Goldmont:
case 0x5c: // Apollo Lake
case 0x5f: // Denverton
+ CPU = "goldmont";
*Type = INTEL_GOLDMONT;
break; // "goldmont"
case 0x7a:
+ CPU = "goldmont-plus";
*Type = INTEL_GOLDMONT_PLUS;
break;
case 0x86:
+ CPU = "tremont";
*Type = INTEL_TREMONT;
break;
case 0x57:
- *Type = INTEL_KNL; // knl
+ CPU = "knl";
+ *Type = INTEL_KNL;
break;
case 0x85:
- *Type = INTEL_KNM; // knm
+ CPU = "knm";
+ *Type = INTEL_KNM;
break;
default: // Unknown family 6 CPU.
@@ -421,17 +458,22 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
default:
break; // Unknown.
}
+
+ return CPU;
}
-static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
- unsigned Features, unsigned Features2,
- unsigned *Type, unsigned *Subtype) {
- // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
- // appears to be no way to generate the wide variety of AMD-specific targets
- // from the information returned from CPUID.
+static const char *
+getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ unsigned *Type, unsigned *Subtype) {
+ // We select CPU strings to match the code in Host.cpp, but we don't use them
+ // in compiler-rt.
+ const char *CPU = 0;
+
switch (Family) {
case 16:
- *Type = AMDFAM10H; // "amdfam10"
+ CPU = "amdfam10";
+ *Type = AMDFAM10H;
switch (Model) {
case 2:
*Subtype = AMDFAM10H_BARCELONA;
@@ -445,60 +487,62 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 20:
+ CPU = "btver1";
*Type = AMD_BTVER1;
- break; // "btver1";
+ break;
case 21:
+ CPU = "bdver1";
*Type = AMDFAM15H;
if (Model >= 0x60 && Model <= 0x7f) {
+ CPU = "bdver4";
*Subtype = AMDFAM15H_BDVER4;
- break; // "bdver4"; 60h-7Fh: Excavator
+ break; // 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
+ CPU = "bdver3";
*Subtype = AMDFAM15H_BDVER3;
- break; // "bdver3"; 30h-3Fh: Steamroller
+ break; // 30h-3Fh: Steamroller
}
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+ CPU = "bdver2";
*Subtype = AMDFAM15H_BDVER2;
- break; // "bdver2"; 02h, 10h-1Fh: Piledriver
+ break; // 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;
- break; // "bdver1"; 00h-0Fh: Bulldozer
+ break; // 00h-0Fh: Bulldozer
}
break;
case 22:
+ CPU = "btver2";
*Type = AMD_BTVER2;
- break; // "btver2"
+ break;
case 23:
+ CPU = "znver1";
*Type = AMDFAM17H;
if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
+ CPU = "znver2";
*Subtype = AMDFAM17H_ZNVER2;
- break; // "znver2"; 30h-3fh, 71h: Zen2
+ break; // 30h-3fh, 71h: Zen2
}
if (Model <= 0x0f) {
*Subtype = AMDFAM17H_ZNVER1;
- break; // "znver1"; 00h-0Fh: Zen1
+ break; // 00h-0Fh: Zen1
}
break;
default:
- break; // "generic"
+ break; // Unknown AMD CPU.
}
+
+ return CPU;
}
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
- unsigned *FeaturesOut,
- unsigned *Features2Out) {
- unsigned Features = 0;
- unsigned Features2 = 0;
+ unsigned *Features) {
unsigned EAX, EBX;
#define setFeature(F) \
- do { \
- if (F < 32) \
- Features |= 1U << (F & 0x1f); \
- else if (F < 64) \
- Features2 |= 1U << ((F - 32) & 0x1f); \
- } while (0)
+ Features[F / 32] |= 1U << (F % 32)
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
@@ -590,6 +634,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VP2INTERSECT);
bool HasLeaf7Subleaf1 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
@@ -607,9 +653,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_XOP);
if (HasExtLeaf1 && ((ECX >> 16) & 1))
setFeature(FEATURE_FMA4);
-
- *FeaturesOut = Features;
- *Features2Out = Features2;
#undef setFeature
}
@@ -641,7 +684,7 @@ struct __processor_model {
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
-unsigned int __cpu_features2;
+unsigned int __cpu_features2 = 0;
// A constructor function that is sets __cpu_model and __cpu_features2 with
// the right values. This needs to run only once. This constructor is
@@ -653,40 +696,38 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
unsigned EAX, EBX, ECX, EDX;
unsigned MaxLeaf = 5;
unsigned Vendor;
- unsigned Model, Family, Brand_id;
- unsigned Features = 0;
- unsigned Features2 = 0;
+ unsigned Model, Family;
+ unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
// This function needs to run just once.
if (__cpu_model.__cpu_vendor)
return 0;
- if (!isCpuIdSupported())
- return -1;
-
- // Assume cpuid insn present. Run in level 0 to get vendor id.
- if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+ if (!isCpuIdSupported() ||
+ getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
__cpu_model.__cpu_vendor = VENDOR_OTHER;
return -1;
}
+
getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
detectX86FamilyModel(EAX, &Family, &Model);
- Brand_id = EBX & 0xff;
// Find available features.
- getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
- __cpu_model.__cpu_features[0] = Features;
- __cpu_features2 = Features2;
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
+
+ assert((sizeof(Features)/sizeof(Features[0])) == 2);
+ __cpu_model.__cpu_features[0] = Features[0];
+ __cpu_features2 = Features[1];
if (Vendor == SIG_INTEL) {
// Get CPU type.
- getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
- Features2, &(__cpu_model.__cpu_type),
+ getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
+ &(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
} else if (Vendor == SIG_AMD) {
// Get CPU type.
- getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2,
+ getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
&(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_AMD;
diff --git a/compiler-rt/lib/builtins/ctzdi2.c b/compiler-rt/lib/builtins/ctzdi2.c
index 9384aa6055a1..26c908d876ac 100644
--- a/compiler-rt/lib/builtins/ctzdi2.c
+++ b/compiler-rt/lib/builtins/ctzdi2.c
@@ -21,15 +21,15 @@
// ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than
// __ctzsi2, leading to infinite recursion.
#define __builtin_ctz(a) __ctzsi2(a)
-extern si_int __ctzsi2(si_int);
+extern int __ctzsi2(si_int);
#endif
// Precondition: a != 0
-COMPILER_RT_ABI si_int __ctzdi2(di_int a) {
+COMPILER_RT_ABI int __ctzdi2(di_int a) {
dwords x;
x.all = a;
const si_int f = -(x.s.low == 0);
- return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) +
+ return ctzsi((x.s.high & f) | (x.s.low & ~f)) +
(f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}
diff --git a/compiler-rt/lib/builtins/ctzsi2.c b/compiler-rt/lib/builtins/ctzsi2.c
index 09c6863b74e3..ed95c6057933 100644
--- a/compiler-rt/lib/builtins/ctzsi2.c
+++ b/compiler-rt/lib/builtins/ctzsi2.c
@@ -16,7 +16,7 @@
// Precondition: a != 0
-COMPILER_RT_ABI si_int __ctzsi2(si_int a) {
+COMPILER_RT_ABI int __ctzsi2(si_int a) {
su_int x = (su_int)a;
si_int t = ((x & 0x0000FFFF) == 0)
<< 4; // if (x has no small bits) t = 16 else 0
diff --git a/compiler-rt/lib/builtins/ctzti2.c b/compiler-rt/lib/builtins/ctzti2.c
index 2a1312c8437d..fb136d0de1c0 100644
--- a/compiler-rt/lib/builtins/ctzti2.c
+++ b/compiler-rt/lib/builtins/ctzti2.c
@@ -18,7 +18,7 @@
// Precondition: a != 0
-COMPILER_RT_ABI si_int __ctzti2(ti_int a) {
+COMPILER_RT_ABI int __ctzti2(ti_int a) {
twords x;
x.all = a;
const di_int f = -(x.s.low == 0);
diff --git a/compiler-rt/lib/builtins/ffsdi2.c b/compiler-rt/lib/builtins/ffsdi2.c
index 9c1a24260956..beae5530430e 100644
--- a/compiler-rt/lib/builtins/ffsdi2.c
+++ b/compiler-rt/lib/builtins/ffsdi2.c
@@ -15,13 +15,13 @@
// Returns: the index of the least significant 1-bit in a, or
// the value zero if a is zero. The least significant bit is index one.
-COMPILER_RT_ABI si_int __ffsdi2(di_int a) {
+COMPILER_RT_ABI int __ffsdi2(di_int a) {
dwords x;
x.all = a;
if (x.s.low == 0) {
if (x.s.high == 0)
return 0;
- return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
+ return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
}
- return __builtin_ctz(x.s.low) + 1;
+ return ctzsi(x.s.low) + 1;
}
diff --git a/compiler-rt/lib/builtins/ffssi2.c b/compiler-rt/lib/builtins/ffssi2.c
index cba1f72fdc61..ddb52927f8db 100644
--- a/compiler-rt/lib/builtins/ffssi2.c
+++ b/compiler-rt/lib/builtins/ffssi2.c
@@ -15,9 +15,9 @@
// Returns: the index of the least significant 1-bit in a, or
// the value zero if a is zero. The least significant bit is index one.
-COMPILER_RT_ABI si_int __ffssi2(si_int a) {
+COMPILER_RT_ABI int __ffssi2(si_int a) {
if (a == 0) {
return 0;
}
- return __builtin_ctz(a) + 1;
+ return ctzsi(a) + 1;
}
diff --git a/compiler-rt/lib/builtins/ffsti2.c b/compiler-rt/lib/builtins/ffsti2.c
index a2d7ce08ada1..a2177d148a09 100644
--- a/compiler-rt/lib/builtins/ffsti2.c
+++ b/compiler-rt/lib/builtins/ffsti2.c
@@ -17,7 +17,7 @@
// Returns: the index of the least significant 1-bit in a, or
// the value zero if a is zero. The least significant bit is index one.
-COMPILER_RT_ABI si_int __ffsti2(ti_int a) {
+COMPILER_RT_ABI int __ffsti2(ti_int a) {
twords x;
x.all = a;
if (x.s.low == 0) {
diff --git a/compiler-rt/lib/builtins/floatdidf.c b/compiler-rt/lib/builtins/floatdidf.c
index 8f887314b9e1..b2d8f2b44b6d 100644
--- a/compiler-rt/lib/builtins/floatdidf.c
+++ b/compiler-rt/lib/builtins/floatdidf.c
@@ -87,7 +87,7 @@ COMPILER_RT_ABI double __floatdidf(di_int a) {
}
double_bits fb;
fb.u.s.high = ((su_int)s & 0x80000000) | // sign
- ((e + 1023) << 20) | // exponent
+ ((su_int)(e + 1023) << 20) | // exponent
((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
fb.u.s.low = (su_int)a; // mantissa-low
return fb.f;
diff --git a/compiler-rt/lib/builtins/floatdisf.c b/compiler-rt/lib/builtins/floatdisf.c
index cd9e0a3b78a5..faaa1bcb3c8e 100644
--- a/compiler-rt/lib/builtins/floatdisf.c
+++ b/compiler-rt/lib/builtins/floatdisf.c
@@ -26,7 +26,7 @@ COMPILER_RT_ABI float __floatdisf(di_int a) {
const di_int s = a >> (N - 1);
a = (a ^ s) - s;
int sd = N - __builtin_clzll(a); // number of significant digits
- int e = sd - 1; // exponent
+ si_int e = sd - 1; // exponent
if (sd > FLT_MANT_DIG) {
// start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
// finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
diff --git a/compiler-rt/lib/builtins/floatsidf.c b/compiler-rt/lib/builtins/floatsidf.c
index 2c66167d794d..28cf32f6388b 100644
--- a/compiler-rt/lib/builtins/floatsidf.c
+++ b/compiler-rt/lib/builtins/floatsidf.c
@@ -17,7 +17,7 @@
#include "int_lib.h"
-COMPILER_RT_ABI fp_t __floatsidf(int a) {
+COMPILER_RT_ABI fp_t __floatsidf(si_int a) {
const int aWidth = sizeof a * CHAR_BIT;
@@ -33,14 +33,14 @@ COMPILER_RT_ABI fp_t __floatsidf(int a) {
}
// Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
+ const int exponent = (aWidth - 1) - clzsi(a);
rep_t result;
// Shift a into the significand field and clear the implicit bit. Extra
// cast to unsigned int is necessary to get the correct behavior for
// the input INT_MIN.
const int shift = significandBits - exponent;
- result = (rep_t)(unsigned int)a << shift ^ implicitBit;
+ result = (rep_t)(su_int)a << shift ^ implicitBit;
// Insert the exponent
result += (rep_t)(exponent + exponentBias) << significandBits;
@@ -50,7 +50,7 @@ COMPILER_RT_ABI fp_t __floatsidf(int a) {
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI fp_t __aeabi_i2d(int a) { return __floatsidf(a); }
+AEABI_RTABI fp_t __aeabi_i2d(si_int a) { return __floatsidf(a); }
#else
COMPILER_RT_ALIAS(__floatsidf, __aeabi_i2d)
#endif
diff --git a/compiler-rt/lib/builtins/floatundidf.c b/compiler-rt/lib/builtins/floatundidf.c
index e7c6aae5ce38..4c445b118080 100644
--- a/compiler-rt/lib/builtins/floatundidf.c
+++ b/compiler-rt/lib/builtins/floatundidf.c
@@ -90,7 +90,7 @@ COMPILER_RT_ABI double __floatundidf(du_int a) {
// a is now rounded to DBL_MANT_DIG bits
}
double_bits fb;
- fb.u.s.high = ((e + 1023) << 20) | // exponent
+ fb.u.s.high = ((su_int)(e + 1023) << 20) | // exponent
((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
fb.u.s.low = (su_int)a; // mantissa-low
return fb.f;
diff --git a/compiler-rt/lib/builtins/floatundisf.c b/compiler-rt/lib/builtins/floatundisf.c
index 87841b761ded..00d61b0c6310 100644
--- a/compiler-rt/lib/builtins/floatundisf.c
+++ b/compiler-rt/lib/builtins/floatundisf.c
@@ -24,7 +24,7 @@ COMPILER_RT_ABI float __floatundisf(du_int a) {
return 0.0F;
const unsigned N = sizeof(du_int) * CHAR_BIT;
int sd = N - __builtin_clzll(a); // number of significant digits
- int e = sd - 1; // 8 exponent
+ si_int e = sd - 1; // 8 exponent
if (sd > FLT_MANT_DIG) {
// start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
// finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
diff --git a/compiler-rt/lib/builtins/floatunsidf.c b/compiler-rt/lib/builtins/floatunsidf.c
index 2c01c3041434..9b3e5fea0e45 100644
--- a/compiler-rt/lib/builtins/floatunsidf.c
+++ b/compiler-rt/lib/builtins/floatunsidf.c
@@ -17,7 +17,7 @@
#include "int_lib.h"
-COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) {
+COMPILER_RT_ABI fp_t __floatunsidf(su_int a) {
const int aWidth = sizeof a * CHAR_BIT;
@@ -26,7 +26,7 @@ COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) {
return fromRep(0);
// Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
+ const int exponent = (aWidth - 1) - clzsi(a);
rep_t result;
// Shift a into the significand field and clear the implicit bit.
@@ -40,7 +40,7 @@ COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) {
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { return __floatunsidf(a); }
+AEABI_RTABI fp_t __aeabi_ui2d(su_int a) { return __floatunsidf(a); }
#else
COMPILER_RT_ALIAS(__floatunsidf, __aeabi_ui2d)
#endif
diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h
index d2083c426722..fb512672e35e 100644
--- a/compiler-rt/lib/builtins/fp_extend.h
+++ b/compiler-rt/lib/builtins/fp_extend.h
@@ -21,7 +21,7 @@ typedef float src_t;
typedef uint32_t src_rep_t;
#define SRC_REP_C UINT32_C
static const int srcSigBits = 23;
-#define src_rep_t_clz __builtin_clz
+#define src_rep_t_clz clzsi
#elif defined SRC_DOUBLE
typedef double src_t;
diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h
index e2a906681c46..bd1f180f499e 100644
--- a/compiler-rt/lib/builtins/fp_lib.h
+++ b/compiler-rt/lib/builtins/fp_lib.h
@@ -46,7 +46,7 @@ typedef float fp_t;
#define REP_C UINT32_C
#define significandBits 23
-static __inline int rep_clz(rep_t a) { return __builtin_clz(a); }
+static __inline int rep_clz(rep_t a) { return clzsi(a); }
// 32x32 --> 64 bit multiply
static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
@@ -69,9 +69,9 @@ static __inline int rep_clz(rep_t a) {
return __builtin_clzl(a);
#else
if (a & REP_C(0xffffffff00000000))
- return __builtin_clz(a >> 32);
+ return clzsi(a >> 32);
else
- return 32 + __builtin_clz(a & REP_C(0xffffffff));
+ return 32 + clzsi(a & REP_C(0xffffffff));
#endif
}
diff --git a/compiler-rt/lib/builtins/fp_mode.h b/compiler-rt/lib/builtins/fp_mode.h
index 51bec0431a40..4ba682c384f2 100644
--- a/compiler-rt/lib/builtins/fp_mode.h
+++ b/compiler-rt/lib/builtins/fp_mode.h
@@ -23,7 +23,7 @@ typedef enum {
FE_TOWARDZERO
} FE_ROUND_MODE;
-FE_ROUND_MODE __fe_getround();
-int __fe_raise_inexact();
+FE_ROUND_MODE __fe_getround(void);
+int __fe_raise_inexact(void);
#endif // FP_MODE_H
diff --git a/compiler-rt/lib/builtins/hexagon/dffma.S b/compiler-rt/lib/builtins/hexagon/dffma.S
index c201d3d8be5e..843e88b3cab8 100644
--- a/compiler-rt/lib/builtins/hexagon/dffma.S
+++ b/compiler-rt/lib/builtins/hexagon/dffma.S
@@ -104,13 +104,11 @@
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
- .global fma
- .type fma,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
-fma:
+.Lfma_begin:
{
P_TMP = dfclass(A,#2)
P_TMP = dfclass(B,#2)
@@ -561,7 +559,7 @@ fma:
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
- jump fma
+ jump .Lfma_begin
.Lfma_ab_tiny:
ATMP = combine(##0x00100000,#0)
@@ -569,7 +567,7 @@ fma:
A = insert(ATMP,#63,#0)
B = insert(ATMP,#63,#0)
}
- jump fma
+ jump .Lfma_begin
.Lab_inf:
{
diff --git a/compiler-rt/lib/builtins/hexagon/fabs_opt.S b/compiler-rt/lib/builtins/hexagon/fabs_opt.S
deleted file mode 100644
index 6bf9b84b3d20..000000000000
--- a/compiler-rt/lib/builtins/hexagon/fabs_opt.S
+++ /dev/null
@@ -1,36 +0,0 @@
-//===----------------------Hexagon builtin routine ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-.macro FUNCTION_BEGIN name
-.text
-.p2align 5
-.globl \name
-.type \name, @function
-\name:
-.endm
-
-.macro FUNCTION_END name
-.size \name, . - \name
-.endm
-
-FUNCTION_BEGIN fabs
- {
- r1 = clrbit(r1, #31)
- jumpr r31
- }
-FUNCTION_END fabs
-
-FUNCTION_BEGIN fabsf
- {
- r0 = clrbit(r0, #31)
- jumpr r31
- }
-FUNCTION_END fabsf
-
- .globl fabsl
- .set fabsl, fabs
diff --git a/compiler-rt/lib/builtins/hexagon/fma_opt.S b/compiler-rt/lib/builtins/hexagon/fma_opt.S
deleted file mode 100644
index 7f566adffd6a..000000000000
--- a/compiler-rt/lib/builtins/hexagon/fma_opt.S
+++ /dev/null
@@ -1,30 +0,0 @@
-//===----------------------Hexagon builtin routine ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-.macro FUNCTION_BEGIN name
-.text
-.p2align 5
-.globl \name
-.type \name, @function
-\name:
-.endm
-
-.macro FUNCTION_END name
-.size \name, . - \name
-.endm
-
-FUNCTION_BEGIN fmaf
- r2 += sfmpy(r0, r1)
- {
- r0 = r2
- jumpr r31
- }
-FUNCTION_END fmaf
-
- .globl fmal
- .set fmal, fma
diff --git a/compiler-rt/lib/builtins/hexagon/fmax_opt.S b/compiler-rt/lib/builtins/hexagon/fmax_opt.S
deleted file mode 100644
index 81d711dff8d2..000000000000
--- a/compiler-rt/lib/builtins/hexagon/fmax_opt.S
+++ /dev/null
@@ -1,29 +0,0 @@
-//===----------------------Hexagon builtin routine ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-.macro FUNCTION_BEGIN name
-.text
-.p2align 5
-.globl \name
-.type \name, @function
-\name:
-.endm
-
-.macro FUNCTION_END name
-.size \name, . - \name
-.endm
-
-FUNCTION_BEGIN fmaxf
- {
- r0 = sfmax(r0, r1)
- jumpr r31
- }
-FUNCTION_END fmaxf
-
- .globl fmaxl
- .set fmaxl, fmax
diff --git a/compiler-rt/lib/builtins/hexagon/fmin_opt.S b/compiler-rt/lib/builtins/hexagon/fmin_opt.S
deleted file mode 100644
index d043f1d7a698..000000000000
--- a/compiler-rt/lib/builtins/hexagon/fmin_opt.S
+++ /dev/null
@@ -1,29 +0,0 @@
-//===----------------------Hexagon builtin routine ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-.macro FUNCTION_BEGIN name
-.text
-.p2align 5
-.globl \name
-.type \name, @function
-\name:
-.endm
-
-.macro FUNCTION_END name
-.size \name, . - \name
-.endm
-
-FUNCTION_BEGIN fminf
- {
- r0 = sfmin(r0, r1)
- jumpr r31
- }
-FUNCTION_END fminf
-
- .globl fminl
- .set fminl, fmin
diff --git a/compiler-rt/lib/builtins/i386/floatdidf.S b/compiler-rt/lib/builtins/i386/floatdidf.S
index ab7422c312dc..d588e770364e 100644
--- a/compiler-rt/lib/builtins/i386/floatdidf.S
+++ b/compiler-rt/lib/builtins/i386/floatdidf.S
@@ -4,7 +4,7 @@
#include "../assembly.h"
-// double __floatundidf(du_int a);
+// double __floatdidf(du_int a);
#ifdef __i386__
diff --git a/compiler-rt/lib/builtins/i386/floatdixf.S b/compiler-rt/lib/builtins/i386/floatdixf.S
index df70f5f9e6e3..19dd0835a9c5 100644
--- a/compiler-rt/lib/builtins/i386/floatdixf.S
+++ b/compiler-rt/lib/builtins/i386/floatdixf.S
@@ -4,7 +4,7 @@
#include "../assembly.h"
-// float __floatdixf(di_int a);
+// long double __floatdixf(di_int a);
#ifdef __i386__
diff --git a/compiler-rt/lib/builtins/int_div_impl.inc b/compiler-rt/lib/builtins/int_div_impl.inc
new file mode 100644
index 000000000000..de0373889078
--- /dev/null
+++ b/compiler-rt/lib/builtins/int_div_impl.inc
@@ -0,0 +1,70 @@
+//===-- int_div_impl.inc - Integer division ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3.
+//
+//===----------------------------------------------------------------------===//
+
+#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a))
+
+// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide
+static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) {
+ const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
+ // d == 0 cases are unspecified.
+ unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ // 0 <= sr <= N - 1 or sr is very large.
+ if (sr > N - 1) // n < d
+ return 0;
+ if (sr == N - 1) // d == 1
+ return n;
+ ++sr;
+ // 1 <= sr <= N - 1. Shifts do not trigger UB.
+ fixuint_t r = n >> sr;
+ n <<= N - sr;
+ fixuint_t carry = 0;
+ for (; sr > 0; --sr) {
+ r = (r << 1) | (n >> (N - 1));
+ n = (n << 1) | carry;
+ // Branch-less version of:
+ // carry = 0;
+ // if (r >= d) r -= d, carry = 1;
+ const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ n = (n << 1) | carry;
+ return n;
+}
+
+// Mostly identical to __udivXi3 but the return values are different.
+static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) {
+ const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
+ // d == 0 cases are unspecified.
+ unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ // 0 <= sr <= N - 1 or sr is very large.
+ if (sr > N - 1) // n < d
+ return n;
+ if (sr == N - 1) // d == 1
+ return 0;
+ ++sr;
+ // 1 <= sr <= N - 1. Shifts do not trigger UB.
+ fixuint_t r = n >> sr;
+ n <<= N - sr;
+ fixuint_t carry = 0;
+ for (; sr > 0; --sr) {
+ r = (r << 1) | (n >> (N - 1));
+ n = (n << 1) | carry;
+ // Branch-less version of:
+ // carry = 0;
+ // if (r >= d) r -= d, carry = 1;
+ const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ return r;
+}
diff --git a/compiler-rt/lib/builtins/int_lib.h b/compiler-rt/lib/builtins/int_lib.h
index 3092f68c084a..991c4a99ea6e 100644
--- a/compiler-rt/lib/builtins/int_lib.h
+++ b/compiler-rt/lib/builtins/int_lib.h
@@ -48,12 +48,20 @@
#define XSTR(a) STR(a)
#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name
-#if defined(__ELF__) || defined(__MINGW32__) || defined(__wasm__)
+#if defined(__ELF__) || defined(__MINGW32__) || defined(__wasm__) || \
+ defined(_AIX)
#define COMPILER_RT_ALIAS(name, aliasname) \
COMPILER_RT_ABI __typeof(name) aliasname __attribute__((__alias__(#name)));
#elif defined(__APPLE__)
+#if defined(VISIBILITY_HIDDEN)
+#define COMPILER_RT_ALIAS_VISIBILITY(name) \
+ __asm__(".private_extern " SYMBOL_NAME(name));
+#else
+#define COMPILER_RT_ALIAS_VISIBILITY(name)
+#endif
#define COMPILER_RT_ALIAS(name, aliasname) \
__asm__(".globl " SYMBOL_NAME(aliasname)); \
+ COMPILER_RT_ALIAS_VISIBILITY(aliasname) \
__asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \
COMPILER_RT_ABI __typeof(name) aliasname;
#elif defined(_WIN32)
@@ -84,8 +92,8 @@
// Include internal utility function declarations.
#include "int_util.h"
-COMPILER_RT_ABI si_int __paritysi2(si_int a);
-COMPILER_RT_ABI si_int __paritydi2(di_int a);
+COMPILER_RT_ABI int __paritysi2(si_int a);
+COMPILER_RT_ABI int __paritydi2(di_int a);
COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
@@ -94,7 +102,7 @@ COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem);
COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem);
#ifdef CRT_HAS_128BIT
-COMPILER_RT_ABI si_int __clzti2(ti_int a);
+COMPILER_RT_ABI int __clzti2(ti_int a);
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);
#endif
@@ -102,14 +110,14 @@ COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
-uint32_t __inline __builtin_ctz(uint32_t value) {
+int __inline __builtin_ctz(uint32_t value) {
unsigned long trailing_zero = 0;
if (_BitScanForward(&trailing_zero, value))
return trailing_zero;
return 32;
}
-uint32_t __inline __builtin_clz(uint32_t value) {
+int __inline __builtin_clz(uint32_t value) {
unsigned long leading_zero = 0;
if (_BitScanReverse(&leading_zero, value))
return 31 - leading_zero;
@@ -117,14 +125,14 @@ uint32_t __inline __builtin_clz(uint32_t value) {
}
#if defined(_M_ARM) || defined(_M_X64)
-uint32_t __inline __builtin_clzll(uint64_t value) {
+int __inline __builtin_clzll(uint64_t value) {
unsigned long leading_zero = 0;
if (_BitScanReverse64(&leading_zero, value))
return 63 - leading_zero;
return 64;
}
#else
-uint32_t __inline __builtin_clzll(uint64_t value) {
+int __inline __builtin_clzll(uint64_t value) {
if (value == 0)
return 64;
uint32_t msh = (uint32_t)(value >> 32);
diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h
index f89220d54350..705355a4840d 100644
--- a/compiler-rt/lib/builtins/int_types.h
+++ b/compiler-rt/lib/builtins/int_types.h
@@ -22,11 +22,20 @@
#ifdef si_int
#undef si_int
#endif
-typedef int si_int;
-typedef unsigned su_int;
+typedef int32_t si_int;
+typedef uint32_t su_int;
+#if UINT_MAX == 0xFFFFFFFF
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+#elif ULONG_MAX == 0xFFFFFFFF
+#define clzsi __builtin_clzl
+#define ctzsi __builtin_ctzl
+#else
+#error could not determine appropriate clzsi macro for this system
+#endif
-typedef long long di_int;
-typedef unsigned long long du_int;
+typedef int64_t di_int;
+typedef uint64_t du_int;
typedef union {
di_int all;
@@ -135,9 +144,12 @@ typedef struct {
// Check if the target supports 80 bit extended precision long doubles.
// Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC
// still makes it 80 bits. Clang will match whatever compiler it is trying to
-// be compatible with.
-#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \
- defined(__m68k__) || defined(__ia64__)
+// be compatible with. On 32-bit x86 Android, long double is 64 bits, while on
+// x86_64 Android, long double is 128 bits.
+#if (defined(__i386__) || defined(__x86_64__)) && \
+ !(defined(_MSC_VER) || defined(__ANDROID__))
+#define HAS_80_BIT_LONG_DOUBLE 1
+#elif defined(__m68k__) || defined(__ia64__)
#define HAS_80_BIT_LONG_DOUBLE 1
#else
#define HAS_80_BIT_LONG_DOUBLE 0
diff --git a/compiler-rt/lib/builtins/lshrdi3.c b/compiler-rt/lib/builtins/lshrdi3.c
index 97e08e1e9ba0..6072152583ac 100644
--- a/compiler-rt/lib/builtins/lshrdi3.c
+++ b/compiler-rt/lib/builtins/lshrdi3.c
@@ -16,7 +16,7 @@
// Precondition: 0 <= b < bits_in_dword
-COMPILER_RT_ABI di_int __lshrdi3(di_int a, si_int b) {
+COMPILER_RT_ABI di_int __lshrdi3(di_int a, int b) {
const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
udwords input;
udwords result;
diff --git a/compiler-rt/lib/builtins/paritydi2.c b/compiler-rt/lib/builtins/paritydi2.c
index dd9d45e63ea4..58e85f89e043 100644
--- a/compiler-rt/lib/builtins/paritydi2.c
+++ b/compiler-rt/lib/builtins/paritydi2.c
@@ -14,7 +14,7 @@
// Returns: 1 if number of bits is odd else returns 0
-COMPILER_RT_ABI si_int __paritydi2(di_int a) {
+COMPILER_RT_ABI int __paritydi2(di_int a) {
dwords x;
x.all = a;
return __paritysi2(x.s.high ^ x.s.low);
diff --git a/compiler-rt/lib/builtins/paritysi2.c b/compiler-rt/lib/builtins/paritysi2.c
index 3efa961f2f85..a4b84e080632 100644
--- a/compiler-rt/lib/builtins/paritysi2.c
+++ b/compiler-rt/lib/builtins/paritysi2.c
@@ -14,7 +14,7 @@
// Returns: 1 if number of bits is odd else returns 0
-COMPILER_RT_ABI si_int __paritysi2(si_int a) {
+COMPILER_RT_ABI int __paritysi2(si_int a) {
su_int x = (su_int)a;
x ^= x >> 16;
x ^= x >> 8;
diff --git a/compiler-rt/lib/builtins/parityti2.c b/compiler-rt/lib/builtins/parityti2.c
index f3942ba8378c..79e920d8a02d 100644
--- a/compiler-rt/lib/builtins/parityti2.c
+++ b/compiler-rt/lib/builtins/parityti2.c
@@ -16,7 +16,7 @@
// Returns: 1 if number of bits is odd else returns 0
-COMPILER_RT_ABI si_int __parityti2(ti_int a) {
+COMPILER_RT_ABI int __parityti2(ti_int a) {
twords x;
x.all = a;
return __paritydi2(x.s.high ^ x.s.low);
diff --git a/compiler-rt/lib/builtins/popcountdi2.c b/compiler-rt/lib/builtins/popcountdi2.c
index 9bbc39c6608a..20dd0b0239ef 100644
--- a/compiler-rt/lib/builtins/popcountdi2.c
+++ b/compiler-rt/lib/builtins/popcountdi2.c
@@ -14,7 +14,7 @@
// Returns: count of 1 bits
-COMPILER_RT_ABI si_int __popcountdi2(di_int a) {
+COMPILER_RT_ABI int __popcountdi2(di_int a) {
du_int x2 = (du_int)a;
x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
// Every 2 bits holds the sum of every pair of bits (32)
diff --git a/compiler-rt/lib/builtins/popcountsi2.c b/compiler-rt/lib/builtins/popcountsi2.c
index 75e592a778d9..4d346c45d9ce 100644
--- a/compiler-rt/lib/builtins/popcountsi2.c
+++ b/compiler-rt/lib/builtins/popcountsi2.c
@@ -14,7 +14,7 @@
// Returns: count of 1 bits
-COMPILER_RT_ABI si_int __popcountsi2(si_int a) {
+COMPILER_RT_ABI int __popcountsi2(si_int a) {
su_int x = (su_int)a;
x = x - ((x >> 1) & 0x55555555);
// Every 2 bits holds the sum of every pair of bits
diff --git a/compiler-rt/lib/builtins/popcountti2.c b/compiler-rt/lib/builtins/popcountti2.c
index 853fd722309e..79cbb2fb34c0 100644
--- a/compiler-rt/lib/builtins/popcountti2.c
+++ b/compiler-rt/lib/builtins/popcountti2.c
@@ -17,7 +17,7 @@
// Returns: count of 1 bits
-COMPILER_RT_ABI si_int __popcountti2(ti_int a) {
+COMPILER_RT_ABI int __popcountti2(ti_int a) {
tu_int x3 = (tu_int)a;
x3 = x3 - ((x3 >> 1) &
(((tu_int)0x5555555555555555uLL << 64) | 0x5555555555555555uLL));
diff --git a/compiler-rt/lib/builtins/powidf2.c b/compiler-rt/lib/builtins/powidf2.c
index 9697588484e7..81058af50829 100644
--- a/compiler-rt/lib/builtins/powidf2.c
+++ b/compiler-rt/lib/builtins/powidf2.c
@@ -14,7 +14,7 @@
// Returns: a ^ b
-COMPILER_RT_ABI double __powidf2(double a, si_int b) {
+COMPILER_RT_ABI double __powidf2(double a, int b) {
const int recip = b < 0;
double r = 1;
while (1) {
diff --git a/compiler-rt/lib/builtins/powisf2.c b/compiler-rt/lib/builtins/powisf2.c
index 469402348825..d0ab26167bbd 100644
--- a/compiler-rt/lib/builtins/powisf2.c
+++ b/compiler-rt/lib/builtins/powisf2.c
@@ -14,7 +14,7 @@
// Returns: a ^ b
-COMPILER_RT_ABI float __powisf2(float a, si_int b) {
+COMPILER_RT_ABI float __powisf2(float a, int b) {
const int recip = b < 0;
float r = 1;
while (1) {
diff --git a/compiler-rt/lib/builtins/powitf2.c b/compiler-rt/lib/builtins/powitf2.c
index fcbdb4c2ee2a..8e639a03a3c4 100644
--- a/compiler-rt/lib/builtins/powitf2.c
+++ b/compiler-rt/lib/builtins/powitf2.c
@@ -10,13 +10,14 @@
//
//===----------------------------------------------------------------------===//
-#include "int_lib.h"
+#define QUAD_PRECISION
+#include "fp_lib.h"
-#if _ARCH_PPC
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
// Returns: a ^ b
-COMPILER_RT_ABI long double __powitf2(long double a, si_int b) {
+COMPILER_RT_ABI long double __powitf2(long double a, int b) {
const int recip = b < 0;
long double r = 1;
while (1) {
diff --git a/compiler-rt/lib/builtins/powixf2.c b/compiler-rt/lib/builtins/powixf2.c
index b7b52095afa1..3edfe9fd7af5 100644
--- a/compiler-rt/lib/builtins/powixf2.c
+++ b/compiler-rt/lib/builtins/powixf2.c
@@ -16,7 +16,7 @@
// Returns: a ^ b
-COMPILER_RT_ABI long double __powixf2(long double a, si_int b) {
+COMPILER_RT_ABI long double __powixf2(long double a, int b) {
const int recip = b < 0;
long double r = 1;
while (1) {
diff --git a/compiler-rt/lib/builtins/riscv/int_mul_impl.inc b/compiler-rt/lib/builtins/riscv/int_mul_impl.inc
new file mode 100644
index 000000000000..50951d5f4195
--- /dev/null
+++ b/compiler-rt/lib/builtins/riscv/int_mul_impl.inc
@@ -0,0 +1,31 @@
+//===-- int_mul_impl.inc - Integer multiplication -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Helpers used by __mulsi3, __muldi3.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__riscv_mul)
+ .text
+ .align 2
+
+ .globl __mulxi3
+ .type __mulxi3, @function
+__mulxi3:
+ mv a2, a0
+ mv a0, zero
+.L1:
+ andi a3, a1, 1
+ beqz a3, .L2
+ add a0, a0, a2
+.L2:
+ srli a1, a1, 1
+ slli a2, a2, 1
+ bnez a1, .L1
+ ret
+#endif
diff --git a/compiler-rt/lib/builtins/riscv/muldi3.S b/compiler-rt/lib/builtins/riscv/muldi3.S
new file mode 100644
index 000000000000..9e292e8dd8b9
--- /dev/null
+++ b/compiler-rt/lib/builtins/riscv/muldi3.S
@@ -0,0 +1,11 @@
+//===--- muldi3.S - Integer multiplication routines -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#if __riscv_xlen == 64
+#define __mulxi3 __muldi3
+#include "int_mul_impl.inc"
+#endif
diff --git a/compiler-rt/lib/builtins/riscv/mulsi3.S b/compiler-rt/lib/builtins/riscv/mulsi3.S
index 5464919b26b9..cfafb7a0d7b3 100644
--- a/compiler-rt/lib/builtins/riscv/mulsi3.S
+++ b/compiler-rt/lib/builtins/riscv/mulsi3.S
@@ -1,4 +1,4 @@
-//===--- mulsi3.S - Integer multiplication routines routines ---===//
+//===--- mulsi3.S - Integer multiplication routines -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,22 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#if !defined(__riscv_mul) && __riscv_xlen == 32
- .text
- .align 2
-
- .globl __mulsi3
- .type __mulsi3, @function
-__mulsi3:
- mv a2, a0
- mv a0, zero
-.L1:
- andi a3, a1, 1
- beqz a3, .L2
- add a0, a0, a2
-.L2:
- srli a1, a1, 1
- slli a2, a2, 1
- bnez a1, .L1
- ret
+#if __riscv_xlen == 32
+#define __mulxi3 __mulsi3
+#include "int_mul_impl.inc"
#endif
diff --git a/compiler-rt/lib/builtins/udivdi3.c b/compiler-rt/lib/builtins/udivdi3.c
index a23139ec947f..74319cbe71c3 100644
--- a/compiler-rt/lib/builtins/udivdi3.c
+++ b/compiler-rt/lib/builtins/udivdi3.c
@@ -12,8 +12,12 @@
#include "int_lib.h"
+typedef du_int fixuint_t;
+typedef di_int fixint_t;
+#include "int_div_impl.inc"
+
// Returns: a / b
COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
- return __udivmoddi4(a, b, 0);
+ return __udivXi3(a, b);
}
diff --git a/compiler-rt/lib/builtins/udivmoddi4.c b/compiler-rt/lib/builtins/udivmoddi4.c
index 5b297c32d790..10b41df28f84 100644
--- a/compiler-rt/lib/builtins/udivmoddi4.c
+++ b/compiler-rt/lib/builtins/udivmoddi4.c
@@ -87,7 +87,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
// K K
// ---
// K 0
- sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ sr = clzsi(d.s.high) - clzsi(n.s.high);
// 0 <= sr <= n_uword_bits - 2 or sr large
if (sr > n_uword_bits - 2) {
if (rem)
@@ -120,7 +120,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
// K X
// ---
// 0 K
- sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
+ sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high);
// 2 <= sr <= n_udword_bits - 1
// q.all = n.all << (n_udword_bits - sr);
// r.all = n.all >> sr;
@@ -145,7 +145,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
// K X
// ---
// K K
- sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ sr = clzsi(d.s.high) - clzsi(n.s.high);
// 0 <= sr <= n_uword_bits - 1 or sr large
if (sr > n_uword_bits - 1) {
if (rem)
diff --git a/compiler-rt/lib/builtins/udivmodti4.c b/compiler-rt/lib/builtins/udivmodti4.c
index dd14a8b579ca..55def37c9e1f 100644
--- a/compiler-rt/lib/builtins/udivmodti4.c
+++ b/compiler-rt/lib/builtins/udivmodti4.c
@@ -14,182 +14,145 @@
#ifdef CRT_HAS_128BIT
+// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
+// Remainder stored in r.
+// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
+// fallback. For a correctness proof see the reference for this algorithm
+// in Knuth, Volume 2, section 4.3.1, Algorithm D.
+UNUSED
+static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
+ du_int *r) {
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
+ du_int un1, un0; // Norm. dividend LSD's
+ du_int vn1, vn0; // Norm. divisor digits
+ du_int q1, q0; // Quotient digits
+ du_int un64, un21, un10; // Dividend digit pairs
+ du_int rhat; // A remainder
+ si_int s; // Shift amount for normalization
+
+ s = __builtin_clzll(v);
+ if (s > 0) {
+ // Normalize the divisor.
+ v = v << s;
+ un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
+ un10 = u0 << s; // Shift dividend left
+ } else {
+ // Avoid undefined behavior of (u0 >> 64).
+ un64 = u1;
+ un10 = u0;
+ }
+
+ // Break divisor up into two 32-bit digits.
+ vn1 = v >> (n_udword_bits / 2);
+ vn0 = v & 0xFFFFFFFF;
+
+ // Break right half of dividend into two digits.
+ un1 = un10 >> (n_udword_bits / 2);
+ un0 = un10 & 0xFFFFFFFF;
+
+ // Compute the first quotient digit, q1.
+ q1 = un64 / vn1;
+ rhat = un64 - q1 * vn1;
+
+ // q1 has at most error 2. No more than 2 iterations.
+ while (q1 >= b || q1 * vn0 > b * rhat + un1) {
+ q1 = q1 - 1;
+ rhat = rhat + vn1;
+ if (rhat >= b)
+ break;
+ }
+
+ un21 = un64 * b + un1 - q1 * v;
+
+ // Compute the second quotient digit.
+ q0 = un21 / vn1;
+ rhat = un21 - q0 * vn1;
+
+ // q0 has at most error 2. No more than 2 iterations.
+ while (q0 >= b || q0 * vn0 > b * rhat + un0) {
+ q0 = q0 - 1;
+ rhat = rhat + vn1;
+ if (rhat >= b)
+ break;
+ }
+
+ *r = (un21 * b + un0 - q0 * v) >> s;
+ return q1 * b + q0;
+}
+
+static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
+ du_int *r) {
+#if defined(__x86_64__)
+ du_int result;
+ __asm__("divq %[v]"
+ : "=a"(result), "=d"(*r)
+ : [ v ] "r"(v), "a"(u0), "d"(u1));
+ return result;
+#else
+ return udiv128by64to64default(u1, u0, v, r);
+#endif
+}
+
// Effects: if rem != 0, *rem = a % b
// Returns: a / b
-// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
-
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
- const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
- utwords n;
- n.all = a;
- utwords d;
- d.all = b;
- utwords q;
- utwords r;
- unsigned sr;
- // special cases, X is unknown, K != 0
- if (n.s.high == 0) {
- if (d.s.high == 0) {
- // 0 X
- // ---
- // 0 X
- if (rem)
- *rem = n.s.low % d.s.low;
- return n.s.low / d.s.low;
- }
- // 0 X
- // ---
- // K X
+ utwords dividend;
+ dividend.all = a;
+ utwords divisor;
+ divisor.all = b;
+ utwords quotient;
+ utwords remainder;
+ if (divisor.all > dividend.all) {
if (rem)
- *rem = n.s.low;
+ *rem = dividend.all;
return 0;
}
- // n.s.high != 0
- if (d.s.low == 0) {
- if (d.s.high == 0) {
- // K X
- // ---
- // 0 0
- if (rem)
- *rem = n.s.high % d.s.low;
- return n.s.high / d.s.low;
- }
- // d.s.high != 0
- if (n.s.low == 0) {
- // K 0
- // ---
- // K 0
- if (rem) {
- r.s.high = n.s.high % d.s.high;
- r.s.low = 0;
- *rem = r.all;
- }
- return n.s.high / d.s.high;
- }
- // K K
- // ---
- // K 0
- if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
- if (rem) {
- r.s.low = n.s.low;
- r.s.high = n.s.high & (d.s.high - 1);
- *rem = r.all;
- }
- return n.s.high >> __builtin_ctzll(d.s.high);
- }
- // K K
- // ---
- // K 0
- sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
- // 0 <= sr <= n_udword_bits - 2 or sr large
- if (sr > n_udword_bits - 2) {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- // 1 <= sr <= n_udword_bits - 1
- // q.all = n.all << (n_utword_bits - sr);
- q.s.low = 0;
- q.s.high = n.s.low << (n_udword_bits - sr);
- // r.all = n.all >> sr;
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- } else /* d.s.low != 0 */ {
- if (d.s.high == 0) {
- // K X
- // ---
- // 0 K
- if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
- if (rem)
- *rem = n.s.low & (d.s.low - 1);
- if (d.s.low == 1)
- return n.all;
- sr = __builtin_ctzll(d.s.low);
- q.s.high = n.s.high >> sr;
- q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- return q.all;
- }
- // K X
- // ---
- // 0 K
- sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) -
- __builtin_clzll(n.s.high);
- // 2 <= sr <= n_utword_bits - 1
- // q.all = n.all << (n_utword_bits - sr);
- // r.all = n.all >> sr;
- if (sr == n_udword_bits) {
- q.s.low = 0;
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- } else if (sr < n_udword_bits) /* 2 <= sr <= n_udword_bits - 1 */ {
- q.s.low = 0;
- q.s.high = n.s.low << (n_udword_bits - sr);
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- } else /* n_udword_bits + 1 <= sr <= n_utword_bits - 1 */ {
- q.s.low = n.s.low << (n_utword_bits - sr);
- q.s.high = (n.s.high << (n_utword_bits - sr)) |
- (n.s.low >> (sr - n_udword_bits));
- r.s.high = 0;
- r.s.low = n.s.high >> (sr - n_udword_bits);
- }
+ // When the divisor fits in 64 bits, we can use an optimized path.
+ if (divisor.s.high == 0) {
+ remainder.s.high = 0;
+ if (dividend.s.high < divisor.s.low) {
+ // The result fits in 64 bits.
+ quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+ divisor.s.low, &remainder.s.low);
+ quotient.s.high = 0;
} else {
- // K X
- // ---
- // K K
- sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
- // 0 <= sr <= n_udword_bits - 1 or sr large
- if (sr > n_udword_bits - 1) {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- // 1 <= sr <= n_udword_bits
- // q.all = n.all << (n_utword_bits - sr);
- // r.all = n.all >> sr;
- q.s.low = 0;
- if (sr == n_udword_bits) {
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- } else {
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- q.s.high = n.s.low << (n_udword_bits - sr);
- }
+ // First, divide with the high part to get the remainder in dividend.s.high.
+ // After that dividend.s.high < divisor.s.low.
+ quotient.s.high = dividend.s.high / divisor.s.low;
+ dividend.s.high = dividend.s.high % divisor.s.low;
+ quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+ divisor.s.low, &remainder.s.low);
}
+ if (rem)
+ *rem = remainder.all;
+ return quotient.all;
}
- // Not a special case
- // q and r are initialized with:
- // q.all = n.all << (n_utword_bits - sr);
- // r.all = n.all >> sr;
- // 1 <= sr <= n_utword_bits - 1
- su_int carry = 0;
- for (; sr > 0; --sr) {
- // r:q = ((r:q) << 1) | carry
- r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
- r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
- q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
- q.s.low = (q.s.low << 1) | carry;
- // carry = 0;
- // if (r.all >= d.all)
+ // 0 <= shift <= 63.
+ si_int shift =
+ __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
+ divisor.all <<= shift;
+ quotient.s.high = 0;
+ quotient.s.low = 0;
+ for (; shift >= 0; --shift) {
+ quotient.s.low <<= 1;
+ // Branch free version of.
+ // if (dividend.all >= divisor.all)
// {
- // r.all -= d.all;
- // carry = 1;
+ // dividend.all -= divisor.all;
+ // carry = 1;
// }
- const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
- carry = s & 1;
- r.all -= d.all & s;
+ const ti_int s =
+ (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
+ quotient.s.low |= s & 1;
+ dividend.all -= divisor.all & s;
+ divisor.all >>= 1;
}
- q.all = (q.all << 1) | carry;
if (rem)
- *rem = r.all;
- return q.all;
+ *rem = dividend.all;
+ return quotient.all;
}
#endif // CRT_HAS_128BIT
diff --git a/compiler-rt/lib/builtins/udivsi3.c b/compiler-rt/lib/builtins/udivsi3.c
index 18cc96c1b2e0..3894e1597552 100644
--- a/compiler-rt/lib/builtins/udivsi3.c
+++ b/compiler-rt/lib/builtins/udivsi3.c
@@ -12,49 +12,14 @@
#include "int_lib.h"
-// Returns: a / b
+typedef su_int fixuint_t;
+typedef si_int fixint_t;
+#include "int_div_impl.inc"
-// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
+// Returns: a / b
-// This function should not call __divsi3!
-COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) {
- const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
- su_int q;
- su_int r;
- unsigned sr;
- // special cases
- if (d == 0)
- return 0; // ?!
- if (n == 0)
- return 0;
- sr = __builtin_clz(d) - __builtin_clz(n);
- // 0 <= sr <= n_uword_bits - 1 or sr large
- if (sr > n_uword_bits - 1) // d > r
- return 0;
- if (sr == n_uword_bits - 1) // d == 1
- return n;
- ++sr;
- // 1 <= sr <= n_uword_bits - 1
- // Not a special case
- q = n << (n_uword_bits - sr);
- r = n >> sr;
- su_int carry = 0;
- for (; sr > 0; --sr) {
- // r:q = ((r:q) << 1) | carry
- r = (r << 1) | (q >> (n_uword_bits - 1));
- q = (q << 1) | carry;
- // carry = 0;
- // if (r.all >= d.all)
- // {
- // r.all -= d.all;
- // carry = 1;
- // }
- const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1);
- carry = s & 1;
- r -= d & s;
- }
- q = (q << 1) | carry;
- return q;
+COMPILER_RT_ABI su_int __udivsi3(su_int a, su_int b) {
+ return __udivXi3(a, b);
}
#if defined(__ARM_EABI__)
diff --git a/compiler-rt/lib/builtins/umoddi3.c b/compiler-rt/lib/builtins/umoddi3.c
index 965cf8fc01bd..e672da96ef62 100644
--- a/compiler-rt/lib/builtins/umoddi3.c
+++ b/compiler-rt/lib/builtins/umoddi3.c
@@ -12,10 +12,12 @@
#include "int_lib.h"
+typedef du_int fixuint_t;
+typedef di_int fixint_t;
+#include "int_div_impl.inc"
+
// Returns: a % b
COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
- du_int r;
- __udivmoddi4(a, b, &r);
- return r;
+ return __umodXi3(a, b);
}
diff --git a/compiler-rt/lib/builtins/umodsi3.c b/compiler-rt/lib/builtins/umodsi3.c
index ce9abcd94ef7..5383aea656a9 100644
--- a/compiler-rt/lib/builtins/umodsi3.c
+++ b/compiler-rt/lib/builtins/umodsi3.c
@@ -12,8 +12,12 @@
#include "int_lib.h"
+typedef su_int fixuint_t;
+typedef si_int fixint_t;
+#include "int_div_impl.inc"
+
// Returns: a % b
COMPILER_RT_ABI su_int __umodsi3(su_int a, su_int b) {
- return a - __udivsi3(a, b) * b;
+ return __umodXi3(a, b);
}
diff --git a/compiler-rt/lib/builtins/ve/grow_stack.S b/compiler-rt/lib/builtins/ve/grow_stack.S
new file mode 100644
index 000000000000..f403798495af
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
+ subu.l %sp, %sp, %s0 # sp -= alloca size
+ and %sp, -16, %sp # align sp
+ brge.l.t %sp, %sl, 1f
+ ld %s63, 0x18(,%tp) # load param area
+ lea %s62, 0x13b # syscall # of grow
+ shm.l %s62, 0x0(%s63) # stored at addr:0
+ shm.l %sl, 0x8(%s63) # old limit at addr:8
+ shm.l %sp, 0x10(%s63) # new limit at addr:16
+ monc
+1:
+ b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack)
+
+#endif // __ve__
diff --git a/compiler-rt/lib/builtins/ve/grow_stack_align.S b/compiler-rt/lib/builtins/ve/grow_stack_align.S
new file mode 100644
index 000000000000..19a1dfa8726c
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack_align.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+ subu.l %sp, %sp, %s0 # sp -= alloca size
+ and %sp, %sp, %s1 # align sp
+ brge.l.t %sp, %sl, 1f
+ ld %s63, 0x18(,%tp) # load param area
+ lea %s62, 0x13b # syscall # of grow
+ shm.l %s62, 0x0(%s63) # stored at addr:0
+ shm.l %sl, 0x8(%s63) # old limit at addr:8
+ shm.l %sp, 0x10(%s63) # new limit at addr:16
+ monc
+1:
+ b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+
+#endif // __ve__