diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-01-07 19:55:37 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-01-07 19:55:37 +0000 |
commit | ca9211ecdede9bdedb812b2243a4abdb8dacd1b9 (patch) | |
tree | 9b19e801150082c33e9152275829a6ce90614b55 /lib/builtins/arm | |
parent | 8ef50bf3d1c287b5013c3168de77a462dfce3495 (diff) | |
download | src-ca9211ecdede9bdedb812b2243a4abdb8dacd1b9.tar.gz src-ca9211ecdede9bdedb812b2243a4abdb8dacd1b9.zip |
Notes
Diffstat (limited to 'lib/builtins/arm')
86 files changed, 3189 insertions, 0 deletions
diff --git a/lib/builtins/arm/Makefile.mk b/lib/builtins/arm/Makefile.mk new file mode 100644 index 000000000000..ed2e8323e391 --- /dev/null +++ b/lib/builtins/arm/Makefile.mk @@ -0,0 +1,20 @@ +#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := builtins +SubDirs := +OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) +Implementation := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S new file mode 100644 index 000000000000..2825ae92cd5a --- /dev/null +++ b/lib/builtins/arm/adddf3vfp.S @@ -0,0 +1,26 @@ +//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// double __adddf3vfp(double a, double b) { return a + b; } +// +// Adds two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vadd.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__adddf3vfp) diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S new file mode 100644 index 000000000000..bff5a7e0fbe8 --- /dev/null +++ b/lib/builtins/arm/addsf3vfp.S @@ -0,0 +1,26 @@ +//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __addsf3vfp(float a, float b); +// +// Adds two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed in GPRs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vadd.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__addsf3vfp) diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S new file mode 100644 index 000000000000..310c35b74932 --- /dev/null +++ b/lib/builtins/arm/aeabi_dcmp.S @@ -0,0 +1,40 @@ +//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { +// int result = __{eq,lt,le,ge,gt}df2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#define DEFINE_AEABI_DCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ + push { r4, lr } SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + mov r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + mov r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) + +DEFINE_AEABI_DCMP(eq) +DEFINE_AEABI_DCMP(lt) +DEFINE_AEABI_DCMP(le) +DEFINE_AEABI_DCMP(ge) +DEFINE_AEABI_DCMP(gt) diff --git a/lib/builtins/arm/aeabi_div0.c b/lib/builtins/arm/aeabi_div0.c new file mode 100644 index 000000000000..ccc95fa5c12e --- /dev/null +++ b/lib/builtins/arm/aeabi_div0.c @@ -0,0 +1,43 @@ +/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements the division by zero helper routines as specified by the + * Run-time ABI for the ARM Architecture. + * + * ===----------------------------------------------------------------------=== + */ + +/* + * RTABI 4.3.2 - Division by zero + * + * The *div0 functions: + * - Return the value passed to them as a parameter + * - Or, return a fixed value defined by the execution environment (such as 0) + * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return + * + * An application may provide its own implementations of the *div0 functions to + * for a particular behaviour from the *div and *divmod functions called out of + * line. + */ + +/* provide an unused declaration to pacify pendantic compilation */ +extern unsigned char declaration; + +#if defined(__ARM_EABI__) +int __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_idiv0(int return_value) { + return return_value; +} + +long long __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_ldiv0(long long return_value) { + return return_value; +} +#endif + diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S new file mode 100644 index 000000000000..55f49a2b5af6 --- /dev/null +++ b/lib/builtins/arm/aeabi_fcmp.S @@ -0,0 +1,40 @@ +//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { +// int result = __{eq,lt,le,ge,gt}sf2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#define DEFINE_AEABI_FCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ + push { r4, lr } SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + mov r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + mov r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) + +DEFINE_AEABI_FCMP(eq) +DEFINE_AEABI_FCMP(lt) +DEFINE_AEABI_FCMP(le) +DEFINE_AEABI_FCMP(ge) +DEFINE_AEABI_FCMP(gt) diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S new file mode 100644 index 000000000000..384add38279e --- /dev/null +++ b/lib/builtins/arm/aeabi_idivmod.S @@ -0,0 +1,28 @@ +//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { +// int rem, quot; +// quot = __divmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__divmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_idivmod) diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S new file mode 100644 index 000000000000..ad06f1de2af4 --- /dev/null +++ b/lib/builtins/arm/aeabi_ldivmod.S @@ -0,0 +1,31 @@ +//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int64_t quot, int64_t rem} +// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { +// int64_t rem, quot; +// quot = __divmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) + push {r11, lr} + sub sp, sp, #16 + add r12, sp, #8 + str r12, [sp] + bl SYMBOL_NAME(__divmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r11, pc} +END_COMPILERRT_FUNCTION(__aeabi_ldivmod) diff --git a/lib/builtins/arm/aeabi_memcmp.S b/lib/builtins/arm/aeabi_memcmp.S new file mode 100644 index 000000000000..051ce435bab9 --- /dev/null +++ b/lib/builtins/arm/aeabi_memcmp.S @@ -0,0 +1,20 @@ +//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) + b memcmp +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) diff --git a/lib/builtins/arm/aeabi_memcpy.S b/lib/builtins/arm/aeabi_memcpy.S new file mode 100644 index 000000000000..cf02332490a1 --- /dev/null +++ b/lib/builtins/arm/aeabi_memcpy.S @@ -0,0 +1,20 @@ +//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) + b memcpy +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) diff --git a/lib/builtins/arm/aeabi_memmove.S b/lib/builtins/arm/aeabi_memmove.S new file mode 100644 index 000000000000..4dda06f75d04 --- /dev/null +++ b/lib/builtins/arm/aeabi_memmove.S @@ -0,0 +1,20 @@ +//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) + b memmove +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S new file mode 100644 index 000000000000..c8b49c7809a6 --- /dev/null +++ b/lib/builtins/arm/aeabi_memset.S @@ -0,0 +1,34 @@ +//===-- aeabi_memset.S - EABI memset implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + mov r1, #0 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S new file mode 100644 index 000000000000..8ea474d91c6b --- /dev/null +++ b/lib/builtins/arm/aeabi_uidivmod.S @@ -0,0 +1,29 @@ +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S new file mode 100644 index 000000000000..4e1f8e2a6736 --- /dev/null +++ b/lib/builtins/arm/aeabi_uldivmod.S @@ -0,0 +1,31 @@ +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r11, lr} + sub sp, sp, #16 + add r12, sp, #8 + str r12, [sp] + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r11, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) diff --git a/lib/builtins/arm/bswapdi2.S b/lib/builtins/arm/bswapdi2.S new file mode 100644 index 000000000000..86f3bba8c290 --- /dev/null +++ b/lib/builtins/arm/bswapdi2.S @@ -0,0 +1,47 @@ +//===------- bswapdi2 - Implement bswapdi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +// +// extern uint64_t __bswapdi2(uint64_t); +// +// Reverse all the bytes in a 64-bit integer. +// + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2) +#else +DEFINE_COMPILERRT_FUNCTION(__bswapdi2) +#endif +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + // r2 = rev(r0) + eor r2, r0, r0, ror #16 + bic r2, r2, #0xff0000 + mov r2, r2, lsr #8 + eor r2, r2, r0, ror #8 + // r0 = rev(r1) + eor r0, r1, r1, ror #16 + bic r0, r0, #0xff0000 + mov r0, r0, lsr #8 + eor r0, r0, r1, ror #8 +#else + rev r2, r0 // r2 = rev(r0) + rev r0, r1 // r0 = rev(r1) +#endif + mov r1, r2 // r1 = r2 = rev(r0) + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapdi2) diff --git a/lib/builtins/arm/bswapsi2.S b/lib/builtins/arm/bswapsi2.S new file mode 100644 index 000000000000..59ba8158fd57 --- /dev/null +++ b/lib/builtins/arm/bswapsi2.S @@ -0,0 +1,39 @@ +//===------- bswapsi2 - Implement bswapsi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +// +// extern uint32_t __bswapsi2(uint32_t); +// +// Reverse all the bytes in a 32-bit integer. +// + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2) +#else +DEFINE_COMPILERRT_FUNCTION(__bswapsi2) +#endif +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 +#else + rev r0, r0 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapsi2) diff --git a/lib/builtins/arm/clzdi2.S b/lib/builtins/arm/clzdi2.S new file mode 100644 index 000000000000..a55abac0469b --- /dev/null +++ b/lib/builtins/arm/clzdi2.S @@ -0,0 +1,97 @@ +/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 64bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2) +#else +DEFINE_COMPILERRT_FUNCTION(__clzdi2) +#endif +#ifdef __ARM_FEATURE_CLZ +#ifdef __ARMEB__ + cmp r0, 0 + itee ne + clzne r0, r0 + clzeq r0, r1 + addeq r0, r0, 32 +#else + cmp r1, 0 + itee ne + clzne r0, r1 + clzeq r0, r0 + addeq r0, r0, 32 +#endif + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: upper half of n, overwritten after check + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ +#ifdef __ARMEB__ + cmp r0, 0 + moveq r0, r1 +#else + cmp r1, 0 + movne r0, r1 +#endif + movne r1, 1 + moveq r1, 33 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. + */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzdi2) diff --git a/lib/builtins/arm/clzsi2.S b/lib/builtins/arm/clzsi2.S new file mode 100644 index 000000000000..1cd379bfb0a9 --- /dev/null +++ b/lib/builtins/arm/clzsi2.S @@ -0,0 +1,76 @@ +/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 32bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2) +#else +DEFINE_COMPILERRT_FUNCTION(__clzsi2) +#endif +#ifdef __ARM_FEATURE_CLZ + clz r0, r0 + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ + mov r1, 1 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ + +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. + */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzsi2) diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S new file mode 100644 index 000000000000..cf71d36e0517 --- /dev/null +++ b/lib/builtins/arm/comparesf2.S @@ -0,0 +1,148 @@ +//===-- comparesf2.S - Implement single-precision soft-float comparisons --===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, with multiple names. +// +// The routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +.syntax unified + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2) + // Make copies of a and b with the sign bit shifted off the top. These will + // be used to detect zeros and NaNs. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + + // We do the comparison in three stages (ignoring NaN values for the time + // being). First, we orr the absolute values of a and b; this sets the Z + // flag if both a and b are zero (of either sign). The shift of r3 doesn't + // effect this at all, but it *does* make sure that the C flag is clear for + // the subsequent operations. + orrs r12, r2, r3, lsr #1 + + // Next, we check if a and b have the same or different signs. If they have + // opposite signs, this eor will set the N flag. + it ne + eorsne r12, r0, r1 + + // If a and b are equal (either both zeros or bit identical; again, we're + // ignoring NaNs for now), this subtract will zero out r0. If they have the + // same sign, the flags are updated as they would be for a comparison of the + // absolute values of a and b. + it pl + subspl r0, r2, r3 + + // If a is smaller in magnitude than b and both have the same sign, place + // the negation of the sign of b in r0. Thus, if both are negative and + // a > b, this sets r0 to 0; if both are positive and a < b, this sets + // r0 to -1. + // + // This is also done if a and b have opposite signs and are not both zero, + // because in that case the subtract was not performed and the C flag is + // still clear from the shift argument in orrs; if a is positive and b + // negative, this places 0 in r0; if a is negative and b positive, -1 is + // placed in r0. + it lo + mvnlo r0, r1, asr #31 + + // If a is greater in magnitude than b and both have the same sign, place + // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed + // in r0, which is the desired result. Conversely, if both are positive + // and a > b, zero is placed in r0. + it hi + movhi r0, r1, asr #31 + + // If you've been keeping track, at this point r0 contains -1 if a < b and + // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. + // If a == b, then the Z flag is set, so we can get the correct final value + // into r0 by simply or'ing with 1 if Z is clear. + it ne + orrne r0, r0, #1 + + // Finally, we need to deal with NaNs. If either argument is NaN, replace + // the value in r0 with 1. + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 + JMP(lr) +END_COMPILERRT_FUNCTION(__eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2) + // Identical to the preceding except in that we return -1 for NaN values. + // Given that the two paths share so much code, one might be tempted to + // unify them; however, the extra code needed to do so makes the code size + // to performance tradeoff very hard to justify for such small functions. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + orrs r12, r2, r3, lsr #1 + it ne + eorsne r12, r0, r1 + it pl + subspl r0, r2, r3 + it lo + mvnlo r0, r1, asr #31 + it hi + movhi r0, r1, asr #31 + it ne + orrne r0, r0, #1 + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #-1 + JMP(lr) +END_COMPILERRT_FUNCTION(__gtsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) + +.p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2) + // Return 1 for NaN values, 0 otherwise. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mov r0, #0 + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 + JMP(lr) +END_COMPILERRT_FUNCTION(__unordsf2) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S new file mode 100644 index 000000000000..6eebef167a2c --- /dev/null +++ b/lib/builtins/arm/divdf3vfp.S @@ -0,0 +1,26 @@ +//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __divdf3vfp(double a, double b); +// +// Divides two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vdiv.f64 d5, d6, d7 + vmov r0, r1, d5 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__divdf3vfp) diff --git a/lib/builtins/arm/divmodsi4.S b/lib/builtins/arm/divmodsi4.S new file mode 100644 index 000000000000..646b9ab78fb6 --- /dev/null +++ b/lib/builtins/arm/divmodsi4.S @@ -0,0 +1,74 @@ +/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divmodsi4 (32-bit signed integer divide and + * modulus) function for the ARM architecture. A naive digit-by-digit + * computation is employed for simplicity. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4-r7, lr} ;\ + add r7, sp, #12 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4-r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ int __divmodsi4(int divident, int divisor, int *remainder) +@ Calculate the quotient and remainder of the (signed) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 3 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4) +#else +DEFINE_COMPILERRT_FUNCTION(__divmodsi4) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + mov r3, r0 + sdiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME +// Set aside the sign of the quotient and modulus, and the address for the +// modulus. + eor r4, r0, r1 + mov r5, r0 + mov r6, r2 +// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor ip, r0, r0, asr #31 + eor lr, r1, r1, asr #31 + sub r0, ip, r0, asr #31 + sub r1, lr, r1, asr #31 +// Unsigned divmod: + bl SYMBOL_NAME(__udivmodsi4) +// Apply the sign of quotient and modulus + ldr r1, [r6] + eor r0, r0, r4, asr #31 + eor r1, r1, r5, asr #31 + sub r0, r0, r4, asr #31 + sub r1, r1, r5, asr #31 + str r1, [r6] + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divmodsi4) diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S new file mode 100644 index 000000000000..fdbaebc88371 --- /dev/null +++ b/lib/builtins/arm/divsf3vfp.S @@ -0,0 +1,26 @@ +//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __divsf3vfp(float a, float b); +// +// Divides two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vdiv.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__divsf3vfp) diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S new file mode 100644 index 000000000000..adf8f94fc7b8 --- /dev/null +++ b/lib/builtins/arm/divsi3.S @@ -0,0 +1,65 @@ +/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divsi3 (32-bit signed integer divide) function + * for the ARM architecture as a wrapper around the unsigned routine. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 3 +// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3) + +@ int __divsi3(int divident, int divisor) +@ Calculate and return the quotient of the (signed) division. + +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__divsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1,r1 + beq LOCAL_LABEL(divzero) + sdiv r0, r0, r1 + bx lr +LOCAL_LABEL(divzero): + mov r0,#0 + bx lr +#else +ESTABLISH_FRAME +// Set aside the sign of the quotient. + eor r4, r0, r1 +// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 +// abs(a) / abs(b) + bl SYMBOL_NAME(__udivsi3) +// Apply sign of quotient to result and return. + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divsi3) diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S new file mode 100644 index 000000000000..7f2fbc3072d4 --- /dev/null +++ b/lib/builtins/arm/eqdf2vfp.S @@ -0,0 +1,29 @@ +//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqdf2vfp(double a, double b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqdf2vfp) diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S new file mode 100644 index 000000000000..a318b336ae9e --- /dev/null +++ b/lib/builtins/arm/eqsf2vfp.S @@ -0,0 +1,29 @@ +//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqsf2vfp(float a, float b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqsf2vfp) diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S new file mode 100644 index 000000000000..b998e589459e --- /dev/null +++ b/lib/builtins/arm/extendsfdf2vfp.S @@ -0,0 +1,26 @@ +//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __extendsfdf2vfp(float a); +// +// Converts single precision float to double precision result. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR and a double precision result is returned in R0/R1 pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) + vmov s15, r0 // load float register from R0 + vcvt.f64.f32 d7, s15 // convert single to double + vmov r0, r1, d7 // return result in r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__extendsfdf2vfp) diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S new file mode 100644 index 000000000000..e3bd8e05e01f --- /dev/null +++ b/lib/builtins/arm/fixdfsivfp.S @@ -0,0 +1,26 @@ +//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixdfsivfp(double a); +// +// Converts double precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixdfsivfp) diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S new file mode 100644 index 000000000000..3d0d0f56d235 --- /dev/null +++ b/lib/builtins/arm/fixsfsivfp.S @@ -0,0 +1,26 @@ +//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixsfsivfp(float a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) + vmov s15, r0 // load float register from R0 + vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixsfsivfp) diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S new file mode 100644 index 000000000000..35dda5b9b034 --- /dev/null +++ b/lib/builtins/arm/fixunsdfsivfp.S @@ -0,0 +1,27 @@ +//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunsdfsivfp(double a); +// +// Converts double precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixunsdfsivfp) diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S new file mode 100644 index 000000000000..5c3a7d926fcc --- /dev/null +++ b/lib/builtins/arm/fixunssfsivfp.S @@ -0,0 +1,27 @@ +//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunssfsivfp(float a); +// +// Converts single precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) + vmov s15, r0 // load float register from R0 + vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__fixunssfsivfp) diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S new file mode 100644 index 000000000000..d69184914ccd --- /dev/null +++ b/lib/builtins/arm/floatsidfvfp.S @@ -0,0 +1,26 @@ +//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatsidfvfp(int a); +// +// Converts a 32-bit int to a double precision float. +// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 + bx lr +END_COMPILERRT_FUNCTION(__floatsidfvfp) diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S new file mode 100644 index 000000000000..4a0cb39d0eb0 --- /dev/null +++ b/lib/builtins/arm/floatsisfvfp.S @@ -0,0 +1,26 @@ +//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatsisfvfp(int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__floatsisfvfp) diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S new file mode 100644 index 000000000000..d92969ea3453 --- /dev/null +++ b/lib/builtins/arm/floatunssidfvfp.S @@ -0,0 +1,26 @@ +//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatunssidfvfp(unsigned int a); +// +// Converts a 32-bit int to a double precision float. +// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 + bx lr +END_COMPILERRT_FUNCTION(__floatunssidfvfp) diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S new file mode 100644 index 000000000000..f6aeba56ae15 --- /dev/null +++ b/lib/builtins/arm/floatunssisfvfp.S @@ -0,0 +1,26 @@ +//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatunssisfvfp(unsigned int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) + vmov s15, r0 // move int to float register s15 + vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register + bx lr +END_COMPILERRT_FUNCTION(__floatunssisfvfp) diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S new file mode 100644 index 000000000000..9e235270175c --- /dev/null +++ b/lib/builtins/arm/gedf2vfp.S @@ -0,0 +1,29 @@ +//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gedf2vfp(double a, double b); +// +// Returns one iff a >= b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gedf2vfp) diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S new file mode 100644 index 000000000000..0ff608477882 --- /dev/null +++ b/lib/builtins/arm/gesf2vfp.S @@ -0,0 +1,29 @@ +//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gesf2vfp(float a, float b); +// +// Returns one iff a >= b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gesf2vfp) diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S new file mode 100644 index 000000000000..3dc5d5b59225 --- /dev/null +++ b/lib/builtins/arm/gtdf2vfp.S @@ -0,0 +1,29 @@ +//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __gtdf2vfp(double a, double b); +// +// Returns one iff a > b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movgt r0, #1 // set result register to 1 if equal + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtdf2vfp) diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S new file mode 100644 index 000000000000..ddd843acf592 --- /dev/null +++ b/lib/builtins/arm/gtsf2vfp.S @@ -0,0 +1,29 @@ +//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gtsf2vfp(float a, float b); +// +// Returns one iff a > b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movgt r0, #1 // set result register to 1 if equal + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtsf2vfp) diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S new file mode 100644 index 000000000000..b06ff6db5a33 --- /dev/null +++ b/lib/builtins/arm/ledf2vfp.S @@ -0,0 +1,29 @@ +//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __ledf2vfp(double a, double b); +// +// Returns one iff a <= b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movls r0, #1 // set result register to 1 if equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ledf2vfp) diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S new file mode 100644 index 000000000000..9b33c0c53697 --- /dev/null +++ b/lib/builtins/arm/lesf2vfp.S @@ -0,0 +1,29 @@ +//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __lesf2vfp(float a, float b); +// +// Returns one iff a <= b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movls r0, #1 // set result register to 1 if equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__lesf2vfp) diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S new file mode 100644 index 000000000000..9f794b026a4a --- /dev/null +++ b/lib/builtins/arm/ltdf2vfp.S @@ -0,0 +1,29 @@ +//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __ltdf2vfp(double a, double b); +// +// Returns one iff a < b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movmi r0, #1 // set result register to 1 if equal + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltdf2vfp) diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S new file mode 100644 index 000000000000..ba190d9d8dc2 --- /dev/null +++ b/lib/builtins/arm/ltsf2vfp.S @@ -0,0 +1,29 @@ +//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __ltsf2vfp(float a, float b); +// +// Returns one iff a < b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movmi r0, #1 // set result register to 1 if equal + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltsf2vfp) diff --git a/lib/builtins/arm/modsi3.S b/lib/builtins/arm/modsi3.S new file mode 100644 index 000000000000..295a227d862e --- /dev/null +++ b/lib/builtins/arm/modsi3.S @@ -0,0 +1,63 @@ +/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __modsi3 (32-bit signed integer modulus) function + * for the ARM architecture as a wrapper around the unsigned routine. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ int __modsi3(int divident, int divisor) +@ Calculate and return the remainder of the (signed) division. + + .p2align 3 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__modsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + sdiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME + // Set aside the sign of the dividend. + mov r4, r0 + // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 + // abs(a) % abs(b) + bl SYMBOL_NAME(__umodsi3) + // Apply sign of dividend to result and return. + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__modsi3) diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S new file mode 100644 index 000000000000..636cc711ac1a --- /dev/null +++ b/lib/builtins/arm/muldf3vfp.S @@ -0,0 +1,26 @@ +//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __muldf3vfp(double a, double b); +// +// Multiplies two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vmul.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__muldf3vfp) diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S new file mode 100644 index 000000000000..7f4008266bff --- /dev/null +++ b/lib/builtins/arm/mulsf3vfp.S @@ -0,0 +1,26 @@ +//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __mulsf3vfp(float a, float b); +// +// Multiplies two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vmul.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__mulsf3vfp) diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S new file mode 100644 index 000000000000..7ab2f5501ce0 --- /dev/null +++ b/lib/builtins/arm/nedf2vfp.S @@ -0,0 +1,29 @@ +//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __nedf2vfp(double a, double b); +// +// Returns zero if a and b are unequal and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movne r0, #1 // set result register to 0 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nedf2vfp) diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S new file mode 100644 index 000000000000..56d73c676176 --- /dev/null +++ b/lib/builtins/arm/negdf2vfp.S @@ -0,0 +1,23 @@ +//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __negdf2vfp(double a, double b); +// +// Returns the negation a double precision floating point numbers using the +// Darwin calling convention where double arguments are passsed in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) + eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__negdf2vfp) diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S new file mode 100644 index 000000000000..a6e32e1ff89c --- /dev/null +++ b/lib/builtins/arm/negsf2vfp.S @@ -0,0 +1,23 @@ +//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __negsf2vfp(float a); +// +// Returns the negation of a single precision floating point numbers using the +// Darwin calling convention where single arguments are passsed like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) + eor r0, r0, #-2147483648 // flip sign bit on float in r0 + bx lr +END_COMPILERRT_FUNCTION(__negsf2vfp) diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S new file mode 100644 index 000000000000..9fe8ecdefb37 --- /dev/null +++ b/lib/builtins/arm/nesf2vfp.S @@ -0,0 +1,29 @@ +//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __nesf2vfp(float a, float b); +// +// Returns one iff a != b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nesf2vfp) diff --git a/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/lib/builtins/arm/restore_vfp_d8_d15_regs.S new file mode 100644 index 000000000000..0f6ea5136166 --- /dev/null +++ b/lib/builtins/arm/restore_vfp_d8_d15_regs.S @@ -0,0 +1,33 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. +// + + .text + .syntax unified + +// +// Restore registers d8-d15 from stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) + vldmia sp!, {d8-d15} // pop registers d8-d15 off stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs) + diff --git a/lib/builtins/arm/save_vfp_d8_d15_regs.S b/lib/builtins/arm/save_vfp_d8_d15_regs.S new file mode 100644 index 000000000000..f1d90e75808c --- /dev/null +++ b/lib/builtins/arm/save_vfp_d8_d15_regs.S @@ -0,0 +1,33 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. +// + + .text + .syntax unified + +// +// Save registers d8-d15 onto stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) + vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs) + diff --git a/lib/builtins/arm/softfloat-alias.list b/lib/builtins/arm/softfloat-alias.list new file mode 100644 index 000000000000..cc6a4b3cdd2e --- /dev/null +++ b/lib/builtins/arm/softfloat-alias.list @@ -0,0 +1,21 @@ +# +# These are soft float functions which can be +# aliased to the *vfp functions on arm processors +# that support floating point instructions. +# +___adddf3vfp ___adddf3 +___addsf3vfp ___addsf3 +___divdf3vfp ___divdf3 +___divsf3vfp ___divsf3 +___extendsfdf2vfp ___extendsfdf2 +___fixdfsivfp ___fixdfsi +___fixsfsivfp ___fixsfsi +___floatsidfvfp ___floatsidf +___floatsisfvfp ___floatsisf +___muldf3vfp ___muldf3 +___mulsf3vfp ___mulsf3 +___subdf3vfp ___subdf3 +___subsf3vfp ___subsf3 +___truncdfsf2vfp ___truncdfsf2 +___floatunssidfvfp ___floatunsidf +___floatunssisfvfp ___floatunsisf diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S new file mode 100644 index 000000000000..5f3c0f70dbc4 --- /dev/null +++ b/lib/builtins/arm/subdf3vfp.S @@ -0,0 +1,26 @@ +//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __subdf3vfp(double a, double b); +// +// Returns difference between two double precision floating point numbers using +// the Darwin calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vsub.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair + bx lr +END_COMPILERRT_FUNCTION(__subdf3vfp) diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S new file mode 100644 index 000000000000..d6e06df51920 --- /dev/null +++ b/lib/builtins/arm/subsf3vfp.S @@ -0,0 +1,27 @@ +//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __subsf3vfp(float a, float b); +// +// Returns the difference between two single precision floating point numbers +// using the Darwin calling convention where single arguments are passsed +// like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vsub.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 + bx lr +END_COMPILERRT_FUNCTION(__subsf3vfp) diff --git a/lib/builtins/arm/switch16.S b/lib/builtins/arm/switch16.S new file mode 100644 index 000000000000..3c3a6b106124 --- /dev/null +++ b/lib/builtins/arm/switch16.S @@ -0,0 +1,44 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 2-byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) + ldrh ip, [lr, #-1] // get first 16-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #1 // compute address of element in table + add ip, lr, ip, lsl #1 // compute address of last element in table + ite lo + ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range + ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch16) + diff --git a/lib/builtins/arm/switch32.S b/lib/builtins/arm/switch32.S new file mode 100644 index 000000000000..b38cd2b764a4 --- /dev/null +++ b/lib/builtins/arm/switch32.S @@ -0,0 +1,44 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 4-byte sized elements which are the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) + ldr ip, [lr, #-1] // get first 32-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #2 // compute address of element in table + add ip, lr, ip, lsl #2 // compute address of last element in table + ite lo + ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range + ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range + add ip, lr, r0 // compute label = lr + element + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch32) + diff --git a/lib/builtins/arm/switch8.S b/lib/builtins/arm/switch8.S new file mode 100644 index 000000000000..d7c20423def2 --- /dev/null +++ b/lib/builtins/arm/switch8.S @@ -0,0 +1,42 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // signed compare with index + ite lo + ldrsblo r0, [lr, r0] // get indexed byte out of table + ldrsbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch8) + diff --git a/lib/builtins/arm/switchu8.S b/lib/builtins/arm/switchu8.S new file mode 100644 index 000000000000..1844f11c604e --- /dev/null +++ b/lib/builtins/arm/switchu8.S @@ -0,0 +1,42 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains unsigned byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // compare with index + ite lo + ldrblo r0, [lr, r0] // get indexed byte out of table + ldrbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switchu8) + diff --git a/lib/builtins/arm/sync-ops.h b/lib/builtins/arm/sync-ops.h new file mode 100644 index 000000000000..ee02c30c6eaa --- /dev/null +++ b/lib/builtins/arm/sync-ops.h @@ -0,0 +1,64 @@ +/*===-- sync-ops.h - --===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements outline macros for the __sync_fetch_and_* + * operations. Different instantiations will generate appropriate assembly for + * ARM and Thumb-2 versions of the functions. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define SYNC_OP_4(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrex r0, [r12] ; \ + op(r2, r0, r1) ; \ + strex r3, r2, [r12] ; \ + cmp r3, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + bx lr + +#define SYNC_OP_8(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + push {r4, r5, r6, lr} ; \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrexd r0, r1, [r12] ; \ + op(r4, r5, r0, r1, r2, r3) ; \ + strexd r6, r4, r5, [r12] ; \ + cmp r6, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + pop {r4, r5, r6, pc} + +#define MINMAX_4(rD, rN, rM, cmp_kind) \ + cmp rN, rM ; \ + mov rD, rM ; \ + it cmp_kind ; \ + mov##cmp_kind rD, rN + +#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ + cmp rN_LO, rM_LO ; \ + sbcs rN_HI, rM_HI ; \ + mov rD_LO, rM_LO ; \ + mov rD_HI, rM_HI ; \ + itt cmp_kind ; \ + mov##cmp_kind rD_LO, rN_LO ; \ + mov##cmp_kind rD_HI, rN_HI diff --git a/lib/builtins/arm/sync_fetch_and_add_4.S b/lib/builtins/arm/sync_fetch_and_add_4.S new file mode 100644 index 000000000000..54c33e2d26b7 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_add_4.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_add_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "adds" is 2 bytes shorter than "add". */ +#define add_4(rD, rN, rM) add rD, rN, rM + +SYNC_OP_4(add_4) + diff --git a/lib/builtins/arm/sync_fetch_and_add_8.S b/lib/builtins/arm/sync_fetch_and_add_8.S new file mode 100644 index 000000000000..5724bb148ba7 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_add_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_add_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + adds rD_LO, rN_LO, rM_LO ; \ + adc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(add_8) +#endif + diff --git a/lib/builtins/arm/sync_fetch_and_and_4.S b/lib/builtins/arm/sync_fetch_and_and_4.S new file mode 100644 index 000000000000..e2b77a1a87d4 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_and_4.S @@ -0,0 +1,19 @@ +/*===-- sync_fetch_and_and_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define and_4(rD, rN, rM) and rD, rN, rM + +SYNC_OP_4(and_4) diff --git a/lib/builtins/arm/sync_fetch_and_and_8.S b/lib/builtins/arm/sync_fetch_and_and_8.S new file mode 100644 index 000000000000..a74163a8600b --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_and_8.S @@ -0,0 +1,23 @@ +/*===-- sync_fetch_and_and_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + and rD_LO, rN_LO, rM_LO ; \ + and rD_HI, rN_HI, rM_HI + +SYNC_OP_8(and_8) +#endif diff --git a/lib/builtins/arm/sync_fetch_and_max_4.S b/lib/builtins/arm/sync_fetch_and_max_4.S new file mode 100644 index 000000000000..01e4f444c2f7 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_max_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_max_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt) + +SYNC_OP_4(max_4) + diff --git a/lib/builtins/arm/sync_fetch_and_max_8.S b/lib/builtins/arm/sync_fetch_and_max_8.S new file mode 100644 index 000000000000..1eef2b223668 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_max_8.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_max_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt) + +SYNC_OP_8(max_8) +#endif diff --git a/lib/builtins/arm/sync_fetch_and_min_4.S b/lib/builtins/arm/sync_fetch_and_min_4.S new file mode 100644 index 000000000000..015626b63da5 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_min_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_min_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt) + +SYNC_OP_4(min_4) + diff --git a/lib/builtins/arm/sync_fetch_and_min_8.S b/lib/builtins/arm/sync_fetch_and_min_8.S new file mode 100644 index 000000000000..ad5cce07544c --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_min_8.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_min_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt) + +SYNC_OP_8(min_8) +#endif diff --git a/lib/builtins/arm/sync_fetch_and_nand_4.S b/lib/builtins/arm/sync_fetch_and_nand_4.S new file mode 100644 index 000000000000..b32a314b3974 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_nand_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define nand_4(rD, rN, rM) bic rD, rN, rM + +SYNC_OP_4(nand_4) + diff --git a/lib/builtins/arm/sync_fetch_and_nand_8.S b/lib/builtins/arm/sync_fetch_and_nand_8.S new file mode 100644 index 000000000000..a2c17c09c08f --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_nand_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + bic rD_LO, rN_LO, rM_LO ; \ + bic rD_HI, rN_HI, rM_HI + +SYNC_OP_8(nand_8) +#endif + diff --git a/lib/builtins/arm/sync_fetch_and_or_4.S b/lib/builtins/arm/sync_fetch_and_or_4.S new file mode 100644 index 000000000000..f2e08576aaab --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_or_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_or_4.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define or_4(rD, rN, rM) orr rD, rN, rM + +SYNC_OP_4(or_4) + diff --git a/lib/builtins/arm/sync_fetch_and_or_8.S b/lib/builtins/arm/sync_fetch_and_or_8.S new file mode 100644 index 000000000000..87b940bf620d --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_or_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_or_8.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + orr rD_LO, rN_LO, rM_LO ; \ + orr rD_HI, rN_HI, rM_HI + +SYNC_OP_8(or_8) +#endif + diff --git a/lib/builtins/arm/sync_fetch_and_sub_4.S b/lib/builtins/arm/sync_fetch_and_sub_4.S new file mode 100644 index 000000000000..460b2bc1ed62 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_sub_4.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "subs" is 2 bytes shorter than "sub". */ +#define sub_4(rD, rN, rM) sub rD, rN, rM + +SYNC_OP_4(sub_4) + diff --git a/lib/builtins/arm/sync_fetch_and_sub_8.S b/lib/builtins/arm/sync_fetch_and_sub_8.S new file mode 100644 index 000000000000..a8035a276853 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_sub_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + subs rD_LO, rN_LO, rM_LO ; \ + sbc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(sub_8) +#endif + diff --git a/lib/builtins/arm/sync_fetch_and_umax_4.S b/lib/builtins/arm/sync_fetch_and_umax_4.S new file mode 100644 index 000000000000..c59153031931 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_umax_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi) + +SYNC_OP_4(umax_4) + diff --git a/lib/builtins/arm/sync_fetch_and_umax_8.S b/lib/builtins/arm/sync_fetch_and_umax_8.S new file mode 100644 index 000000000000..d9b7965e52ba --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_umax_8.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi) + +SYNC_OP_8(umax_8) +#endif diff --git a/lib/builtins/arm/sync_fetch_and_umin_4.S b/lib/builtins/arm/sync_fetch_and_umin_4.S new file mode 100644 index 000000000000..9f3896fca80e --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_umin_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo) + +SYNC_OP_4(umin_4) + diff --git a/lib/builtins/arm/sync_fetch_and_umin_8.S b/lib/builtins/arm/sync_fetch_and_umin_8.S new file mode 100644 index 000000000000..7bf5e235653c --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_umin_8.S @@ -0,0 +1,21 @@ +/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo) + +SYNC_OP_8(umin_8) +#endif diff --git a/lib/builtins/arm/sync_fetch_and_xor_4.S b/lib/builtins/arm/sync_fetch_and_xor_4.S new file mode 100644 index 000000000000..7e7c90c96277 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_xor_4.S @@ -0,0 +1,20 @@ +/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define xor_4(rD, rN, rM) eor rD, rN, rM + +SYNC_OP_4(xor_4) + diff --git a/lib/builtins/arm/sync_fetch_and_xor_8.S b/lib/builtins/arm/sync_fetch_and_xor_8.S new file mode 100644 index 000000000000..ea9aa6d4b0e2 --- /dev/null +++ b/lib/builtins/arm/sync_fetch_and_xor_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + eor rD_LO, rN_LO, rM_LO ; \ + eor rD_HI, rN_HI, rM_HI + +SYNC_OP_8(xor_8) +#endif + diff --git a/lib/builtins/arm/sync_synchronize.S b/lib/builtins/arm/sync_synchronize.S new file mode 100644 index 000000000000..178f24534c71 --- /dev/null +++ b/lib/builtins/arm/sync_synchronize.S @@ -0,0 +1,35 @@ +//===-- sync_synchronize - Implement memory barrier * ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode +// the compiler may emit a call to __sync_synchronize. +// On Darwin the implementation jumps to an OS supplied function named +// OSMemoryBarrier +// + + .text + .syntax unified + +#if __APPLE__ + + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) + stmfd sp!, {r7, lr} + add r7, sp, #0 + bl _OSMemoryBarrier + ldmfd sp!, {r7, pc} +END_COMPILERRT_FUNCTION(__sync_synchronize) + + // tell linker it can break up file at label boundaries + .subsections_via_symbols + +#endif diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S new file mode 100644 index 000000000000..fa4362c45e77 --- /dev/null +++ b/lib/builtins/arm/truncdfsf2vfp.S @@ -0,0 +1,26 @@ +//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __truncdfsf2vfp(double a); +// +// Converts double precision float to signle precision result. +// Uses Darwin calling convention where a double precision parameter is +// passed in a R0/R1 pair and a signle precision result is returned in R0. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) + vmov d7, r0, r1 // load double from r0/r1 pair + vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) + vmov r0, s15 // return result in r0 + bx lr +END_COMPILERRT_FUNCTION(__truncdfsf2vfp) diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S new file mode 100644 index 000000000000..85b84936c4b3 --- /dev/null +++ b/lib/builtins/arm/udivmodsi4.S @@ -0,0 +1,184 @@ +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, +@ unsigned int *remainder) +@ Calculate the quotient and remainder of the (unsigned) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4) +#else +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + beq LOCAL_LABEL(divby1) + cmp r0, r1 + bcc LOCAL_LABEL(quotient0) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + str r4, [sp, #-8]! + + mov r4, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r4, #16 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r4, #8 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r4, #4 + cmp r3, r1 + movhs r4, r3 + subhs ip, #(4 * 12) + + lsr r3, r4, #2 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r3 or r4. */ + cmp r1, r4, lsr #1 + subls ip, ip, #(1 * 12) + + ldr r4, [sp], #8 /* restore r4, we are done with it. */ + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + str r0, [r2] + mov r0, r3 + JMP(lr) + +LOCAL_LABEL(quotient0): + str r0, [r2] + mov r0, #0 + JMP(lr) + +LOCAL_LABEL(divby1): + mov r3, #0 + str r3, [r2] + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivmodsi4) diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S new file mode 100644 index 000000000000..165b2b58acb4 --- /dev/null +++ b/lib/builtins/arm/udivsi3.S @@ -0,0 +1,170 @@ +/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivsi3 (32-bit unsigned integer divide) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + + .p2align 2 +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) + +@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the quotient of the (unsigned) division. + +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__udivsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r0, r0, r1 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + IT(eq) + JMPc(lr, eq) + cmp r0, r1 + ITT(cc) + movcc r0, #0 + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 12) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r2 or r3. */ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 12) + + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + mov r0, r3 + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivsi3) diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S new file mode 100644 index 000000000000..9e7a148ce46f --- /dev/null +++ b/lib/builtins/arm/umodsi3.S @@ -0,0 +1,161 @@ +/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __umodsi3 (32-bit unsigned integer modulus) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +@ unsigned int __umodsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the remainder of the (unsigned) division. + + .p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3) +#else +DEFINE_COMPILERRT_FUNCTION(__umodsi3) +#endif +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + ITT(eq) + moveq r0, #0 + JMPc(lr, eq) + cmp r0, r1 + IT(cc) + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 8 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #3 + bx ip +# else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 8) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 8) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 8) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 8) + + /* Last block, no need to update r2 or r3. */ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 8) + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + IT(hs); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__umodsi3) diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S new file mode 100644 index 000000000000..c4bea2d5eebd --- /dev/null +++ b/lib/builtins/arm/unorddf2vfp.S @@ -0,0 +1,29 @@ +//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unorddf2vfp(double a, double b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 + vmrs apsr_nzcv, fpscr + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unorddf2vfp) diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S new file mode 100644 index 000000000000..886e96568103 --- /dev/null +++ b/lib/builtins/arm/unordsf2vfp.S @@ -0,0 +1,29 @@ +//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unordsf2vfp(float a, float b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 + vmrs apsr_nzcv, fpscr + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unordsf2vfp) |