diff options
Diffstat (limited to 'lib/builtins/arm/udivmodsi4.S')
| -rw-r--r-- | lib/builtins/arm/udivmodsi4.S | 184 | 
1 files changed, 184 insertions, 0 deletions
| diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S new file mode 100644 index 000000000000..85b84936c4b3 --- /dev/null +++ b/lib/builtins/arm/udivmodsi4.S @@ -0,0 +1,184 @@ +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + *                     The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +	.syntax unified +	.text + +#if __ARM_ARCH_ISA_THUMB == 2 +	.thumb +#endif + +@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, +@                           unsigned int *remainder) +@   Calculate the quotient and remainder of the (unsigned) division.  The return +@   value is the quotient, the remainder is placed in the variable. + +	.p2align 2 +#if __ARM_ARCH_ISA_THUMB == 2 +DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4) +#else +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#endif +#if __ARM_ARCH_EXT_IDIV__ +	tst     r1, r1 +	beq     LOCAL_LABEL(divby0) +	mov 	r3, r0 +	udiv	r0, r3, r1 +	mls 	r1, r0, r1, r3 +	str 	r1, [r2] +	bx  	lr +#else +	cmp	r1, #1 +	bcc	LOCAL_LABEL(divby0) +	beq	LOCAL_LABEL(divby1) +	cmp	r0, r1 +	bcc	LOCAL_LABEL(quotient0) +	/* +	 * Implement division using binary long division algorithm. +	 * +	 * r0 is the numerator, r1 the denominator. +	 * +	 * The code before JMP computes the correct shift I, so that +	 * r0 and (r1 << I) have the highest bit set in the same position. +	 * At the time of JMP, ip := .Ldiv0block - 12 * I. +	 * This depends on the fixed instruction size of block. +	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. +	 * +	 * block(shift) implements the test-and-update-quotient core. +	 * It assumes (r0 << shift) can be computed without overflow and +	 * that (r0 << shift) < 2 * r1. The quotient is stored in r3. +	 */ + +#  ifdef __ARM_FEATURE_CLZ +	clz	ip, r0 +	clz	r3, r1 +	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ +	sub	r3, r3, ip +#    if __ARM_ARCH_ISA_THUMB == 2 +	adr	ip, LOCAL_LABEL(div0block) + 1 +	sub	ip, ip, r3, lsl #1 +#    else +	adr	ip, LOCAL_LABEL(div0block) +#    endif +	sub	ip, ip, r3, lsl #2 +	sub	ip, ip, r3, lsl #3 +	mov	r3, #0 +	bx	ip +#  else +#    if __ARM_ARCH_ISA_THUMB == 2 +#    error THUMB mode requires CLZ or UDIV +#    endif +	str	r4, [sp, #-8]! + +	mov	r4, r0 +	adr	ip, LOCAL_LABEL(div0block) + +	lsr	r3, r4, #16 +	cmp	r3, r1 +	movhs	r4, r3 +	subhs	ip, ip, #(16 * 12) + +	lsr	r3, r4, #8 +	cmp	r3, r1 +	movhs	r4, r3 +	subhs	ip, ip, #(8 * 12) + +	lsr	r3, r4, #4 +	cmp	r3, r1 +	movhs	r4, r3 +	subhs	ip, #(4 * 12) + +	lsr	r3, r4, #2 +	cmp	r3, r1 +	movhs	r4, r3 +	subhs	ip, ip, #(2 * 12) + +	/* Last block, no need to update r3 or r4. */ +	cmp	r1, r4, lsr #1 +	subls	ip, ip, #(1 * 12) + +	ldr	r4, [sp], #8	/* restore r4, we are done with it. */ +	mov	r3, #0 + +	JMP(ip) +#  endif + +#define	IMM	# + +#define block(shift)                                                           \ +	cmp	r0, r1, lsl IMM shift;                                         \ +	ITT(hs);                                                               \ +	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \ +	WIDE(subhs)	r0, r0, r1, lsl IMM shift + +	block(31) +	block(30) +	block(29) +	block(28) +	block(27) +	block(26) +	block(25) +	block(24) +	block(23) +	block(22) +	block(21) +	block(20) +	block(19) +	block(18) +	block(17) +	block(16) +	block(15) +	block(14) +	block(13) +	block(12) +	block(11) +	block(10) +	block(9) +	block(8) +	block(7) +	block(6) +	block(5) +	block(4) +	block(3) +	block(2) +	block(1) +LOCAL_LABEL(div0block): +	block(0) + +	str	r0, [r2] +	mov	r0, r3 +	JMP(lr) + +LOCAL_LABEL(quotient0): +	str	r0, [r2] +	mov	r0, #0 +	JMP(lr) + +LOCAL_LABEL(divby1): +	mov	r3, #0 +	str	r3, [r2] +	JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): +	mov	r0, #0 +#ifdef __ARM_EABI__ +	b	__aeabi_idiv0 +#else +	JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivmodsi4) | 
