diff options
author | Andrew Turner <andrew@FreeBSD.org> | 2016-09-16 15:19:31 +0000 |
---|---|---|
committer | Andrew Turner <andrew@FreeBSD.org> | 2016-09-16 15:19:31 +0000 |
commit | 162378196a8674c75b4e93addb18ef42cb9b7737 (patch) | |
tree | 35a65fc34832b001eb50dd981f9db6866797e750 /reference/glibc |
Notes
Diffstat (limited to 'reference/glibc')
-rw-r--r-- | reference/glibc/.deps/memcpy.Po | 1 | ||||
-rw-r--r-- | reference/glibc/.deps/memset.Po | 1 | ||||
-rw-r--r-- | reference/glibc/.deps/strchr.Po | 1 | ||||
-rw-r--r-- | reference/glibc/.deps/strlen.Po | 1 | ||||
-rw-r--r-- | reference/glibc/memcpy.S | 229 | ||||
-rw-r--r-- | reference/glibc/memset.S | 64 | ||||
-rw-r--r-- | reference/glibc/strchr.S | 132 | ||||
-rw-r--r-- | reference/glibc/strlen.S | 99 |
8 files changed, 528 insertions, 0 deletions
diff --git a/reference/glibc/.deps/memcpy.Po b/reference/glibc/.deps/memcpy.Po new file mode 100644 index 000000000000..9ce06a81ea45 --- /dev/null +++ b/reference/glibc/.deps/memcpy.Po @@ -0,0 +1 @@ +# dummy diff --git a/reference/glibc/.deps/memset.Po b/reference/glibc/.deps/memset.Po new file mode 100644 index 000000000000..9ce06a81ea45 --- /dev/null +++ b/reference/glibc/.deps/memset.Po @@ -0,0 +1 @@ +# dummy diff --git a/reference/glibc/.deps/strchr.Po b/reference/glibc/.deps/strchr.Po new file mode 100644 index 000000000000..9ce06a81ea45 --- /dev/null +++ b/reference/glibc/.deps/strchr.Po @@ -0,0 +1 @@ +# dummy diff --git a/reference/glibc/.deps/strlen.Po b/reference/glibc/.deps/strlen.Po new file mode 100644 index 000000000000..9ce06a81ea45 --- /dev/null +++ b/reference/glibc/.deps/strlen.Po @@ -0,0 +1 @@ +# dummy diff --git a/reference/glibc/memcpy.S b/reference/glibc/memcpy.S new file mode 100644 index 000000000000..357a89aea4d3 --- /dev/null +++ b/reference/glibc/memcpy.S @@ -0,0 +1,229 @@ +/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + Contributed by MontaVista Software, Inc. (written by Nicolas Pitre) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +/* + * Data preload for architectures that support it (ARM V5TE and above) + */ +#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ + && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ + && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ + && !defined (__ARM_ARCH_5T__)) +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * This can be used to enable code to cacheline align the source pointer. + * Experiments on tested architectures (StrongARM and XScale) didn't show + * this a worthwhile thing to do. That might be different in the future. + */ +//#define CALGN(code...) code +#define CALGN(code...) + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#else +#define pull lsl +#define push lsr +#endif + + .text + .global memcpy + .type memcpy, %function + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + +memcpy: + + stmfd sp!, {r0, r4, lr} + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: nop + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr lr, [r1], #4 + + add pc, pc, ip + nop + nop + str r3, [r0], #4 + 
str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + str r8, [r0], #4 + str lr, [r0], #4 + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1], #1 + ldrcsb r4, [r1], #1 + ldrcsb ip, [r1] + strneb r3, [r0], #1 + strcsb r4, [r0], #1 + strcsb ip, [r0] + +#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__) + ldmfd sp!, {r0, r4, lr} + bx lr +#else + ldmfd sp!, {r0, r4, pc} +#endif + +9: rsb ip, ip, #4 + cmp ip, #2 + ldrgtb r3, [r1], #1 + ldrgeb r4, [r1], #1 + ldrb lr, [r1], #1 + strgtb r3, [r0], #1 + strgeb r4, [r0], #1 + subs r2, r2, ip + strb lr, [r0], #1 + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr lr, [r1], #4 + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldmia r1!, {r4, r5, r6, r7} + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldmia r1!, {r8, r9, ip, lr} + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, pull #\pull + ldr lr, [r1], #4 + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str r3, [r0], #4 + bgt 15b + CALGN( cmp r2, #0 ) + 
CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 diff --git a/reference/glibc/memset.S b/reference/glibc/memset.S new file mode 100644 index 000000000000..51585f4b7395 --- /dev/null +++ b/reference/glibc/memset.S @@ -0,0 +1,64 @@ +/* Copyright (C) 1998, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Philip Blundell <philb@gnu.org> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* void *memset (dstpp, c, len) */ + .text + .global memset + .type memset, %function + +memset: + mov r3, r0 + cmp r2, #8 + bcc 2f @ less than 8 bytes to move + +1: + tst r3, #3 @ aligned yet? 
+ strneb r1, [r3], #1 + subne r2, r2, #1 + bne 1b + + and r1, r1, #255 @ clear any sign bits + orr r1, r1, r1, lsl $8 + orr r1, r1, r1, lsl $16 + mov ip, r1 + +1: + subs r2, r2, #8 + stmcsia r3!, {r1, ip} @ store up to 32 bytes per loop iteration + subcss r2, r2, #8 + stmcsia r3!, {r1, ip} + subcss r2, r2, #8 + stmcsia r3!, {r1, ip} + subcss r2, r2, #8 + stmcsia r3!, {r1, ip} + bcs 1b + + and r2, r2, #7 +2: + subs r2, r2, #1 @ store up to 4 bytes per loop iteration + strcsb r1, [r3], #1 + subcss r2, r2, #1 + strcsb r1, [r3], #1 + subcss r2, r2, #1 + strcsb r1, [r3], #1 + subcss r2, r2, #1 + strcsb r1, [r3], #1 + bcs 2b + + bx lr diff --git a/reference/glibc/strchr.S b/reference/glibc/strchr.S new file mode 100644 index 000000000000..a09602714deb --- /dev/null +++ b/reference/glibc/strchr.S @@ -0,0 +1,132 @@ +/* strchr -- find the first instance of C in a nul-terminated string. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define ARCH_HAS_T2 + + .syntax unified + .text + .global strchr + .type strchr,%function + .align 4 + +strchr: + @ r0 = start of string + @ r1 = character to match + @ returns NULL for no match, or a pointer to the match + ldrb r2, [r0] @ load the first byte asap + uxtb r1, r1 + + @ To cater to long strings, we want to search through a few + @ characters until we reach an aligned pointer. To cater to + @ small strings, we don't want to start doing word operations + @ immediately. The compromise is a maximum of 16 bytes less + @ whatever is required to end with an aligned pointer. + @ r3 = number of characters to search in alignment loop + and r3, r0, #7 + rsb r3, r3, #15 @ 16 - 1 peeled loop iteration + cmp r2, r1 @ Found C? + it ne + cmpne r2, #0 @ Found EOS? + beq 99f + + @ Loop until we find ... +1: ldrb r2, [r0, #1]! + subs r3, r3, #1 @ ... the alignment point + it ne + cmpne r2, r1 @ ... or the character + it ne + cmpne r2, #0 @ ... or EOS + bne 1b + + @ Disambiguate the exit possibilities above + cmp r2, r1 @ Found the character + it ne + cmpne r2, #0 @ Found EOS + beq 99f + add r0, r0, #1 + + @ So now we're aligned. Now we actually need a stack frame. + push { r4, r5, r6, r7 } + + ldrd r2, r3, [r0], #8 + orr r1, r1, r1, lsl #8 @ Replicate C to all bytes +#ifdef ARCH_HAS_T2 + movw ip, #0x0101 + pld [r0, #64] + movt ip, #0x0101 +#else + ldr ip, =0x01010101 + pld [r0, #64] +#endif + orr r1, r1, r1, lsl #16 + + @ Loop searching for EOS or C, 8 bytes at a time. +2: + @ Subtracting (unsigned saturating) from 1 means result of 1 for + @ any byte that was originally zero and 0 otherwise. Therefore + @ we consider the lsb of each byte the "found" bit. 
+ uqsub8 r4, ip, r2 @ Find EOS + eor r6, r2, r1 @ Convert C bytes to 0 + uqsub8 r5, ip, r3 + eor r7, r3, r1 + uqsub8 r6, ip, r6 @ Find C + pld [r0, #128] @ Prefetch 2 lines ahead + uqsub8 r7, ip, r7 + orr r4, r4, r6 @ Combine found for EOS and C + orr r5, r5, r7 + orrs r6, r4, r5 @ Combine the two words + it eq + ldrdeq r2, r3, [r0], #8 + beq 2b + + @ Found something. Disambiguate between first and second words. + @ Adjust r0 to point to the word containing the match. + @ Adjust r2 to the contents of the word containing the match. + @ Adjust r4 to the found bits for the word containing the match. + cmp r4, #0 + sub r0, r0, #4 + itte eq + moveq r4, r5 + moveq r2, r3 + subne r0, r0, #4 + + @ Find the bit-offset of the match within the word. +#if defined(__ARMEL__) + @ For LE, swap the found word so clz searches from the little end. + rev r4, r4 +#else + @ For BE, byte swap the word to make it easier to extract the byte. + rev r2, r2 +#endif + @ We're counting 0x01 (not 0x80), so the bit offset is 7 too high. + clz r3, r4 + sub r3, r3, #7 + lsr r2, r2, r3 @ Shift down found byte + uxtb r1, r1 @ Undo replication of C + uxtb r2, r2 @ Extract found byte + add r0, r0, r3, lsr #3 @ Adjust the pointer to the found byte + + pop { r4, r5, r6, r7 } + + @ Disambiguate between EOS and C. +99: + cmp r2, r1 + it ne + movne r0, #0 @ Found EOS, return NULL + bx lr + .size strchr,.-strchr diff --git a/reference/glibc/strlen.S b/reference/glibc/strlen.S new file mode 100644 index 000000000000..6b3ce0a008df --- /dev/null +++ b/reference/glibc/strlen.S @@ -0,0 +1,99 @@ +/* strlen -- find the length of a nul-terminated string. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define ARCH_HAS_T2 + + .syntax unified + .text + .global strlen + .type strlen,%function + .align 4 +strlen: + @ r0 = start of string + ldrb r2, [r0] @ load the first byte asap + + @ To cater to long strings, we want to search through a few + @ characters until we reach an aligned pointer. To cater to + @ small strings, we don't want to start doing word operations + @ immediately. The compromise is a maximum of 16 bytes less + @ whatever is required to end with an aligned pointer. + @ r3 = number of characters to search in alignment loop + and r3, r0, #7 + mov r1, r0 @ Save the input pointer + rsb r3, r3, #15 @ 16 - 1 peeled loop iteration + cmp r2, #0 + beq 99f + + @ Loop until we find ... +1: + ldrb r2, [r0, #1]! + subs r3, r3, #1 @ ... the alignment point + it ne + cmpne r2, #0 @ ... or EOS + bne 1b + + @ Disambiguate the exit possibilities above + cmp r2, #0 @ Found EOS + beq 99f + add r0, r0, #1 + + @ So now we're aligned. + ldrd r2, r3, [r0], #8 +#ifdef ARCH_HAS_T2 + movw ip, #0x0101 + pld [r0, #64] + movt ip, #0x0101 +#else + ldr ip, =0x01010101 + pld [r0, #64] +#endif + + @ Loop searching for EOS, 8 bytes at a time. + @ Subtracting (unsigned saturating) from 1 for any byte means that + @ we get 1 for any byte that was originally zero and 0 otherwise. + @ Therefore we consider the lsb of each byte the "found" bit. + .balign 16 +2: uqsub8 r2, ip, r2 @ Find EOS + uqsub8 r3, ip, r3 + pld [r0, #128] @ Prefetch 2 lines ahead + orrs r3, r3, r2 @ Combine the two words + it eq + ldrdeq r2, r3, [r0], #8 + beq 2b + + @ Found something. 
Disambiguate between first and second words. + @ Adjust r0 to point to the word containing the match. + @ Adjust r2 to the found bits for the word containing the match. + cmp r2, #0 + sub r0, r0, #4 + ite eq + moveq r2, r3 + subne r0, r0, #4 + + @ Find the bit-offset of the match within the word. Note that the + @ bit result from clz will be 7 higher than "true", but we'll + @ immediately discard those bits converting to a byte offset. +#ifdef __ARMEL__ + rev r2, r2 @ For LE, count from the little end +#endif + clz r2, r2 + add r0, r0, r2, lsr #3 @ Adjust the pointer to the found byte +99: + sub r0, r0, r1 @ Subtract input to compute length + bx lr + .size strlen,.-strlen |