summaryrefslogtreecommitdiff
path: root/reference/glibc
diff options
context:
space:
mode:
authorAndrew Turner <andrew@FreeBSD.org>2016-09-16 15:19:31 +0000
committerAndrew Turner <andrew@FreeBSD.org>2016-09-16 15:19:31 +0000
commit162378196a8674c75b4e93addb18ef42cb9b7737 (patch)
tree35a65fc34832b001eb50dd981f9db6866797e750 /reference/glibc
Notes
Diffstat (limited to 'reference/glibc')
-rw-r--r--reference/glibc/.deps/memcpy.Po1
-rw-r--r--reference/glibc/.deps/memset.Po1
-rw-r--r--reference/glibc/.deps/strchr.Po1
-rw-r--r--reference/glibc/.deps/strlen.Po1
-rw-r--r--reference/glibc/memcpy.S229
-rw-r--r--reference/glibc/memset.S64
-rw-r--r--reference/glibc/strchr.S132
-rw-r--r--reference/glibc/strlen.S99
8 files changed, 528 insertions, 0 deletions
diff --git a/reference/glibc/.deps/memcpy.Po b/reference/glibc/.deps/memcpy.Po
new file mode 100644
index 000000000000..9ce06a81ea45
--- /dev/null
+++ b/reference/glibc/.deps/memcpy.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/reference/glibc/.deps/memset.Po b/reference/glibc/.deps/memset.Po
new file mode 100644
index 000000000000..9ce06a81ea45
--- /dev/null
+++ b/reference/glibc/.deps/memset.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/reference/glibc/.deps/strchr.Po b/reference/glibc/.deps/strchr.Po
new file mode 100644
index 000000000000..9ce06a81ea45
--- /dev/null
+++ b/reference/glibc/.deps/strchr.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/reference/glibc/.deps/strlen.Po b/reference/glibc/.deps/strlen.Po
new file mode 100644
index 000000000000..9ce06a81ea45
--- /dev/null
+++ b/reference/glibc/.deps/strlen.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/reference/glibc/memcpy.S b/reference/glibc/memcpy.S
new file mode 100644
index 000000000000..357a89aea4d3
--- /dev/null
+++ b/reference/glibc/memcpy.S
@@ -0,0 +1,229 @@
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * Data preload for architectures that support it (ARM V5TE and above)
+ */
+#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
+ && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
+ && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
+ && !defined (__ARM_ARCH_5T__))
+#define PLD(code...) code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do. That might be different in the future.
+ */
+//#define CALGN(code...) code
+#define CALGN(code...)
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull lsr
+#define push lsl
+#else
+#define pull lsl
+#define push lsr
+#endif
+
+ .text
+ .global memcpy
+ .type memcpy, %function
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+memcpy:
+ @ In: r0 = dest, r1 = src, r2 = n. r0 is saved here and reloaded on exit as the return value.
+ stmfd sp!, {r0, r4, lr}
+
+ subs r2, r2, #4 @ r2 = n - 4
+ blt 8f @ fewer than 4 bytes: byte tail only
+ ands ip, r0, #3 @ ip = dest offset within its word
+ PLD( pld [r1, #0] )
+ bne 9f @ dest unaligned: copy 1..3 head bytes first
+ ands ip, r1, #3 @ ip = src offset within its word
+ bne 10f @ src unaligned: shifting copy
+
+1: subs r2, r2, #(28) @ r2 = bytes still to copy - 32
+ stmfd sp!, {r5 - r8}
+ blt 5f @ under 32 bytes left: skip the block loop
+
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, r3 ) @ C gets set
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #0] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 4f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+3: PLD( pld [r1, #124] )
+4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} @ load 32 bytes (lr is used as scratch)
+ subs r2, r2, #32
+ stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} @ store 32 bytes
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
+
+5: ands ip, r2, #28 @ ip = 4 * whole words still to copy (0..7 words)
+ rsb ip, ip, #32 @ ip = byte offset into the load run below (flags unchanged)
+ addne pc, pc, ip @ C is always clear here
+ b 7f @ no whole words left
+6: nop @ jump base: ARM-state pc in the addne reads as this address
+ ldr r3, [r1], #4
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ ldr r6, [r1], #4
+ ldr r7, [r1], #4
+ ldr r8, [r1], #4
+ ldr lr, [r1], #4
+ @ Same computed jump again, now landing on the store that matches the first load executed.
+ add pc, pc, ip
+ nop
+ nop
+ str r3, [r0], #4
+ str r4, [r0], #4
+ str r5, [r0], #4
+ str r6, [r0], #4
+ str r7, [r0], #4
+ str r8, [r0], #4
+ str lr, [r0], #4
+
+ CALGN( bcs 2b )
+
+7: ldmfd sp!, {r5 - r8}
+
+8: movs r2, r2, lsl #31 @ NE = bit 0 of r2 set (1 byte), CS = bit 1 set (2 bytes)
+ ldrneb r3, [r1], #1
+ ldrcsb r4, [r1], #1
+ ldrcsb ip, [r1]
+ strneb r3, [r0], #1
+ strcsb r4, [r0], #1
+ strcsb ip, [r0]
+
+#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+ ldmfd sp!, {r0, r4, lr} @ r0 = original dest
+ bx lr
+#else
+ ldmfd sp!, {r0, r4, pc} @ r0 = original dest; loading pc returns
+#endif
+
+9: rsb ip, ip, #4 @ ip = bytes needed to word-align dest (1..3)
+ cmp ip, #2
+ ldrgtb r3, [r1], #1 @ only when ip is 3
+ ldrgeb r4, [r1], #1 @ only when ip is 2 or 3
+ ldrb lr, [r1], #1 @ always
+ strgtb r3, [r0], #1
+ strgeb r4, [r0], #1
+ subs r2, r2, ip @ account for the head bytes
+ strb lr, [r0], #1
+ blt 8b @ fewer than 4 bytes left: byte tail
+ ands ip, r1, #3 @ ip = src offset within its word
+ beq 1b @ src is aligned too: fast path
+
+10: bic r1, r1, #3 @ round src down to a word boundary
+ cmp ip, #2
+ ldr lr, [r1], #4 @ lr = aligned word containing the first source bytes
+ beq 17f @ src offset 2
+ bgt 18f @ src offset 3; offset 1 falls through to the first expansion
+
+
+ .macro forward_copy_shift pull push
+ @ Word-aligned dest, unaligned src. On entry lr holds the aligned source word just before r1.
+ subs r2, r2, #28 @ r2 = bytes still to copy - 32
+ blt 14f @ under 32 bytes left: skip the block loop
+
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+11: stmfd sp!, {r5 - r9}
+
+ PLD( pld [r1, #0] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 13f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+12: PLD( pld [r1, #124] )
+13: ldmia r1!, {r4, r5, r6, r7} @ next 16 source bytes
+ mov r3, lr, pull #\pull @ r3 = leftover part of the previously loaded word
+ subs r2, r2, #32
+ ldmia r1!, {r8, r9, ip, lr} @ 16 more; lr carries over to the next round
+ orr r3, r3, r4, push #\push @ ... merged with the start of the next word
+ mov r4, r4, pull #\pull
+ orr r4, r4, r5, push #\push
+ mov r5, r5, pull #\pull
+ orr r5, r5, r6, push #\push
+ mov r6, r6, pull #\pull
+ orr r6, r6, r7, push #\push
+ mov r7, r7, pull #\pull
+ orr r7, r7, r8, push #\push
+ mov r8, r8, pull #\pull
+ orr r8, r8, r9, push #\push
+ mov r9, r9, pull #\pull
+ orr r9, r9, ip, push #\push
+ mov ip, ip, pull #\pull
+ orr ip, ip, lr, push #\push
+ stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} @ store 32 realigned bytes
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
+
+ ldmfd sp!, {r5 - r9}
+
+14: ands ip, r2, #28 @ ip = 4 * whole words still to copy
+ beq 16f
+
+15: mov r3, lr, pull #\pull @ one realigned word per iteration
+ ldr lr, [r1], #4
+ subs ip, ip, #4
+ orr r3, r3, lr, push #\push
+ str r3, [r0], #4
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
+
+16: sub r1, r1, #(\push / 8) @ back r1 up from the read-ahead word to the next uncopied source byte
+ b 8b @ finish with the byte tail
+
+ .endm
+
+ @ Reached by falling through from the dispatch above: src is 1 byte past a word boundary.
+ forward_copy_shift pull=8 push=24
+ @ Label 17: src is 2 bytes past a word boundary.
+17: forward_copy_shift pull=16 push=16
+ @ Label 18: src is 3 bytes past a word boundary.
+18: forward_copy_shift pull=24 push=8
diff --git a/reference/glibc/memset.S b/reference/glibc/memset.S
new file mode 100644
index 000000000000..51585f4b7395
--- /dev/null
+++ b/reference/glibc/memset.S
@@ -0,0 +1,64 @@
+/* Copyright (C) 1998, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Philip Blundell <philb@gnu.org>
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* void *memset (dstpp, c, len) */
+ .text
+ .global memset
+ .type memset, %function
+
+memset:
+ mov r3, r0 @ r3 = write cursor; r0 is never modified and is returned as is
+ cmp r2, #8
+ bcc 2f @ fewer than 8 bytes to set
+
+1:
+ tst r3, #3 @ aligned yet?
+ strneb r1, [r3], #1 @ no: store one byte and advance
+ subne r2, r2, #1
+ bne 1b
+
+ and r1, r1, #255 @ clear any sign bits
+ orr r1, r1, r1, lsl $8 @ fill byte in the low two bytes
+ orr r1, r1, r1, lsl $16 @ fill byte in all four bytes
+ mov ip, r1 @ second copy so each stm writes 8 bytes
+
+1:
+ subs r2, r2, #8
+ stmcsia r3!, {r1, ip} @ store up to 32 bytes per loop iteration
+ subcss r2, r2, #8
+ stmcsia r3!, {r1, ip}
+ subcss r2, r2, #8
+ stmcsia r3!, {r1, ip}
+ subcss r2, r2, #8
+ stmcsia r3!, {r1, ip}
+ bcs 1b @ C clear means the last subtract borrowed: under 8 bytes left
+
+ and r2, r2, #7 @ r2 wrapped below zero; its low 3 bits are the bytes still to set
+2:
+ subs r2, r2, #1 @ store up to 4 bytes per loop iteration
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ bcs 2b
+
+ bx lr @ return with r0 = original destination
diff --git a/reference/glibc/strchr.S b/reference/glibc/strchr.S
new file mode 100644
index 000000000000..a09602714deb
--- /dev/null
+++ b/reference/glibc/strchr.S
@@ -0,0 +1,132 @@
+/* strchr -- find the first instance of C in a nul-terminated string.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define ARCH_HAS_T2
+
+ .syntax unified
+ .text
+ .global strchr
+ .type strchr,%function
+ .align 4
+
+strchr:
+ @ r0 = start of string
+ @ r1 = character to match
+ @ returns NULL for no match, or a pointer to the match
+ ldrb r2, [r0] @ load the first byte asap
+ uxtb r1, r1 @ keep only the low byte of C
+
+ @ To cater to long strings, we want to search through a few
+ @ characters until we reach an aligned pointer. To cater to
+ @ small strings, we don't want to start doing word operations
+ @ immediately. The compromise is a maximum of 16 bytes less
+ @ whatever is required to end with an aligned pointer.
+ @ r3 = number of characters to search in alignment loop
+ and r3, r0, #7 @ r3 = r0 modulo 8
+ rsb r3, r3, #15 @ 16 - 1 peeled loop iteration
+ cmp r2, r1 @ Found C?
+ it ne
+ cmpne r2, #0 @ Found EOS?
+ beq 99f @ first byte is C or EOS
+
+ @ Loop until we find ...
+1: ldrb r2, [r0, #1]! @ r0 += 1, then load the byte at r0
+ subs r3, r3, #1 @ ... the alignment point
+ it ne
+ cmpne r2, r1 @ ... or the character
+ it ne
+ cmpne r2, #0 @ ... or EOS
+ bne 1b
+
+ @ Disambiguate the exit possibilities above
+ cmp r2, r1 @ Found the character
+ it ne
+ cmpne r2, #0 @ Found EOS
+ beq 99f
+ add r0, r0, #1 @ step past the last byte checked; r0 is now 8-byte aligned
+
+ @ So now we're aligned. Now we actually need a stack frame.
+ push { r4, r5, r6, r7 }
+
+ ldrd r2, r3, [r0], #8 @ r2, r3 = next 8 bytes; r0 now points 8 past them
+ orr r1, r1, r1, lsl #8 @ Replicate C to all bytes
+#ifdef ARCH_HAS_T2
+ movw ip, #0x0101
+ pld [r0, #64]
+ movt ip, #0x0101 @ ip = 0x01010101, built in two halves
+#else
+ ldr ip, =0x01010101
+ pld [r0, #64]
+#endif
+ orr r1, r1, r1, lsl #16 @ r1 = C in all four bytes
+
+ @ Loop searching for EOS or C, 8 bytes at a time.
+2:
+ @ Subtracting (unsigned saturating) from 1 means result of 1 for
+ @ any byte that was originally zero and 0 otherwise. Therefore
+ @ we consider the lsb of each byte the "found" bit.
+ uqsub8 r4, ip, r2 @ Find EOS
+ eor r6, r2, r1 @ Convert C bytes to 0
+ uqsub8 r5, ip, r3
+ eor r7, r3, r1
+ uqsub8 r6, ip, r6 @ Find C
+ pld [r0, #128] @ Prefetch 2 lines ahead
+ uqsub8 r7, ip, r7
+ orr r4, r4, r6 @ Combine found for EOS and C
+ orr r5, r5, r7
+ orrs r6, r4, r5 @ Combine the two words
+ it eq
+ ldrdeq r2, r3, [r0], #8
+ beq 2b
+
+ @ Found something. Disambiguate between first and second words.
+ @ Adjust r0 to point to the word containing the match.
+ @ Adjust r2 to the contents of the word containing the match.
+ @ Adjust r4 to the found bits for the word containing the match.
+ cmp r4, #0 @ any found bit in the first word?
+ sub r0, r0, #4 @ r0 -> second word of the pair
+ itte eq
+ moveq r4, r5 @ no: take the second word's found bits ...
+ moveq r2, r3 @ ... and its contents
+ subne r0, r0, #4 @ yes: r0 -> first word of the pair
+
+ @ Find the bit-offset of the match within the word.
+#if defined(__ARMEL__)
+ @ For LE, swap the found word so clz searches from the little end.
+ rev r4, r4
+#else
+ @ For BE, byte swap the word to make it easier to extract the byte.
+ rev r2, r2
+#endif
+ @ We're counting 0x01 (not 0x80), so the bit offset is 7 too high.
+ clz r3, r4
+ sub r3, r3, #7 @ r3 = 8 * (index of the first found byte)
+ lsr r2, r2, r3 @ Shift down found byte
+ uxtb r1, r1 @ Undo replication of C
+ uxtb r2, r2 @ Extract found byte
+ add r0, r0, r3, lsr #3 @ Adjust the pointer to the found byte
+
+ pop { r4, r5, r6, r7 }
+
+ @ Disambiguate between EOS and C.
+99:
+ cmp r2, r1 @ r2 = byte at r0, r1 = C as a single byte
+ it ne
+ movne r0, #0 @ Found EOS, return NULL
+ bx lr
+ .size strchr,.-strchr
diff --git a/reference/glibc/strlen.S b/reference/glibc/strlen.S
new file mode 100644
index 000000000000..6b3ce0a008df
--- /dev/null
+++ b/reference/glibc/strlen.S
@@ -0,0 +1,99 @@
+/* strlen -- find the length of a nul-terminated string.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define ARCH_HAS_T2
+
+ .syntax unified
+ .text
+ .global strlen
+ .type strlen,%function
+ .align 4
+strlen:
+ @ r0 = start of string
+ ldrb r2, [r0] @ load the first byte asap
+
+ @ To cater to long strings, we want to search through a few
+ @ characters until we reach an aligned pointer. To cater to
+ @ small strings, we don't want to start doing word operations
+ @ immediately. The compromise is a maximum of 16 bytes less
+ @ whatever is required to end with an aligned pointer.
+ @ r3 = number of characters to search in alignment loop
+ and r3, r0, #7 @ r3 = r0 modulo 8
+ mov r1, r0 @ Save the input pointer
+ rsb r3, r3, #15 @ 16 - 1 peeled loop iteration
+ cmp r2, #0
+ beq 99f @ empty string: r0 still equals r1, so the result is 0
+
+ @ Loop until we find ...
+1:
+ ldrb r2, [r0, #1]! @ r0 += 1, then load the byte at r0
+ subs r3, r3, #1 @ ... the alignment point
+ it ne
+ cmpne r2, #0 @ ... or EOS
+ bne 1b
+
+ @ Disambiguate the exit possibilities above
+ cmp r2, #0 @ Found EOS
+ beq 99f @ r0 points at the NUL byte
+ add r0, r0, #1 @ step past the last byte checked; r0 is now 8-byte aligned
+
+ @ So now we're aligned.
+ ldrd r2, r3, [r0], #8 @ r2, r3 = next 8 bytes; r0 now points 8 past them
+#ifdef ARCH_HAS_T2
+ movw ip, #0x0101
+ pld [r0, #64]
+ movt ip, #0x0101 @ ip = 0x01010101, built in two halves
+#else
+ ldr ip, =0x01010101
+ pld [r0, #64]
+#endif
+
+ @ Loop searching for EOS, 8 bytes at a time.
+ @ Subtracting (unsigned saturating) from 1 for any byte means that
+ @ we get 1 for any byte that was originally zero and 0 otherwise.
+ @ Therefore we consider the lsb of each byte the "found" bit.
+ .balign 16
+2: uqsub8 r2, ip, r2 @ Find EOS
+ uqsub8 r3, ip, r3
+ pld [r0, #128] @ Prefetch 2 lines ahead
+ orrs r3, r3, r2 @ Combine the two words
+ it eq
+ ldrdeq r2, r3, [r0], #8
+ beq 2b
+
+ @ Found something. Disambiguate between first and second words.
+ @ Adjust r0 to point to the word containing the match.
+ @ Adjust r2 to the found bits for the word containing the match.
+ cmp r2, #0 @ EOS in the first word?
+ sub r0, r0, #4 @ r0 -> second word of the pair
+ ite eq
+ moveq r2, r3 @ no: r2 is 0, so the combined r3 is just the second word's bits
+ subne r0, r0, #4 @ yes: r0 -> first word of the pair
+
+ @ Find the bit-offset of the match within the word. Note that the
+ @ bit result from clz will be 7 higher than "true", but we'll
+ @ immediately discard those bits converting to a byte offset.
+#ifdef __ARMEL__
+ rev r2, r2 @ For LE, count from the little end
+#endif
+ clz r2, r2 @ r2 = 7 + 8 * (index of the first NUL byte)
+ add r0, r0, r2, lsr #3 @ Adjust the pointer to the found byte
+99:
+ sub r0, r0, r1 @ Subtract input to compute length
+ bx lr
+ .size strlen,.-strlen