Diffstat (limited to 'lib/libc/amd64/string/memcmp.S')
-rw-r--r--	lib/libc/amd64/string/memcmp.S	420
1 file changed, 420 insertions, 0 deletions
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
new file mode 100644
index 000000000000..dc8bcff73cb9
--- /dev/null
+++ b/lib/libc/amd64/string/memcmp.S
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 2018, 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "amd64_archlevel.h"
+
+/*
+ * Note: the scalar variant of this routine was written with kernel use
+ * in mind (read: no SIMD); it is only present in userspace as a temporary
+ * measure until something better gets imported.
+ */
+
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+#ifdef BCMP
+#define memcmp bcmp
+#endif
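+
+/*
+ * When assembled with BCMP defined, this file provides bcmp() instead of
+ * memcmp().  bcmp() only has to report whether the buffers differ, so the
+ * BCMP-only paths below return any nonzero value instead of computing
+ * which byte differs.
+ */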
+
+ARCHFUNCS(memcmp)
+ ARCHFUNC(memcmp, scalar)
+ ARCHFUNC(memcmp, baseline)
+ENDARCHFUNCS(memcmp)
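+
+/*
+ * ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS come from amd64_archlevel.h; they
+ * presumably arrange for one of the two variants below to be picked at
+ * load time, preferring the SSE2 baseline version and keeping the
+ * scalar version as a fallback.
+ */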
+
+ARCHENTRY(memcmp, scalar)
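+	/*
+	 * The numeric local labels appear to encode the size range a block
+	 * handles (e.g. 100816 compares sizes in the 8--16 byte range using
+	 * overlapping 8-byte head/tail loads); longer labels such as
+	 * 10081608 name the load within that block that saw the mismatch.
+	 */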
+ xorl %eax,%eax
+10:
+ cmpq $16,%rdx
+ ja 101632f
+
+ cmpb $8,%dl
+ jg 100816f
+
+ cmpb $4,%dl
+ jg 100408f
+
+ cmpb $2,%dl
+ jge 100204f
+
+ cmpb $1,%dl
+ jl 100000f
+ movzbl (%rdi),%eax
+ movzbl (%rsi),%r8d
+ subl %r8d,%eax
+100000:
+ ret
+
+ ALIGN_TEXT
+100816:
+ movq (%rdi),%r8
+ movq (%rsi),%r9
+ cmpq %r8,%r9
+ jne 80f
+ movq -8(%rdi,%rdx),%r8
+ movq -8(%rsi,%rdx),%r9
+ cmpq %r8,%r9
+ jne 10081608f
+ ret
+ ALIGN_TEXT
+100408:
+ movl (%rdi),%r8d
+ movl (%rsi),%r9d
+ cmpl %r8d,%r9d
+ jne 80f
+ movl -4(%rdi,%rdx),%r8d
+ movl -4(%rsi,%rdx),%r9d
+ cmpl %r8d,%r9d
+ jne 10040804f
+ ret
+ ALIGN_TEXT
+100204:
+ movzwl (%rdi),%r8d
+ movzwl (%rsi),%r9d
+ cmpl %r8d,%r9d
+ jne 1f
+ movzwl -2(%rdi,%rdx),%r8d
+ movzwl -2(%rsi,%rdx),%r9d
+ cmpl %r8d,%r9d
+ jne 1f
+ ret
+ ALIGN_TEXT
+101632:
+ cmpq $32,%rdx
+ ja 103200f
+ movq (%rdi),%r8
+ movq (%rsi),%r9
+ cmpq %r8,%r9
+ jne 80f
+ movq 8(%rdi),%r8
+ movq 8(%rsi),%r9
+ cmpq %r8,%r9
+ jne 10163208f
+ movq -16(%rdi,%rdx),%r8
+ movq -16(%rsi,%rdx),%r9
+ cmpq %r8,%r9
+ jne 10163216f
+ movq -8(%rdi,%rdx),%r8
+ movq -8(%rsi,%rdx),%r9
+ cmpq %r8,%r9
+ jne 10163224f
+ ret
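+
+/*
+ * Buffers longer than 32 bytes are compared 32 bytes per iteration.  The
+ * sub/or sequence only detects that some quadword differs; for memcmp the
+ * exact position is recovered by the mismatch handlers further down.  A
+ * tail of fewer than 32 bytes is handled by going back to the size
+ * dispatch at label 10.
+ */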
+ ALIGN_TEXT
+103200:
+ movq (%rdi),%r8
+ movq 8(%rdi),%r9
+ subq (%rsi),%r8
+ subq 8(%rsi),%r9
+ orq %r8,%r9
+ jnz 10320000f
+
+ movq 16(%rdi),%r8
+ movq 24(%rdi),%r9
+ subq 16(%rsi),%r8
+ subq 24(%rsi),%r9
+ orq %r8,%r9
+ jnz 10320016f
+
+ leaq 32(%rdi),%rdi
+ leaq 32(%rsi),%rsi
+ subq $32,%rdx
+ cmpq $32,%rdx
+ jae 103200b
+ cmpb $0,%dl
+ jne 10b
+ ret
+
+/*
+ * Mismatch was found.
+ */
+#ifdef BCMP
+ ALIGN_TEXT
+10320016:
+10320000:
+10081608:
+10163224:
+10163216:
+10163208:
+10040804:
+80:
+1:
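+	/*
+	 * %eax is still zero on every path that reaches these labels;
+	 * any nonzero value will do for bcmp().
+	 */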
+ leal 1(%eax),%eax
+ ret
+#else
+/*
+ * We need to compute the difference between strings.
+ * Start with narrowing the range down (16 -> 8 -> 4 bytes).
+ */
+ ALIGN_TEXT
+10320016:
+ leaq 16(%rdi),%rdi
+ leaq 16(%rsi),%rsi
+10320000:
+ movq (%rdi),%r8
+ movq (%rsi),%r9
+ cmpq %r8,%r9
+ jne 80f
+ leaq 8(%rdi),%rdi
+ leaq 8(%rsi),%rsi
+ jmp 80f
+ ALIGN_TEXT
+10081608:
+10163224:
+ leaq -8(%rdi,%rdx),%rdi
+ leaq -8(%rsi,%rdx),%rsi
+ jmp 80f
+ ALIGN_TEXT
+10163216:
+ leaq -16(%rdi,%rdx),%rdi
+ leaq -16(%rsi,%rdx),%rsi
+ jmp 80f
+ ALIGN_TEXT
+10163208:
+ leaq 8(%rdi),%rdi
+ leaq 8(%rsi),%rsi
+ jmp 80f
+ ALIGN_TEXT
+10040804:
+ leaq -4(%rdi,%rdx),%rdi
+ leaq -4(%rsi,%rdx),%rsi
+ jmp 1f
+
+ ALIGN_TEXT
+80:
+ movl (%rdi),%r8d
+ movl (%rsi),%r9d
+ cmpl %r8d,%r9d
+ jne 1f
+ leaq 4(%rdi),%rdi
+ leaq 4(%rsi),%rsi
+
+/*
+ * We have up to 4 bytes to inspect.
+ */
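+/*
+ * A mismatch is guaranteed to lie within these four bytes on every path
+ * that gets here, which is why the last byte pair is not tested before
+ * the final subtraction.
+ */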
+1:
+ movzbl (%rdi),%eax
+ movzbl (%rsi),%r8d
+ cmpb %r8b,%al
+ jne 2f
+
+ movzbl 1(%rdi),%eax
+ movzbl 1(%rsi),%r8d
+ cmpb %r8b,%al
+ jne 2f
+
+ movzbl 2(%rdi),%eax
+ movzbl 2(%rsi),%r8d
+ cmpb %r8b,%al
+ jne 2f
+
+ movzbl 3(%rdi),%eax
+ movzbl 3(%rsi),%r8d
+2:
+ subl %r8d,%eax
+ ret
+#endif
+ARCHEND(memcmp, scalar)
+
+ARCHENTRY(memcmp, baseline)
+ cmp $32, %rdx # enough to permit use of the long kernel?
+ ja .Llong
+
+ test %rdx, %rdx # zero bytes buffer?
+ je .L0
+
+ /*
+ * Compare strings of 1--32 bytes. We want to do this by
+ * loading into two xmm registers and then comparing. To avoid
+ * crossing into unmapped pages, we either load 32 bytes from
+ * the start of the buffer or 32 bytes before its end, depending
+ * on whether the overread area would cross a page boundary
+ * or not.
+ */
+
+	/* check whether the 32-byte overreads would cross a page boundary */
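+	/*
+	 * The last overread byte (base + 31) and the last valid byte
+	 * (base + len - 1) are at most 31 bytes apart, so they lie either
+	 * in the same page or in adjacent pages; xoring the two addresses
+	 * and testing the PAGE_SIZE bit detects the latter case.
+	 */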
+ lea 31(%rdi), %eax # end of overread
+ lea 31(%rsi), %r8d
+ lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
+ lea -1(%rsi, %rdx, 1), %r9d
+ xor %ecx, %eax
+ xor %r9d, %r8d
+ test $PAGE_SIZE, %eax # are they on different pages?
+ jz 0f
+
+ /* fix up rdi */
+ movdqu -32(%rdi, %rdx, 1), %xmm0
+ movdqu -16(%rdi, %rdx, 1), %xmm1
+ lea -8(%rsp), %rdi # end of replacement buffer
+ sub %rdx, %rdi # start of replacement buffer
+ movdqa %xmm0, -40(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -24(%rsp)
+
+0: test $PAGE_SIZE, %r8d
+ jz 0f
+
+ /* fix up rsi */
+ movdqu -32(%rsi, %rdx, 1), %xmm0
+ movdqu -16(%rsi, %rdx, 1), %xmm1
+ lea -40(%rsp), %rsi # end of replacement buffer
+ sub %rdx, %rsi # start of replacement buffer
+ movdqa %xmm0, -72(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -56(%rsp)
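+	/* both replacement buffers lie in the 128-byte red zone below %rsp */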
+
+ /* load data and compare properly */
+0: movdqu 16(%rdi), %xmm1
+ movdqu 16(%rsi), %xmm3
+ movdqu (%rdi), %xmm0
+ movdqu (%rsi), %xmm2
+ mov %edx, %ecx
+ mov $-1, %edx
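+	# 64-bit shift: a count of 32 (len == 32) would be truncated to 0
+	# by a 32-bit shift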
+ shl %cl, %rdx # ones where the buffer is not
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ or %edx, %eax # including where the buffer is not
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
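+	# if there was no mismatch, %eax is 0: bsf sets ZF, cmovz substitutes
+	# index 0, two equal bytes are loaded and the result is 0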
+ bsf %eax, %edx # location of the first mismatch
+ cmovz %eax, %edx # including if there is no mismatch
+ movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
+ movzbl (%rsi, %rdx, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+ /* empty input */
+.L0: xor %eax, %eax
+ ret
+
+ /* compare 33+ bytes */
+ ALIGN_TEXT
+.Llong: movdqu (%rdi), %xmm0 # load head
+ movdqu (%rsi), %xmm2
+ mov %rdi, %rcx
+ sub %rdi, %rsi # express rsi as distance from rdi
+ and $~0xf, %rdi # align rdi to 16 bytes
+ movdqu 16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
+	add %rcx, %rdx		# pointer one past the last byte of the buffer
+ jc .Loverflow # did this overflow?
+0: pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ xor $0xffff, %eax # any mismatch?
+ jne .Lmismatch_head
+ add $64, %rdi # advance to next iteration
+ jmp 1f # and get going with the loop
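+	/*
+	 * From here on, %rdi is 16-byte aligned and %rsi holds the distance
+	 * between the two buffers, so the second buffer is read through
+	 * (%rsi, %rdi, 1).  The head compare and the first aligned chunk may
+	 * overlap; comparing some bytes twice is harmless.
+	 */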
+
+ /*
+ * If we got here, a buffer length was passed to memcmp(a, b, len)
+ * such that a + len < a. While this sort of usage is illegal,
+ * it is plausible that a caller tries to do something like
+ * memcmp(a, b, SIZE_MAX) if a and b are known to differ, intending
+ * for memcmp() to stop comparing at the first mismatch. This
+ * behaviour is not guaranteed by any version of ISO/IEC 9899,
+ * but usually works out in practice. Let's try to make this
+ * case work by comparing until the end of the address space.
+ */
+.Loverflow:
+ mov $-1, %rdx # compare until the end of memory
+ jmp 0b
+
+ /* process buffer 32 bytes at a time */
+ ALIGN_TEXT
+0: movdqu -32(%rsi, %rdi, 1), %xmm0
+ movdqu -16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -32(%rdi), %xmm0
+ pcmpeqb -16(%rdi), %xmm1
+ add $32, %rdi # advance to next iteration
+1: pand %xmm0, %xmm1 # 0xff where both halves matched
+ pmovmskb %xmm1, %eax
+ cmp $0xffff, %eax # all bytes matched?
+ jne .Lmismatch
+ cmp %rdx, %rdi # end of buffer reached?
+ jb 0b
+
+ /* less than 32 bytes left to compare */
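+	/* this load may overlap bytes the loop already compared; that is harmless */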
+ movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
+ movdqu -16(%rdx, %rsi, 1), %xmm3
+ movdqu -32(%rdx), %xmm0
+ movdqu -32(%rdx, %rsi, 1), %xmm2
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
+ bsf %eax, %ecx # location of the first mismatch
+ cmovz %eax, %ecx # including if there is no mismatch
+ add %rcx, %rdx # pointer to potential mismatch
+ movzbl -32(%rdx), %eax # mismatching bytes
+ movzbl -32(%rdx, %rsi, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+#ifdef BCMP
+.Lmismatch:
+ mov $1, %eax
+.Lmismatch_head:
+ ret
+#else /* memcmp */
+.Lmismatch_head:
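+	# %rcx still holds the original %rdi; %rsi is the distance between the buffers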
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rcx # pointer to mismatch
+ movzbl (%rcx), %eax # mismatching bytes
+ movzbl (%rcx, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+
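+/*
+ * %rdi has already been advanced 64 bytes past the start of the 32-byte
+ * block in which the mismatch was detected, hence the -48/-64
+ * displacements below.
+ */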
+.Lmismatch:
+ movdqu -48(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
+ pmovmskb %xmm0, %eax # mismatches in first 16 bytes
+ pmovmskb %xmm1, %edx # mismatches in second 16 bytes
+ shl $16, %edx
+ or %edx, %eax # mismatches in both
+ not %eax # matches in both
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rdi # pointer to mismatch
+ movzbl -64(%rdi), %eax # mismatching bytes
+ movzbl -64(%rdi, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+#endif
+ARCHEND(memcmp, baseline)
+
+ .section .note.GNU-stack,"",%progbits