diff options
Diffstat (limited to 'lib/libc/riscv/string/strnlen.S')
| -rw-r--r-- | lib/libc/riscv/string/strnlen.S | 143 | 
1 files changed, 143 insertions, 0 deletions
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * size_t strnlen(const char *s, size_t maxlen)
 *
 * In:
 *	a0 - const char *s
 *	a1 - size_t maxlen
 * Out:
 *	a0 - index of the first NUL byte in s, or maxlen if there is no
 *	     NUL byte within the first maxlen bytes.
 * Scratch:
 *	a2-a5, t0-t3 (no stack usage, no calls).
 *
 * The string is scanned one naturally aligned 8-byte word at a time.
 * A word is only loaded once every byte of the string that precedes it
 * is known to be non-NUL, so no load ever touches a word that does not
 * contain at least one byte of [s, s + maxlen).
 *
 * NUL detection uses
 *	has_zero(x) = (x - 0x0101010101010101) & ~x & 0x8080808080808080
 * whose lowest set bit marks the first (lowest-address) NUL byte of the
 * word; bits above it may be false positives and are ignored.
 */
ENTRY(strnlen)
	/*
	 * a0 - const char *s;
	 * a1 - size_t maxlen;
	 * a2 - uint64_t *ptr;
	 * a3 - char iter[8];
	 * a4 - uint64_t *end_align;
	 * a5 - uint64_t *end_unroll;
	 */

	/* maxlen == 0: nothing may be examined */
	beqz a1, .Lnot_found

	/*
	 * last = s + (maxlen - 1), the address of the last byte that may
	 * be examined.  maxlen != 0 here, so (maxlen - 1) cannot wrap; if
	 * the addition wraps (e.g. maxlen == SIZE_MAX), saturate last to
	 * UINTPTR_MAX so the scan is bounded by the terminating NUL only.
	 */
	addi t3, a1, -1
	add a4, a0, t3
	sltu t3, a4, a0		/* t3 = 1 iff the addition wrapped */
	neg t3, t3		/* t3 = wrapped ? ~0 : 0 */
	or a4, a4, t3

	/* t0 = 0x0101010101010101 */
	/* t1 = 0x8080808080808080 */
	li t0, 0x01010101
	slli t1, t0, 32
	or t0, t0, t1
	slli t1, t0, 7

	/*
	 * end_align = (last | 0b111) + 1
	 * i.e. one past the aligned word holding the last byte.  This may
	 * wrap to 0 at the very top of the address space; every pointer
	 * comparison below is for (in)equality, so that is harmless.
	 */
	ori a4, a4, 0b111
	addi a4, a4, 1

	/* ptr = s & ~0b111 */
	andi a2, a0, ~0b111

	/*
	 * mask_start = t0 >> ((-s.value) << 3)
	 * srl only uses the low 6 bits of the shift amount, so this leaves
	 * 0x01 in each of the (s & 0b111) bytes that precede s in its word.
	 */
	neg t2, a0
	slli t2, t2, 3
	srl t2, t0, t2

	/* if pointer is aligned skip to loop */
	beq a0, a2, .Lskip_start

	/* iter = *ptr */
	ld a3, (a2)

	/* iter = iter | mask_start: bytes before s can never look like NUL */
	or a3, a3, t2

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

.Lskip_start:
	/* end_unroll = ptr + ((end_align - ptr) & ~0b1111) */
	sub t2, a4, a2
	andi t2, t2, ~0b1111
	add a5, a2, t2

	/* while (ptr != end_unroll) */
	beq a2, a5, .Lskip_loop
.Lloop:
	/* first word of the pair */
	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

	/*
	 * Second word of the pair.  It is loaded only now that the first
	 * word is known to be NUL-free: ptr is merely 8-byte aligned, so
	 * this word may live on the page after the one holding the
	 * terminator and must not be touched speculatively.
	 */
	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

	bne a2, a5, .Lloop

.Lskip_loop:
	/* at most one word is left between end_unroll and end_align */
	beq a2, a4, .Lnot_found

	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	beqz a3, .Lnot_found

.Lfind_zero:
	/* move ptr back to the word that contains the NUL */
	addi a2, a2, -8

	/* isolate lowest set bit: 2^(8*k + 7), k = index of first NUL byte */
	neg t0, a3
	and a3, a3, t0

	li t0, 0x0001020304050607
	srli a3, a3, 7

	/*
	 * lowest set bit is now 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left
	 */
	mul a3, a3, t0

	/* highest byte contains idx of first zero */
	srli a3, a3, 56

	/* zero_idx = (ptr - s) + k */
	sub a2, a2, a0
	add a2, a2, a3

	/*
	 * min(zero_idx, maxlen), compared as unsigned values so that a
	 * maxlen with the top bit set is handled correctly.  The NUL may
	 * sit in the final word but past maxlen, hence the clamp.
	 */
	sltu t1, a2, a1		/* t1 = (zero_idx < maxlen) */
	neg t1, t1		/* t1 = zero_idx < maxlen ? ~0 : 0 */
	sub a2, a2, a1		/* zero_idx - maxlen */
	and a2, a2, t1		/* ... or 0 when maxlen is the minimum */
	add a0, a1, a2

	ret

.Lnot_found:
	/* no NUL within the first maxlen bytes */
	mv a0, a1
	ret

END(strnlen)
