diff options
Diffstat (limited to 'string/aarch64/experimental/strchr-sve.S')
-rw-r--r-- | string/aarch64/experimental/strchr-sve.S | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/string/aarch64/experimental/strchr-sve.S b/string/aarch64/experimental/strchr-sve.S new file mode 100644 index 000000000000..7d74ae9ff232 --- /dev/null +++ b/string/aarch64/experimental/strchr-sve.S @@ -0,0 +1,67 @@ +/* + * strchr/strchrnul - find a character in a string + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +/* To build as strchrnul, define BUILD_STRCHRNUL before compiling this file. */ +#ifdef BUILD_STRCHRNUL +#define FUNC __strchrnul_aarch64_sve +#else +#define FUNC __strchr_aarch64_sve +#endif + +ENTRY (FUNC) + dup z1.b, w1 /* replicate byte across vector */ + setffr /* initialize FFR */ + ptrue p1.b /* all ones; loop invariant */ + + .p2align 4 + /* Read a vector's worth of bytes, stopping on first fault. */ +0: ldff1b z0.b, p1/z, [x0, xzr] + rdffrs p0.b, p1/z + b.nlast 2f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x0 /* speculate increment */ + cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ + cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ + orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */ + b.none 0b + decb x0 /* undo speculate */ + + /* Found C or 0. */ +1: brka p4.b, p1/z, p4.b /* find first such */ + sub x0, x0, 1 /* adjust pointer for that byte */ + incp x0, p4.b +#ifndef BUILD_STRCHRNUL + ptest p4, p2.b /* was first in c? */ + csel x0, xzr, x0, none /* if there was no c, return null */ +#endif + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparision only on the valid bytes. */ +2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */ + cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ + orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */ + b.any 1b + + /* No C or 0 found. Re-init FFR, increment, and loop. */ + setffr + incp x0, p0.b + b 0b + +END (FUNC) |