aboutsummaryrefslogtreecommitdiff
path: root/string/aarch64/experimental/strchr-sve.S
diff options
context:
space:
mode:
Diffstat (limited to 'string/aarch64/experimental/strchr-sve.S')
-rw-r--r--string/aarch64/experimental/strchr-sve.S67
1 files changed, 67 insertions, 0 deletions
diff --git a/string/aarch64/experimental/strchr-sve.S b/string/aarch64/experimental/strchr-sve.S
new file mode 100644
index 000000000000..7d74ae9ff232
--- /dev/null
+++ b/string/aarch64/experimental/strchr-sve.S
@@ -0,0 +1,67 @@
+/*
+ * strchr/strchrnul - find a character in a string
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * SVE Available.
+ */
+
+/* To build as strchrnul, define BUILD_STRCHRNUL before compiling this file. */
+#ifdef BUILD_STRCHRNUL
+#define FUNC __strchrnul_aarch64_sve
+#else
+#define FUNC __strchr_aarch64_sve
+#endif
+
+ENTRY (FUNC)
+ dup z1.b, w1 /* replicate byte across vector */
+ setffr /* initialize FFR */
+ ptrue p1.b /* all ones; loop invariant */
+
+ .p2align 4
+ /* Read a vector's worth of bytes, stopping on first fault. */
+0: ldff1b z0.b, p1/z, [x0, xzr]
+ rdffrs p0.b, p1/z
+ b.nlast 2f
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x0 /* speculate increment */
+ cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */
+ cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */
+ orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */
+ b.none 0b
+ decb x0 /* undo speculate */
+
+ /* Found C or 0. */
+1: brka p4.b, p1/z, p4.b /* find first such */
+ sub x0, x0, 1 /* adjust pointer for that byte */
+ incp x0, p4.b
+#ifndef BUILD_STRCHRNUL
+ ptest p4, p2.b /* was first in c? */
+ csel x0, xzr, x0, none /* if there was no c, return null */
+#endif
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparision only on the valid bytes. */
+2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */
+ cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */
+ orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */
+ b.any 1b
+
+ /* No C or 0 found. Re-init FFR, increment, and loop. */
+ setffr
+ incp x0, p0.b
+ b 0b
+
+END (FUNC)