diff options
Diffstat (limited to 'string/aarch64/experimental/memcmp-sve.S')
-rw-r--r-- | string/aarch64/experimental/memcmp-sve.S | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/string/aarch64/experimental/memcmp-sve.S b/string/aarch64/experimental/memcmp-sve.S new file mode 100644 index 000000000000..ad3534836d04 --- /dev/null +++ b/string/aarch64/experimental/memcmp-sve.S @@ -0,0 +1,46 @@ +/* + * memcmp - compare memory + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__memcmp_aarch64_sve) + mov x3, 0 /* initialize off */ + +0: whilelo p0.b, x3, x2 /* while off < max */ + b.none 9f + + ld1b z0.b, p0/z, [x0, x3] /* read vectors bounded by max. */ + ld1b z1.b, p0/z, [x1, x3] + + /* Increment for a whole vector, even if we've only read a partial. + This is significantly cheaper than INCP, and since OFF is not + used after the loop it is ok to increment OFF past MAX. */ + incb x3 + + cmpne p1.b, p0/z, z0.b, z1.b /* while no inequalities */ + b.none 0b + + /* Found inequality. */ +1: brkb p1.b, p0/z, p1.b /* find first such */ + lasta w0, p1, z0.b /* extract each byte */ + lasta w1, p1, z1.b + sub x0, x0, x1 /* return comparison */ + ret + + /* Found end-of-count. */ +9: mov x0, 0 /* return equality */ + ret + +END (__memcmp_aarch64_sve) |