Revert "Reimplement strlen" - src

diff options


context:
space:
mode:

author	Mateusz Guzik <mjg@FreeBSD.org>	2021-02-03 19:38:10 +0000
committer	Mateusz Guzik <mjg@FreeBSD.org>	2021-02-03 19:38:10 +0000
commit	33f0540b13d949c7cc226a79927ddc2062ff98bf (patch)
tree	e40ab3b45d3a93e0101a2feb40ae680ec1bbf800 /lib/libc/string
parent	ae71b794cbed19e5e25effc3438720ad452ab87c (diff)

Diffstat (limited to 'lib/libc/string')

-rw-r--r--

lib/libc/string/strlen.c

1 files changed, 54 insertions, 28 deletions

diff --git a/lib/libc/string/strlen.c b/lib/libc/string/strlen.c
index 0bdc81d7bb9a..a862ffc245ca 100644
--- a/lib/libc/string/strlen.c
+++ b/lib/libc/string/strlen.c

@@ -35,6 +35,10 @@ __FBSDID("$FreeBSD$");

* Portable strlen() for 32-bit and 64-bit systems.

+ * Rationale: it is generally much more efficient to do word length

+ * operations and avoid branches on modern computer systems, as

+ * compared to byte-length operations with a lot of branches.

+ *

* The expression:

* ((x - 0x01....01) & ~x & 0x80....80)

@@ -42,13 +46,18 @@ __FBSDID("$FreeBSD$");

* would evaluate to a non-zero value iff any of the bytes in the

* original word is zero.

+ * On multi-issue processors, we can divide the above expression into:

+ * a) (x - 0x01....01)

+ * b) (~x & 0x80....80)

+ * c) a & b

+ *

+ * Where, a) and b) can be partially computed in parallel.

+ *

* The algorithm above is found on "Hacker's Delight" by

* Henry S. Warren, Jr.

- *

- * Note: this leaves performance on the table and each architecture

- * would be best served with a tailor made routine instead.

+/* Magic numbers for the algorithm */

#if LONG_BIT == 32

static const unsigned long mask01 = 0x01010101;

static const unsigned long mask80 = 0x80808080;

@@ -61,45 +70,62 @@ static const unsigned long mask80 = 0x8080808080808080;

#define LONGPTR_MASK (sizeof(long) - 1)

-#if BYTE_ORDER == LITTLE_ENDIAN

-#define FINDZERO __builtin_ctzl

-#else

-#define FINDZERO __builtin_clzl

-#endif

+/*

+ * Helper macro to return string length if we caught the zero

+ * byte.

+ */

+#define testbyte(x) \

+ do { \

+ if (p[x] == '\0') \

+ return (p - str + x); \

+ } while (0)

size_t

strlen(const char *str)

{

+ const char *p;

const unsigned long *lp;

- unsigned long mask;

long va, vb;

- long val;

- lp = (unsigned long *) (uintptr_t) str;

- if ((uintptr_t)lp & LONGPTR_MASK) {

- lp = (__typeof(lp)) ((uintptr_t)lp & ~LONGPTR_MASK);

-#if BYTE_ORDER == LITTLE_ENDIAN

- mask = ~(~0UL << (((uintptr_t)str & LONGPTR_MASK) << 3));

-#else

- mask = ~(~0UL >> (((uintptr_t)str & LONGPTR_MASK) << 3));

-#endif

- val = *lp | mask;

- va = (val - mask01);

- vb = ((~val) & mask80);

- if (va & vb) {

- return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));

- }

- lp++;

- }

+ /*

+ * Before trying the hard (unaligned byte-by-byte access) way

+ * to figure out whether there is a nul character, try to see

+ * if there is a nul character is within this accessible word

+ * first.

+ *

+ * p and (p & ~LONGPTR_MASK) must be equally accessible since

+ * they always fall in the same memory page, as long as page

+ * boundaries is integral multiple of word size.

+ */

+ lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);

+ va = (*lp - mask01);

+ vb = ((~*lp) & mask80);

+ lp++;

+ if (va & vb)

+ /* Check if we have \0 in the first part */

+ for (p = str; p < (const char *)lp; p++)

+ if (*p == '\0')

+ return (p - str);

+ /* Scan the rest of the string using word sized operation */

for (; ; lp++) {

va = (*lp - mask01);

vb = ((~*lp) & mask80);

if (va & vb) {

- return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));

+ p = (const char *)(lp);

+ testbyte(0);

+ testbyte(1);

+ testbyte(2);

+ testbyte(3);

+#if (LONG_BIT >= 64)

+ testbyte(4);

+ testbyte(5);

+ testbyte(6);

+ testbyte(7);

+#endif

}

- __builtin_unreachable();

+ /* NOTREACHED */

return (0);

}