aboutsummaryrefslogtreecommitdiff
path: root/lib/libc/string
diff options
context:
space:
mode:
authorMateusz Guzik <mjg@FreeBSD.org>2021-02-03 19:38:10 +0000
committerMateusz Guzik <mjg@FreeBSD.org>2021-02-03 19:38:10 +0000
commit33f0540b13d949c7cc226a79927ddc2062ff98bf (patch)
treee40ab3b45d3a93e0101a2feb40ae680ec1bbf800 /lib/libc/string
parentae71b794cbed19e5e25effc3438720ad452ab87c (diff)
Diffstat (limited to 'lib/libc/string')
-rw-r--r--lib/libc/string/strlen.c82
1 files changed, 54 insertions, 28 deletions
diff --git a/lib/libc/string/strlen.c b/lib/libc/string/strlen.c
index 0bdc81d7bb9a..a862ffc245ca 100644
--- a/lib/libc/string/strlen.c
+++ b/lib/libc/string/strlen.c
@@ -35,6 +35,10 @@ __FBSDID("$FreeBSD$");
/*
* Portable strlen() for 32-bit and 64-bit systems.
*
+ * Rationale: it is generally much more efficient to do word length
+ * operations and avoid branches on modern computer systems, as
+ * compared to byte-length operations with a lot of branches.
+ *
* The expression:
*
* ((x - 0x01....01) & ~x & 0x80....80)
@@ -42,13 +46,18 @@ __FBSDID("$FreeBSD$");
* would evaluate to a non-zero value iff any of the bytes in the
* original word is zero.
*
+ * On multi-issue processors, we can divide the above expression into:
+ * a) (x - 0x01....01)
+ * b) (~x & 0x80....80)
+ * c) a & b
+ *
+ * Where, a) and b) can be partially computed in parallel.
+ *
* The algorithm above is found on "Hacker's Delight" by
* Henry S. Warren, Jr.
- *
- * Note: this leaves performance on the table and each architecture
- * would be best served with a tailor made routine instead.
*/
+/* Magic numbers for the algorithm */
#if LONG_BIT == 32
static const unsigned long mask01 = 0x01010101;
static const unsigned long mask80 = 0x80808080;
@@ -61,45 +70,62 @@ static const unsigned long mask80 = 0x8080808080808080;
#define LONGPTR_MASK (sizeof(long) - 1)
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define FINDZERO __builtin_ctzl
-#else
-#define FINDZERO __builtin_clzl
-#endif
+/*
+ * Helper macro to return string length if we caught the zero
+ * byte.
+ */
+#define testbyte(x) \
+ do { \
+ if (p[x] == '\0') \
+ return (p - str + x); \
+ } while (0)
size_t
strlen(const char *str)
{
+ const char *p;
const unsigned long *lp;
- unsigned long mask;
long va, vb;
- long val;
- lp = (unsigned long *) (uintptr_t) str;
- if ((uintptr_t)lp & LONGPTR_MASK) {
- lp = (__typeof(lp)) ((uintptr_t)lp & ~LONGPTR_MASK);
-#if BYTE_ORDER == LITTLE_ENDIAN
- mask = ~(~0UL << (((uintptr_t)str & LONGPTR_MASK) << 3));
-#else
- mask = ~(~0UL >> (((uintptr_t)str & LONGPTR_MASK) << 3));
-#endif
- val = *lp | mask;
- va = (val - mask01);
- vb = ((~val) & mask80);
- if (va & vb) {
- return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
- }
- lp++;
- }
+ /*
+ * Before trying the hard (unaligned byte-by-byte access) way
+ * to figure out whether there is a nul character, try to see
+ * if there is a nul character is within this accessible word
+ * first.
+ *
+ * p and (p & ~LONGPTR_MASK) must be equally accessible since
+ * they always fall in the same memory page, as long as page
+ * boundaries is integral multiple of word size.
+ */
+ lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
+ va = (*lp - mask01);
+ vb = ((~*lp) & mask80);
+ lp++;
+ if (va & vb)
+ /* Check if we have \0 in the first part */
+ for (p = str; p < (const char *)lp; p++)
+ if (*p == '\0')
+ return (p - str);
+ /* Scan the rest of the string using word sized operation */
for (; ; lp++) {
va = (*lp - mask01);
vb = ((~*lp) & mask80);
if (va & vb) {
- return ((const char *)lp - str + (FINDZERO(va & vb) >> 3));
+ p = (const char *)(lp);
+ testbyte(0);
+ testbyte(1);
+ testbyte(2);
+ testbyte(3);
+#if (LONG_BIT >= 64)
+ testbyte(4);
+ testbyte(5);
+ testbyte(6);
+ testbyte(7);
+#endif
}
}
- __builtin_unreachable();
+ /* NOTREACHED */
return (0);
}