about summary refs log tree commit diff
path: root/string/bench
diff options
context:
space:
mode:
author Andrew Turner <andrew@FreeBSD.org> 2025-01-10 10:34:52 +0000
committer Andrew Turner <andrew@FreeBSD.org> 2025-01-10 10:39:34 +0000
commit 9d1de25930735261c16ed874a933b4c1f1d9041e (patch)
tree b0cac1c933cc1ecb885c7e757b89ffbf13f1f012 /string/bench
parent edc5c0de794f521eb620d2b6cbaee2434442a8f3 (diff)
Diffstat (limited to 'string/bench')
-rw-r--r-- string/bench/memcpy.c 239
-rw-r--r-- string/bench/memset.c 141
-rw-r--r-- string/bench/strlen.c 206
3 files changed, 231 insertions, 355 deletions
diff --git a/string/bench/memcpy.c b/string/bench/memcpy.c
index b628f9b60d96..583fa505db75 100644
--- a/string/bench/memcpy.c
+++ b/string/bench/memcpy.c
@@ -20,35 +20,18 @@
#define MIN_SIZE 32768
#define MAX_SIZE (1024 * 1024)
-static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64)));
-static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64)));
-
-#define F(x) {#x, x},
-
-static const struct fun
-{
- const char *name;
- void *(*fun)(void *, const void *, size_t);
-} funtab[] =
-{
-#if __aarch64__
- F(__memcpy_aarch64)
-# if __ARM_NEON
- F(__memcpy_aarch64_simd)
-# endif
-# if __ARM_FEATURE_SVE
- F(__memcpy_aarch64_sve)
-# endif
-# if WANT_MOPS
- F(__memcpy_aarch64_mops)
-# endif
-#elif __arm__
- F(__memcpy_arm)
-#endif
- F(memcpy)
-#undef F
- {0, 0}
-};
+static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096)));
+static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096)));
+
+#define DOTEST(STR,TESTFN) \
+ printf (STR); \
+ RUN (TESTFN, memcpy); \
+ RUNA64 (TESTFN, __memcpy_aarch64); \
+ RUNA64 (TESTFN, __memcpy_aarch64_simd); \
+ RUNSVE (TESTFN, __memcpy_aarch64_sve); \
+ RUNMOPS (TESTFN, __memcpy_aarch64_mops); \
+ RUNA32 (TESTFN, __memcpy_arm); \
+ printf ("\n");
typedef struct { uint16_t size; uint16_t freq; } freq_data_t;
typedef struct { uint8_t align; uint16_t freq; } align_data_t;
@@ -160,183 +143,125 @@ init_copies (size_t max_size)
return total;
}
-int main (void)
+static void inline __attribute ((always_inline))
+memcpy_random (const char *name, void *(*fn)(void *, const void *, size_t))
{
- init_copy_distribution ();
-
- memset (a, 1, sizeof (a));
- memset (b, 2, sizeof (b));
-
- printf("Random memcpy (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- size_t total = 0;
- uint64_t tsum = 0;
- printf ("%22s ", funtab[f].name);
- rand32 (0x12345678);
-
- for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
- {
- size_t copy_size = init_copies (size) * ITERS;
-
- for (int c = 0; c < NUM_TESTS; c++)
- funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src,
- test_arr[c].len);
-
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS; i++)
- for (int c = 0; c < NUM_TESTS; c++)
- funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src,
- test_arr[c].len);
- t = clock_get_ns () - t;
- total += copy_size;
- tsum += t;
- printf ("%dK: %.2f ", size / 1024, (double)copy_size / t);
- }
- printf( "avg %.2f\n", (double)total / tsum);
- }
-
- size_t total = 0;
- uint64_t tsum = 0;
- printf ("%22s ", "memcpy_call");
- rand32 (0x12345678);
-
+ printf ("%22s ", name);
+ uint64_t total = 0, tsum = 0;
for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
{
- size_t copy_size = init_copies (size) * ITERS;
+ uint64_t copy_size = init_copies (size) * ITERS;
for (int c = 0; c < NUM_TESTS; c++)
- memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
+ fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS; i++)
for (int c = 0; c < NUM_TESTS; c++)
- memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
+ fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len);
t = clock_get_ns () - t;
total += copy_size;
tsum += t;
- printf ("%dK: %.2f ", size / 1024, (double)copy_size / t);
+ printf ("%dK: %5.2f ", size / 1024, (double)copy_size / t);
}
- printf( "avg %.2f\n", (double)total / tsum);
-
+ printf( "avg %5.2f\n", (double)total / tsum);
+}
- printf ("\nAligned medium memcpy (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 8; size <= 512; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS2; i++)
- funtab[f].fun (b, a, size);
- t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
- }
- printf ("\n");
- }
+static void inline __attribute ((always_inline))
+memcpy_medium_aligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+ printf ("%22s ", name);
- printf ("%22s ", "memcpy_call");
for (int size = 8; size <= 512; size *= 2)
{
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS2; i++)
- memcpy (b, a, size);
+ fn (b, a, size);
t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+ printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
}
printf ("\n");
+}
+static void inline __attribute ((always_inline))
+memcpy_medium_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+ printf ("%22s ", name);
- printf ("\nUnaligned medium memcpy (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 8; size <= 512; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS2; i++)
- funtab[f].fun (b + 3, a + 1, size);
- t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
- }
- printf ("\n");
- }
-
- printf ("%22s ", "memcpy_call");
for (int size = 8; size <= 512; size *= 2)
{
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS2; i++)
- memcpy (b + 3, a + 1, size);
+ fn (b + 3, a + 1, size);
t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+ printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
}
printf ("\n");
+}
+static void inline __attribute ((always_inline))
+memcpy_large (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+ printf ("%22s ", name);
- printf ("\nLarge memcpy (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 1024; size <= 65536; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS3; i++)
- funtab[f].fun (b, a, size);
- t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
- }
- printf ("\n");
- }
-
- printf ("%22s ", "memcpy_call");
for (int size = 1024; size <= 65536; size *= 2)
{
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS3; i++)
- memcpy (b, a, size);
+ fn (b, a, size);
t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
+ printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
}
printf ("\n");
+}
+static void inline __attribute ((always_inline))
+memmove_forward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+ printf ("%22s ", name);
- printf ("\nUnaligned forwards memmove (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
+ for (int size = 1024; size <= 65536; size *= 2)
{
- printf ("%22s ", funtab[f].name);
-
- for (int size = 1024; size <= 65536; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS3; i++)
- funtab[f].fun (a, a + 256 + (i & 31), size);
- t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
- }
- printf ("\n");
+ uint64_t t = clock_get_ns ();
+ for (int i = 0; i < ITERS3; i++)
+ fn (a, a + 256 + (i & 31), size);
+ t = clock_get_ns () - t;
+ printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
}
+ printf ("\n");
+}
+
+static void inline __attribute ((always_inline))
+memmove_backward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t))
+{
+ printf ("%22s ", name);
- printf ("\nUnaligned backwards memmove (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
+ for (int size = 1024; size <= 65536; size *= 2)
{
- printf ("%22s ", funtab[f].name);
-
- for (int size = 1024; size <= 65536; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS3; i++)
- funtab[f].fun (a + 256 + (i & 31), a, size);
- t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
- }
- printf ("\n");
+ uint64_t t = clock_get_ns ();
+ for (int i = 0; i < ITERS3; i++)
+ fn (a + 256 + (i & 31), a, size);
+ t = clock_get_ns () - t;
+ printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t);
}
+
printf ("\n");
+}
+
+int main (void)
+{
+ init_copy_distribution ();
+
+ memset (a, 1, sizeof (a));
+ memset (b, 2, sizeof (b));
+
+ DOTEST ("Random memcpy (bytes/ns):\n", memcpy_random);
+ DOTEST ("Medium memcpy aligned (bytes/ns):\n", memcpy_medium_aligned);
+ DOTEST ("Medium memcpy unaligned (bytes/ns):\n", memcpy_medium_unaligned);
+ DOTEST ("Large memcpy (bytes/ns):\n", memcpy_large);
+ DOTEST ("Forwards memmove unaligned (bytes/ns):\n", memmove_forward_unaligned);
+ DOTEST ("Backwards memmove unaligned (bytes/ns):\n", memmove_backward_unaligned);
return 0;
}
diff --git a/string/bench/memset.c b/string/bench/memset.c
index 990e23ba9a36..07474e469146 100644
--- a/string/bench/memset.c
+++ b/string/bench/memset.c
@@ -20,25 +20,16 @@
#define MIN_SIZE 32768
#define MAX_SIZE (1024 * 1024)
-static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(64)));
+static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(4096)));
-#define F(x) {#x, x},
-
-static const struct fun
-{
- const char *name;
- void *(*fun)(void *, int, size_t);
-} funtab[] =
-{
-#if __aarch64__
- F(__memset_aarch64)
-#elif __arm__
- F(__memset_arm)
-#endif
- F(memset)
-#undef F
- {0, 0}
-};
+#define DOTEST(STR,TESTFN) \
+ printf (STR); \
+ RUN (TESTFN, memset); \
+ RUNA64 (TESTFN, __memset_aarch64); \
+ RUNSVE (TESTFN, __memset_aarch64_sve); \
+ RUNMOPS (TESTFN, __memset_mops); \
+ RUNA32 (TESTFN, __memset_arm); \
+ printf ("\n");
typedef struct { uint32_t offset : 20, len : 12; } memset_test_t;
static memset_test_t test_arr[NUM_TESTS];
@@ -127,117 +118,73 @@ init_memset (size_t max_size)
return total;
}
-
-int main (void)
+static void inline __attribute ((always_inline))
+memset_random (const char *name, void *(*set)(void *, int, size_t))
{
- init_memset_distribution ();
-
- memset (a, 1, sizeof (a));
-
- printf("Random memset (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- size_t total_size = 0;
- uint64_t tsum = 0;
- printf ("%22s ", funtab[f].name);
- rand32 (0x12345678);
-
- for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
- {
- size_t memset_size = init_memset (size) * ITERS;
-
- for (int c = 0; c < NUM_TESTS; c++)
- funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len);
-
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS; i++)
- for (int c = 0; c < NUM_TESTS; c++)
- funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len);
- t = clock_get_ns () - t;
- total_size += memset_size;
- tsum += t;
- printf ("%dK: %.2f ", size / 1024, (double)memset_size / t);
- }
- printf( "avg %.2f\n", (double)total_size / tsum);
- }
-
- size_t total_size = 0;
+ uint64_t total_size = 0;
uint64_t tsum = 0;
- printf ("%22s ", "memset_call");
+ printf ("%22s ", name);
rand32 (0x12345678);
for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2)
{
- size_t memset_size = init_memset (size) * ITERS;
+ uint64_t memset_size = init_memset (size) * ITERS;
for (int c = 0; c < NUM_TESTS; c++)
- memset (a + test_arr[c].offset, 0, test_arr[c].len);
+ set (a + test_arr[c].offset, 0, test_arr[c].len);
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS; i++)
for (int c = 0; c < NUM_TESTS; c++)
- memset (a + test_arr[c].offset, 0, test_arr[c].len);
+ set (a + test_arr[c].offset, 0, test_arr[c].len);
t = clock_get_ns () - t;
total_size += memset_size;
tsum += t;
- printf ("%dK: %.2f ", size / 1024, (double)memset_size / t);
+ printf ("%dK: %5.2f ", size / 1024, (double)memset_size / t);
}
- printf( "avg %.2f\n", (double)total_size / tsum);
-
+ printf( "avg %5.2f\n", (double)total_size / tsum);
+}
- printf ("\nMedium memset (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 8; size <= 512; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS2; i++)
- funtab[f].fun (a, 0, size);
- t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
- }
- printf ("\n");
- }
+static void inline __attribute ((always_inline))
+memset_medium (const char *name, void *(*set)(void *, int, size_t))
+{
+ printf ("%22s ", name);
- printf ("%22s ", "memset_call");
for (int size = 8; size <= 512; size *= 2)
{
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS2; i++)
- memset (a, 0, size);
+ set (a, 0, size);
t = clock_get_ns () - t;
- printf ("%dB: %.2f ", size, (double)size * ITERS2 / t);
+ printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t);
}
+ printf ("\n");
+}
+static void inline __attribute ((always_inline))
+memset_large (const char *name, void *(*set)(void *, int, size_t))
+{
+ printf ("%22s ", name);
- printf ("\nLarge memset (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 1024; size <= 65536; size *= 2)
- {
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS3; i++)
- funtab[f].fun (a, 0, size);
- t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
- }
- printf ("\n");
- }
-
- printf ("%22s ", "memset_call");
for (int size = 1024; size <= 65536; size *= 2)
{
uint64_t t = clock_get_ns ();
for (int i = 0; i < ITERS3; i++)
- memset (a, 0, size);
+ set (a, 0, size);
t = clock_get_ns () - t;
- printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t);
+ printf ("%dKB: %6.2f ", size / 1024, (double)size * ITERS3 / t);
}
- printf ("\n\n");
+ printf ("\n");
+}
+
+int main (void)
+{
+ init_memset_distribution ();
+
+ memset (a, 1, sizeof (a));
+ DOTEST ("Random memset (bytes/ns):\n", memset_random);
+ DOTEST ("Medium memset (bytes/ns):\n", memset_medium);
+ DOTEST ("Large memset (bytes/ns):\n", memset_large);
return 0;
}
diff --git a/string/bench/strlen.c b/string/bench/strlen.c
index f05d0d5b89e6..a8dd55cf5fc4 100644
--- a/string/bench/strlen.c
+++ b/string/bench/strlen.c
@@ -14,40 +14,23 @@
#include "benchlib.h"
#define ITERS 5000
-#define ITERS2 20000000
-#define ITERS3 2000000
-#define NUM_TESTS 16384
+#define ITERS2 40000000
+#define ITERS3 4000000
+#define NUM_TESTS 65536
#define MAX_ALIGN 32
-#define MAX_STRLEN 256
+#define MAX_STRLEN 128
static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));
-#define F(x, mte) {#x, x, mte},
-
-static const struct fun
-{
- const char *name;
- size_t (*fun) (const char *s);
- int test_mte;
-} funtab[] = {
- // clang-format off
- F(strlen, 0)
-#if __aarch64__
- F(__strlen_aarch64, 0)
- F(__strlen_aarch64_mte, 1)
-# if __ARM_FEATURE_SVE
- F(__strlen_aarch64_sve, 1)
-# endif
-#elif __arm__
-# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
- F(__strlen_armv6t2, 0)
-# endif
-#endif
- {0, 0, 0}
- // clang-format on
-};
-#undef F
+#define DOTEST(STR,TESTFN) \
+ printf (STR); \
+ RUN (TESTFN, strlen); \
+ RUNA64 (TESTFN, __strlen_aarch64); \
+ RUNA64 (TESTFN, __strlen_aarch64_mte); \
+ RUNSVE (TESTFN, __strlen_aarch64_sve); \
+ RUNT32 (TESTFN, __strlen_armv6t2); \
+ printf ("\n");
static uint16_t strlen_tests[NUM_TESTS];
@@ -124,98 +107,119 @@ init_strlen_tests (void)
strlen_tests[n] =
index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
+ assert ((strlen_tests[n] & (align - 1)) == 0);
+ assert (strlen (a + strlen_tests[n]) == exp_len);
}
}
static volatile size_t maskv = 0;
-int main (void)
+static void inline __attribute ((always_inline))
+strlen_random (const char *name, size_t (*fn)(const char *))
{
- rand32 (0x12345678);
- init_strlen_distribution ();
- init_strlen_tests ();
+ size_t res = 0, mask = maskv;
+ uint64_t strlen_size = 0;
+ printf ("%22s ", name);
+
+ for (int c = 0; c < NUM_TESTS; c++)
+ strlen_size += fn (a + strlen_tests[c]) + 1;
+ strlen_size *= ITERS;
+
+ /* Measure throughput of strlen. */
+ uint64_t t = clock_get_ns ();
+ for (int i = 0; i < ITERS; i++)
+ for (int c = 0; c < NUM_TESTS; c++)
+ res += fn (a + strlen_tests[c]);
+ t = clock_get_ns () - t;
+ printf ("tp: %.3f ", (double)strlen_size / t);
+
+ /* Measure latency of strlen result with (res & mask). */
+ t = clock_get_ns ();
+ for (int i = 0; i < ITERS; i++)
+ for (int c = 0; c < NUM_TESTS; c++)
+ res += fn (a + strlen_tests[c] + (res & mask));
+ t = clock_get_ns () - t;
+ printf ("lat: %.3f\n", (double)strlen_size / t);
+ maskv = res & mask;
+}
- printf ("\nRandom strlen (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- size_t res = 0, strlen_size = 0, mask = maskv;
- printf ("%22s ", funtab[f].name);
+static void inline __attribute ((always_inline))
+strlen_small_aligned (const char *name, size_t (*fn)(const char *))
+{
+ printf ("%22s ", name);
- for (int c = 0; c < NUM_TESTS; c++)
- strlen_size += funtab[f].fun (a + strlen_tests[c]);
- strlen_size *= ITERS;
+ size_t res = 0, mask = maskv;
+ for (int size = 1; size <= 64; size *= 2)
+ {
+ memset (a, 'x', size);
+ a[size - 1] = 0;
- /* Measure latency of strlen result with (res & mask). */
uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS; i++)
- for (int c = 0; c < NUM_TESTS; c++)
- res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
+ for (int i = 0; i < ITERS2; i++)
+ res += fn (a + (i & mask));
t = clock_get_ns () - t;
- printf ("%.2f\n", (double)strlen_size / t);
+ printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+ size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
}
+ maskv &= res;
+ printf ("\n");
+}
- printf ("\nSmall aligned strlen (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
- {
- printf ("%22s ", funtab[f].name);
-
- for (int size = 1; size <= 64; size *= 2)
- {
- memset (a, 'x', size);
- a[size - 1] = 0;
-
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS2; i++)
- funtab[f].fun (a);
- t = clock_get_ns () - t;
- printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
- size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
- }
- printf ("\n");
- }
+static void inline __attribute ((always_inline))
+strlen_small_unaligned (const char *name, size_t (*fn)(const char *))
+{
+ printf ("%22s ", name);
- printf ("\nSmall unaligned strlen (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
+ size_t res = 0, mask = maskv;
+ int align = 9;
+ for (int size = 1; size <= 64; size *= 2)
{
- printf ("%22s ", funtab[f].name);
-
- int align = 9;
- for (int size = 1; size <= 64; size *= 2)
- {
- memset (a + align, 'x', size);
- a[align + size - 1] = 0;
-
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS2; i++)
- funtab[f].fun (a + align);
- t = clock_get_ns () - t;
- printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
- size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
- }
- printf ("\n");
+ memset (a + align, 'x', size);
+ a[align + size - 1] = 0;
+
+ uint64_t t = clock_get_ns ();
+ for (int i = 0; i < ITERS2; i++)
+ res += fn (a + align + (i & mask));
+ t = clock_get_ns () - t;
+ printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+ size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
}
+ maskv &= res;
+ printf ("\n");
+}
- printf ("\nMedium strlen (bytes/ns):\n");
- for (int f = 0; funtab[f].name != 0; f++)
+static void inline __attribute ((always_inline))
+strlen_medium (const char *name, size_t (*fn)(const char *))
+{
+ printf ("%22s ", name);
+
+ size_t res = 0, mask = maskv;
+ for (int size = 128; size <= 4096; size *= 2)
{
- printf ("%22s ", funtab[f].name);
-
- for (int size = 128; size <= 4096; size *= 2)
- {
- memset (a, 'x', size);
- a[size - 1] = 0;
-
- uint64_t t = clock_get_ns ();
- for (int i = 0; i < ITERS3; i++)
- funtab[f].fun (a);
- t = clock_get_ns () - t;
- printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
- size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
- }
- printf ("\n");
- }
+ memset (a, 'x', size);
+ a[size - 1] = 0;
+ uint64_t t = clock_get_ns ();
+ for (int i = 0; i < ITERS3; i++)
+ res += fn (a + (i & mask));
+ t = clock_get_ns () - t;
+ printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
+ size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
+ }
+ maskv &= res;
printf ("\n");
+}
+
+int main (void)
+{
+ rand32 (0x12345678);
+ init_strlen_distribution ();
+ init_strlen_tests ();
+
+ DOTEST ("Random strlen (bytes/ns):\n", strlen_random);
+ DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned);
+ DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned);
+ DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium);
return 0;
}