diff options
author | Andrew Turner <andrew@FreeBSD.org> | 2025-01-10 10:34:52 +0000 |
---|---|---|
committer | Andrew Turner <andrew@FreeBSD.org> | 2025-01-10 10:39:34 +0000 |
commit | 9d1de25930735261c16ed874a933b4c1f1d9041e (patch) | |
tree | b0cac1c933cc1ecb885c7e757b89ffbf13f1f012 /string/bench | |
parent | edc5c0de794f521eb620d2b6cbaee2434442a8f3 (diff) |
Diffstat (limited to 'string/bench')
-rw-r--r-- | string/bench/memcpy.c | 239 | ||||
-rw-r--r-- | string/bench/memset.c | 141 | ||||
-rw-r--r-- | string/bench/strlen.c | 206 |
3 files changed, 231 insertions, 355 deletions
diff --git a/string/bench/memcpy.c b/string/bench/memcpy.c index b628f9b60d96..583fa505db75 100644 --- a/string/bench/memcpy.c +++ b/string/bench/memcpy.c @@ -20,35 +20,18 @@ #define MIN_SIZE 32768 #define MAX_SIZE (1024 * 1024) -static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64))); -static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(64))); - -#define F(x) {#x, x}, - -static const struct fun -{ - const char *name; - void *(*fun)(void *, const void *, size_t); -} funtab[] = -{ -#if __aarch64__ - F(__memcpy_aarch64) -# if __ARM_NEON - F(__memcpy_aarch64_simd) -# endif -# if __ARM_FEATURE_SVE - F(__memcpy_aarch64_sve) -# endif -# if WANT_MOPS - F(__memcpy_aarch64_mops) -# endif -#elif __arm__ - F(__memcpy_arm) -#endif - F(memcpy) -#undef F - {0, 0} -}; +static uint8_t a[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096))); +static uint8_t b[MAX_SIZE + 4096 + 64] __attribute__((__aligned__(4096))); + +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, memcpy); \ + RUNA64 (TESTFN, __memcpy_aarch64); \ + RUNA64 (TESTFN, __memcpy_aarch64_simd); \ + RUNSVE (TESTFN, __memcpy_aarch64_sve); \ + RUNMOPS (TESTFN, __memcpy_aarch64_mops); \ + RUNA32 (TESTFN, __memcpy_arm); \ + printf ("\n"); typedef struct { uint16_t size; uint16_t freq; } freq_data_t; typedef struct { uint8_t align; uint16_t freq; } align_data_t; @@ -160,183 +143,125 @@ init_copies (size_t max_size) return total; } -int main (void) +static void inline __attribute ((always_inline)) +memcpy_random (const char *name, void *(*fn)(void *, const void *, size_t)) { - init_copy_distribution (); - - memset (a, 1, sizeof (a)); - memset (b, 2, sizeof (b)); - - printf("Random memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t total = 0; - uint64_t tsum = 0; - printf ("%22s ", funtab[f].name); - rand32 (0x12345678); - - for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) - { - size_t copy_size = init_copies (size) * ITERS; - - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src, - test_arr[c].len); - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (b + test_arr[c].dst, a + test_arr[c].src, - test_arr[c].len); - t = clock_get_ns () - t; - total += copy_size; - tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)copy_size / t); - } - printf( "avg %.2f\n", (double)total / tsum); - } - - size_t total = 0; - uint64_t tsum = 0; - printf ("%22s ", "memcpy_call"); - rand32 (0x12345678); - + printf ("%22s ", name); + uint64_t total = 0, tsum = 0; for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) { - size_t copy_size = init_copies (size) * ITERS; + uint64_t copy_size = init_copies (size) * ITERS; for (int c = 0; c < NUM_TESTS; c++) - memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); + fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS; i++) for (int c = 0; c < NUM_TESTS; c++) - memcpy (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); + fn (b + test_arr[c].dst, a + test_arr[c].src, test_arr[c].len); t = clock_get_ns () - t; total += copy_size; tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)copy_size / t); + printf ("%dK: %5.2f ", size / 1024, (double)copy_size / t); } - printf( "avg %.2f\n", (double)total / tsum); - + printf( "avg %5.2f\n", (double)total / tsum); +} - printf ("\nAligned medium memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (b, a, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +memcpy_medium_aligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("%22s ", "memcpy_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memcpy (b, a, size); + fn (b, a, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memcpy_medium_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned medium memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (b + 3, a + 1, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memcpy_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memcpy (b + 3, a + 1, size); + fn (b + 3, a + 1, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memcpy_large (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nLarge memcpy (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (b, a, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memcpy_call"); for (int size = 1024; size <= 65536; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS3; i++) - memcpy (b, a, size); + fn (b, a, size); t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } printf ("\n"); +} +static void inline __attribute ((always_inline)) +memmove_forward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned forwards memmove (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + for (int size = 1024; size <= 65536; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a, a + 256 + (i & 31), size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + fn (a, a + 256 + (i & 31), size); + t = clock_get_ns () - t; + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } + printf ("\n"); +} + +static void inline __attribute ((always_inline)) +memmove_backward_unaligned (const char *name, void *(*fn)(void *, const void *, size_t)) +{ + printf ("%22s ", name); - printf ("\nUnaligned backwards memmove (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + for (int size = 1024; size <= 65536; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a + 256 + (i & 31), a, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + fn (a + 256 + (i & 31), a, size); + t = clock_get_ns () - t; + printf ("%dK: %5.2f ", size / 1024, (double)size * ITERS3 / t); } + printf ("\n"); +} + +int main (void) +{ + init_copy_distribution (); + + memset (a, 1, sizeof (a)); + memset (b, 2, sizeof (b)); + + DOTEST ("Random memcpy (bytes/ns):\n", memcpy_random); + DOTEST ("Medium memcpy aligned (bytes/ns):\n", memcpy_medium_aligned); + DOTEST ("Medium memcpy unaligned (bytes/ns):\n", memcpy_medium_unaligned); + DOTEST ("Large memcpy (bytes/ns):\n", memcpy_large); + DOTEST ("Forwards memmove unaligned (bytes/ns):\n", memmove_forward_unaligned); + DOTEST ("Backwards memmove unaligned (bytes/ns):\n", memmove_backward_unaligned); return 0; } diff --git a/string/bench/memset.c b/string/bench/memset.c index 990e23ba9a36..07474e469146 100644 --- a/string/bench/memset.c +++ b/string/bench/memset.c @@ -20,25 +20,16 @@ #define MIN_SIZE 32768 #define MAX_SIZE (1024 * 1024) -static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(64))); +static uint8_t a[MAX_SIZE + 4096] __attribute__((__aligned__(4096))); -#define F(x) {#x, x}, - -static const struct fun -{ - const char *name; - void *(*fun)(void *, int, size_t); -} funtab[] = -{ -#if __aarch64__ - F(__memset_aarch64) -#elif __arm__ - F(__memset_arm) -#endif - F(memset) -#undef F - {0, 0} -}; +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, memset); \ + RUNA64 (TESTFN, __memset_aarch64); \ + RUNSVE (TESTFN, __memset_aarch64_sve); \ + RUNMOPS (TESTFN, __memset_mops); \ + RUNA32 (TESTFN, __memset_arm); \ + printf ("\n"); typedef struct { uint32_t offset : 20, len : 12; } memset_test_t; static memset_test_t test_arr[NUM_TESTS]; @@ -127,117 +118,73 @@ init_memset (size_t max_size) return total; } - -int main (void) +static void inline __attribute ((always_inline)) +memset_random (const char *name, void *(*set)(void *, int, size_t)) { - init_memset_distribution (); - - memset (a, 1, sizeof (a)); - - printf("Random memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t total_size = 0; - uint64_t tsum = 0; - printf ("%22s ", funtab[f].name); - rand32 (0x12345678); - - for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) - { - size_t memset_size = init_memset (size) * ITERS; - - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len); - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - funtab[f].fun (a + test_arr[c].offset, 0, test_arr[c].len); - t = clock_get_ns () - t; - total_size += memset_size; - tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)memset_size / t); - } - printf( "avg %.2f\n", (double)total_size / tsum); - } - - size_t total_size = 0; + uint64_t total_size = 0; uint64_t tsum = 0; - printf ("%22s ", "memset_call"); + printf ("%22s ", name); rand32 (0x12345678); for (int size = MIN_SIZE; size <= MAX_SIZE; size *= 2) { - size_t memset_size = init_memset (size) * ITERS; + uint64_t memset_size = init_memset (size) * ITERS; for (int c = 0; c < NUM_TESTS; c++) - memset (a + test_arr[c].offset, 0, test_arr[c].len); + set (a + test_arr[c].offset, 0, test_arr[c].len); uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS; i++) for (int c = 0; c < NUM_TESTS; c++) - memset (a + test_arr[c].offset, 0, test_arr[c].len); + set (a + test_arr[c].offset, 0, test_arr[c].len); t = clock_get_ns () - t; total_size += memset_size; tsum += t; - printf ("%dK: %.2f ", size / 1024, (double)memset_size / t); + printf ("%dK: %5.2f ", size / 1024, (double)memset_size / t); } - printf( "avg %.2f\n", (double)total_size / tsum); - + printf( "avg %5.2f\n", (double)total_size / tsum); +} - printf ("\nMedium memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 8; size <= 512; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a, 0, size); - t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +memset_medium (const char *name, void *(*set)(void *, int, size_t)) +{ + printf ("%22s ", name); - printf ("%22s ", "memset_call"); for (int size = 8; size <= 512; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS2; i++) - memset (a, 0, size); + set (a, 0, size); t = clock_get_ns () - t; - printf ("%dB: %.2f ", size, (double)size * ITERS2 / t); + printf ("%dB: %5.2f ", size, (double)size * ITERS2 / t); } + printf ("\n"); +} +static void inline __attribute ((always_inline)) +memset_large (const char *name, void *(*set)(void *, int, size_t)) +{ + printf ("%22s ", name); - printf ("\nLarge memset (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1024; size <= 65536; size *= 2) - { - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a, 0, size); - t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); - } - printf ("\n"); - } - - printf ("%22s ", "memset_call"); for (int size = 1024; size <= 65536; size *= 2) { uint64_t t = clock_get_ns (); for (int i = 0; i < ITERS3; i++) - memset (a, 0, size); + set (a, 0, size); t = clock_get_ns () - t; - printf ("%dK: %.2f ", size / 1024, (double)size * ITERS3 / t); + printf ("%dKB: %6.2f ", size / 1024, (double)size * ITERS3 / t); } - printf ("\n\n"); + printf ("\n"); +} + +int main (void) +{ + init_memset_distribution (); + + memset (a, 1, sizeof (a)); + DOTEST ("Random memset (bytes/ns):\n", memset_random); + DOTEST ("Medium memset (bytes/ns):\n", memset_medium); + DOTEST ("Large memset (bytes/ns):\n", memset_large); return 0; } diff --git a/string/bench/strlen.c b/string/bench/strlen.c index f05d0d5b89e6..a8dd55cf5fc4 100644 --- a/string/bench/strlen.c +++ b/string/bench/strlen.c @@ -14,40 +14,23 @@ #include "benchlib.h" #define ITERS 5000 -#define ITERS2 20000000 -#define ITERS3 2000000 -#define NUM_TESTS 16384 +#define ITERS2 40000000 +#define ITERS3 4000000 +#define NUM_TESTS 65536 #define MAX_ALIGN 32 -#define MAX_STRLEN 256 +#define MAX_STRLEN 128 static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096))); -#define F(x, mte) {#x, x, mte}, - -static const struct fun -{ - const char *name; - size_t (*fun) (const char *s); - int test_mte; -} funtab[] = { - // clang-format off - F(strlen, 0) -#if __aarch64__ - F(__strlen_aarch64, 0) - F(__strlen_aarch64_mte, 1) -# if __ARM_FEATURE_SVE - F(__strlen_aarch64_sve, 1) -# endif -#elif __arm__ -# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2 - F(__strlen_armv6t2, 0) -# endif -#endif - {0, 0, 0} - // clang-format on -}; -#undef F +#define DOTEST(STR,TESTFN) \ + printf (STR); \ + RUN (TESTFN, strlen); \ + RUNA64 (TESTFN, __strlen_aarch64); \ + RUNA64 (TESTFN, __strlen_aarch64_mte); \ + RUNSVE (TESTFN, __strlen_aarch64_sve); \ + RUNT32 (TESTFN, __strlen_armv6t2); \ + printf ("\n"); static uint16_t strlen_tests[NUM_TESTS]; @@ -124,98 +107,119 @@ init_strlen_tests (void) strlen_tests[n] = index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; + assert ((strlen_tests[n] & (align - 1)) == 0); + assert (strlen (a + strlen_tests[n]) == exp_len); } } static volatile size_t maskv = 0; -int main (void) +static void inline __attribute ((always_inline)) +strlen_random (const char *name, size_t (*fn)(const char *)) { - rand32 (0x12345678); - init_strlen_distribution (); - init_strlen_tests (); + size_t res = 0, mask = maskv; + uint64_t strlen_size = 0; + printf ("%22s ", name); + + for (int c = 0; c < NUM_TESTS; c++) + strlen_size += fn (a + strlen_tests[c]) + 1; + strlen_size *= ITERS; + + /* Measure throughput of strlen. */ + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS; i++) + for (int c = 0; c < NUM_TESTS; c++) + res += fn (a + strlen_tests[c]); + t = clock_get_ns () - t; + printf ("tp: %.3f ", (double)strlen_size / t); + + /* Measure latency of strlen result with (res & mask). */ + t = clock_get_ns (); + for (int i = 0; i < ITERS; i++) + for (int c = 0; c < NUM_TESTS; c++) + res += fn (a + strlen_tests[c] + (res & mask)); + t = clock_get_ns () - t; + printf ("lat: %.3f\n", (double)strlen_size / t); + maskv = res & mask; +} - printf ("\nRandom strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - size_t res = 0, strlen_size = 0, mask = maskv; - printf ("%22s ", funtab[f].name); +static void inline __attribute ((always_inline)) +strlen_small_aligned (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); - for (int c = 0; c < NUM_TESTS; c++) - strlen_size += funtab[f].fun (a + strlen_tests[c]); - strlen_size *= ITERS; + size_t res = 0, mask = maskv; + for (int size = 1; size <= 64; size *= 2) + { + memset (a, 'x', size); + a[size - 1] = 0; - /* Measure latency of strlen result with (res & mask). */ uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS; i++) - for (int c = 0; c < NUM_TESTS; c++) - res = funtab[f].fun (a + strlen_tests[c] + (res & mask)); + for (int i = 0; i < ITERS2; i++) + res += fn (a + (i & mask)); t = clock_get_ns () - t; - printf ("%.2f\n", (double)strlen_size / t); + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); } + maskv &= res; + printf ("\n"); +} - printf ("\nSmall aligned strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) - { - printf ("%22s ", funtab[f].name); - - for (int size = 1; size <= 64; size *= 2) - { - memset (a, 'x', size); - a[size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); - } - printf ("\n"); - } +static void inline __attribute ((always_inline)) +strlen_small_unaligned (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); - printf ("\nSmall unaligned strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) + size_t res = 0, mask = maskv; + int align = 9; + for (int size = 1; size <= 64; size *= 2) { - printf ("%22s ", funtab[f].name); - - int align = 9; - for (int size = 1; size <= 64; size *= 2) - { - memset (a + align, 'x', size); - a[align + size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS2; i++) - funtab[f].fun (a + align); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); - } - printf ("\n"); + memset (a + align, 'x', size); + a[align + size - 1] = 0; + + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS2; i++) + res += fn (a + align + (i & mask)); + t = clock_get_ns () - t; + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); } + maskv &= res; + printf ("\n"); +} - printf ("\nMedium strlen (bytes/ns):\n"); - for (int f = 0; funtab[f].name != 0; f++) +static void inline __attribute ((always_inline)) +strlen_medium (const char *name, size_t (*fn)(const char *)) +{ + printf ("%22s ", name); + + size_t res = 0, mask = maskv; + for (int size = 128; size <= 4096; size *= 2) { - printf ("%22s ", funtab[f].name); - - for (int size = 128; size <= 4096; size *= 2) - { - memset (a, 'x', size); - a[size - 1] = 0; - - uint64_t t = clock_get_ns (); - for (int i = 0; i < ITERS3; i++) - funtab[f].fun (a); - t = clock_get_ns () - t; - printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024, - size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t); - } - printf ("\n"); - } + memset (a, 'x', size); + a[size - 1] = 0; + uint64_t t = clock_get_ns (); + for (int i = 0; i < ITERS3; i++) + res += fn (a + (i & mask)); + t = clock_get_ns () - t; + printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, + size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t); + } + maskv &= res; printf ("\n"); +} + +int main (void) +{ + rand32 (0x12345678); + init_strlen_distribution (); + init_strlen_tests (); + + DOTEST ("Random strlen (bytes/ns):\n", strlen_random); + DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned); + DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned); + DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium); return 0; } |