summaryrefslogtreecommitdiff
path: root/tests/bench.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/bench.c')
-rw-r--r--tests/bench.c466
1 files changed, 466 insertions, 0 deletions
diff --git a/tests/bench.c b/tests/bench.c
new file mode 100644
index 0000000000000..705fd385bcd48
--- /dev/null
+++ b/tests/bench.c
@@ -0,0 +1,466 @@
+/*
+ bench.c - simple regex benchmark program
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif /* HAVE_GETOPT_H */
+#include <time.h>
+#include <unistd.h>
+#include <math.h>
+#include <sys/types.h>
+
+#if 0
+#include <hackerlab/rx-posix/regex.h>
+#else
+#include <regex.h>
+#endif
+
+/* T distribution for alpha = 0.025 (for 95% confidence). XXX - is
+ this correct? */
+double t_distribution[] = {
+ 12.71,
+ 4.303,
+ 3.182,
+ 2.776,
+ 2.571,
+ 2.447,
+ 2.365,
+ 2.306,
+ 2.262,
+ 2.228,
+ 2.201,
+ 2.179,
+ 2.160,
+ 2.145,
+ 2.131,
+ 2.120,
+ 2.110,
+ 2.101,
+ 2.093,
+ 2.086,
+ 2.080,
+ 2.074,
+ 2.069,
+ 2.064,
+ 2.060,
+ 2.056,
+ 2.052,
+ 2.048,
+ 2.045,
+ 2.042
+};
+
+void
+stats(double *sample_data, int samples, int len)
+{
+ double mean, tmp1, tmp2, variance, stddev, error, percent;
+ int i;
+
+ mean = 0;
+ for (i = 0; i < samples; i++)
+ mean += sample_data[i];
+ mean = mean/i;
+ printf("# mean: %.5f\n", mean);
+
+ tmp1 = 0;
+ for (i = 0; i < samples; i++) {
+ tmp2 = sample_data[i] - mean;
+ tmp1 += tmp2*tmp2;
+ }
+ if (samples > 1)
+ variance = tmp1 / (samples-1);
+ else
+ variance = 0;
+ stddev = sqrt(variance);
+ printf("# variance: %.16f\n", variance);
+ printf("# standard deviation: %.16f\n", stddev);
+
+ error = t_distribution[samples-1] * stddev / sqrt(samples);
+ if (mean != 0)
+ percent = 100*error/mean;
+ else
+ percent = 0;
+ printf("# error: ±%.16f (±%.4f%%)\n", error, percent);
+
+ printf("%d\t%.5f\t%.5f\n", len, mean, error);
+
+ fflush(stdout);
+}
+
+void
+run_tests(int len, int samples, double *sample_data, int repeats,
+ regex_t *reobj, char *str, char *tmpbuf)
+{
+ int i, j, errcode;
+ clock_t c1, c2;
+ regmatch_t pmatch[10];
+
+
+ printf("# len = %d\n", len);
+ fflush(stdout);
+ for (i = 0; i < samples; i++) {
+ c1 = clock();
+ for (j = 0; j < repeats; j++)
+ if ((errcode = tre_regexec(reobj, str, 10, pmatch, 0))) {
+ tre_regerror(errcode, reobj, tmpbuf, 255);
+ printf("error: %s\n", tmpbuf);
+ }
+ c2 = clock();
+
+ sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
+
+ printf("# sample: %.5f sec, clocks: %ld\n",
+ (double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
+ (long)(c2-c1));
+ fflush(stdout);
+ }
+ fflush(stdout);
+
+ for (i = 0; i < 10; i += 2) {
+ printf("# pmatch[%d].rm_so = %d\n", i/2, (int)pmatch[i/2].rm_so);
+ printf("# pmatch[%d].rm_eo = %d\n", i/2, (int)pmatch[i/2].rm_eo);
+ }
+}
+
+
+int
+main(int argc, char **argv)
+{
+ regex_t reobj;
+ char *str;
+ char tmpbuf[256];
+ int i, j;
+ int max_len = 1024*1024*10;
+ int steps = 20;
+ int repeats = 10;
+ int samples = 20;
+ int len;
+ clock_t c1, c2;
+ int opt;
+ double sample_data[30];
+
+ int test_id = -1;
+
+ while ((opt = getopt(argc, argv, "r:l:s:j:t:")) != -1) {
+ switch (opt) {
+ case 't':
+ test_id = atoi(optarg);
+ break;
+ case 'l':
+ max_len = atoi(optarg);
+ break;
+ case 'j':
+ steps = atoi(optarg);
+ break;
+ case 's':
+ samples = atoi(optarg);
+ break;
+ case 'r':
+ repeats = atoi(optarg);
+ break;
+ default:
+ printf("Pälli.\n");
+ return 1;
+ }
+ }
+
+ /* XXX - Check that the correct results are returned. For example, GNU
+ regex-0.12 returns incorrect results for very long strings in
+ test number 1. */
+
+ switch (test_id) {
+ case 0:
+ printf("# pattern: \"a*\"\n");
+ printf("# string: \"aaaaaa...\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "a*", REG_EXTENDED);
+ while (len <= max_len) {
+
+ str = malloc(sizeof(char) * (len+1));
+ for (i = 0; i < len; i++)
+ str[i] = 'a';
+ str[len-1] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 1:
+ printf("# pattern: \"(a)*\"\n");
+ printf("# string: \"aaaaaa...\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "(a)*", REG_EXTENDED);
+ while (len <= max_len) {
+
+ str = malloc(sizeof(char) * (len+1));
+ for (i = 0; i < len; i++)
+ str[i] = 'a';
+ str[len-1] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 2:
+ printf("# pattern: \"(a*)\"\n");
+ printf("# string: \"aaaaaa...\"\n"); len = 0;
+ tre_regcomp(&reobj, "(a*)", REG_EXTENDED);
+ while (len <= max_len) {
+
+ str = malloc(sizeof(char) * (len+1));
+ for (i = 0; i < len; i++)
+ str[i] = 'a';
+ str[len-1] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+ case 3:
+ printf("# pattern: \"(a*)*|b*\"\n");
+ printf("# string: \"aaaaaa...b\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "(a*)*|b*", REG_EXTENDED);
+ while (len <= max_len) {
+ str = malloc(sizeof(char) * (len+1));
+ for (i = 0; i < len-1; i++)
+ str[i] = 'a';
+ if (len > 0)
+ str[len-1] = 'b';
+ str[len] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+ case 4:
+ printf("# pattern: \"(a|a|a|...|a)\"\n");
+ printf("# string: \"aaaaaa...\"\n");
+ len = 1024*1024;
+ str = malloc(sizeof(char) * (len+1));
+ for (i = 0; i < len-1; i++)
+ str[i] = 'a';
+ str[len] = '\0';
+ len = 0;
+ while (len <= max_len) {
+ tmpbuf[0] = '(';
+ for (i = 1; i < (len*2); i++) {
+ tmpbuf[i] = 'a';
+ if (i < len*2-2) {
+ i++;
+ tmpbuf[i] = '|';
+ }
+ }
+ printf("# i = %d\n", i);
+ tmpbuf[i] = ')';
+ tmpbuf[i+1] = '*';
+ tmpbuf[i+2] = '\0';
+ printf("# pat = %s\n", tmpbuf);
+ tre_regcomp(&reobj, tmpbuf, REG_EXTENDED);
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ tre_regfree(&reobj);
+ }
+ free(str);
+ break;
+
+ case 5:
+ printf("# pattern: \"foobar\"\n");
+ printf("# string: \"aaaaaa...foobar\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "foobar", REG_EXTENDED);
+ while (len <= max_len) {
+ str = malloc(sizeof(char) * (len+7));
+ for (i = 0; i < len; i++) {
+ if (i*i % 3)
+ str[i] = 'a';
+ else
+ str[i] = 'a';
+ }
+ str[len+0] = 'f';
+ str[len+1] = 'o';
+ str[len+2] = 'o';
+ str[len+3] = 'b';
+ str[len+4] = 'a';
+ str[len+5] = 'r';
+ str[len+6] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 6:
+ printf("# pattern: \"a*foobar\"\n");
+ printf("# string: \"aaaaaa...foobar\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "a*foobar", REG_EXTENDED);
+ while (len <= max_len) {
+ str = malloc(sizeof(char) * (len+7));
+ for (i = 0; i < len; i++) {
+ str[i] = 'a';
+ }
+ str[len+0] = 'f';
+ str[len+1] = 'o';
+ str[len+2] = 'o';
+ str[len+3] = 'b';
+ str[len+4] = 'a';
+ str[len+5] = 'r';
+ str[len+6] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 7:
+ printf("# pattern: \"(a)*foobar\"\n");
+ printf("# string: \"aaaaabbaaab...foobar\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "(a)*foobar", REG_EXTENDED);
+ while (len <= max_len) {
+ str = malloc(sizeof(char) * (len+7));
+ for (i = 0; i < len; i++) {
+ /* Without this GNU regex won't find a match! */
+ if (i*(i-1) % 3)
+ str[i] = 'b';
+ else
+ str[i] = 'a';
+ }
+ str[len+0] = 'f';
+ str[len+1] = 'o';
+ str[len+2] = 'o';
+ str[len+3] = 'b';
+ str[len+4] = 'a';
+ str[len+5] = 'r';
+ str[len+6] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 8:
+ printf("# pattern: \"(a|b)*foobar\"\n");
+ printf("# string: \"aaaaabbaaab...foobar\"\n");
+ len = 0;
+ tre_regcomp(&reobj, "(a|b)*foobar", REG_EXTENDED);
+ while (len <= max_len) {
+ str = malloc(sizeof(char) * (len+7));
+ for (i = 0; i < len; i++) {
+ if (i*(i-1) % 3)
+ str[i] = 'b';
+ else
+ str[i] = 'a';
+ /* Without this GNU regex won't find a match! */
+ if (i % (1024*1024*10 - 100))
+ str[i] = 'f';
+ }
+ str[len+0] = 'f';
+ str[len+1] = 'o';
+ str[len+2] = 'o';
+ str[len+3] = 'b';
+ str[len+4] = 'a';
+ str[len+5] = 'r';
+ str[len+6] = '\0';
+
+ run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf);
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ case 9:
+ printf("# pattern: hand-coded a*\n");
+ printf("# string: \"aaaaaa...\"\n");
+ len = 0;
+ while (len <= max_len) {
+ printf("# len = %d\n", len);
+
+ str = malloc(sizeof(char)*(len+1));
+ for (i = 0; i < len; i++)
+ str[i] = 'a';
+ str[len-1] = '\0';
+
+ for (i = 0; i < samples; i++) {
+ c1 = clock();
+ for (j = 0; j < repeats; j++) {
+ char *s;
+ int l;
+
+ s = str;
+ l = 0;
+
+
+ while (s != '\0') {
+ if (*s == 'a') {
+ s++;
+ l++;
+ } else
+ break;
+ }
+ }
+ c2 = clock();
+ sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats);
+
+ printf("# sample: %.5f sec, clocks: %ld\n",
+ (double)(c2-c1)/(CLOCKS_PER_SEC*repeats),
+ (long)(c2-c1));
+ fflush(stdout);
+ }
+ fflush(stdout);
+
+ stats(sample_data, samples, len);
+ len = len + (max_len/steps);
+ free(str);
+ }
+ break;
+
+
+ default:
+ printf("Pelle.\n");
+ return 1;
+ }
+
+ tre_regfree(&reobj);
+
+ return 0;
+}