1 files changed, 488 insertions, 0 deletions
diff --git a/test/bio_base64_test.c b/test/bio_base64_test.c
new file mode 100644
index 000000000000..8d6ca7b58e1e
--- /dev/null
+++ b/test/bio_base64_test.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#include <stdio.h>
+#include <string.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/rand.h>
+
+#include "testutil.h"
+
+/*
+ * Long junk prefix: 2047 bytes of "#ooooooooo..." + NUL terminator.  The
+ * contents are filled in by setup_tests(), and the buffer is listed in
+ * prefixes[] as the prefix variant that is longer than the internal 1k BIO
+ * buffer.
+ */
+static char gunk[2048];
+
+typedef struct {            /* Parameters of one decode scenario */
+    char *prefix;           /* Text placed ahead of the encoded data */
+    char *encoded;          /* Verbatim encoded input, NULL to generate it */
+    unsigned bytes;         /* Number of octets the decode should yield */
+    int trunc;              /* Bare 'A' code points appended after the data */
+    char *suffix;           /* Text placed after the encoded data */
+    int retry;              /* Non-zero: use a retriable source BIO */
+    int no_nl;              /* Non-zero: set BIO_FLAGS_BASE64_NO_NL */
+} test_case;
+
+#define BUFMAX 0xa0000          /* Encode at most 640kB. */
+#define sEOF "-EOF"             /* '-' as in PEM and MIME boundaries */
+#define junk "#foo"             /* Skipped initial content */
+
+#define EOF_RETURN (-1729)      /* Distinct from -1, etc., internal results */
+#define NLEN 6                  /* Number of base input lengths tried */
+#define NVAR 5                  /* Number of prefix/suffix variants */
+/*
+ * Junk suffixed variants don't make sense with padding or truncated groups
+ * because we will typically stop with an error before seeing the suffix, but
+ * with retriable BIOs may never look at the suffix after detecting padding.
+ */
+#define NPAD 6                  /* None, 1-2 extra octets, 1-3 extra chars */
+#define NVARPAD (NVAR * NPAD - NPAD + 1) /* Last variant gets one case */
+
+/*
+ * Prefix/suffix pairs, indexed by variant: no prefix or suffix, short junk
+ * prefix, long junk prefix, soft EOF suffix, junk suffix.
+ */
+static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
+static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
+
+/*
+ * Base raw input lengths in octets.  The array is indexed by a value reduced
+ * modulo NLEN, so it is sized with NLEN rather than a separate literal.
+ */
+static unsigned lengths[NLEN] = { 0, 3, 48, 192, 768, 1536 };
+
+/* Encoded line lengths to try; the trailing 0 terminates the list */
+static unsigned linelengths[] = {
+    4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
+};
+
+/*
+ * Whitespace counts to try.  The final 0xFFFF entry is a sentinel: it is
+ * larger than any line length, so a scan bounded by the line length always
+ * stops before running off the end of the array.
+ */
+static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
+
+/*
+ * Generate `len` random octets.
+ *
+ * Returns a buffer obtained from OPENSSL_malloc(), to be released with
+ * OPENSSL_free(), or NULL when `len` is zero, `len` exceeds BUFMAX, the
+ * allocation fails or the random number generator fails.
+ */
+static unsigned char *genbytes(unsigned len)
+{
+    unsigned char *buf;
+
+    if (len == 0 || len > BUFMAX)
+        return NULL;
+    if ((buf = OPENSSL_malloc(len)) == NULL)
+        return NULL;
+
+    /* Never hand back uninitialised memory when the RNG is unavailable */
+    if (RAND_bytes(buf, (int)len) <= 0) {
+        OPENSSL_free(buf);
+        return NULL;
+    }
+
+    return buf;
+}
+
+/*
+ * Write the single character `c` to `mem` and advance the column counter
+ * `*pos`.  When the counter reaches `llen` it is reset to zero and a newline
+ * is written as well.  Returns 1 on success, 0 if a write fails.
+ */
+static int memout(BIO *mem, char c, int llen, int *pos)
+{
+    static const char eol = '\n';
+
+    if (BIO_write(mem, &c, 1) != 1)
+        return 0;
+
+    *pos += 1;
+    if (*pos != llen)
+        return 1;
+
+    /* The line is full: start a new one */
+    *pos = 0;
+    return BIO_write(mem, &eol, 1) == 1;
+}
+
+/*
+ * Append the code point `c`, sometimes preceded by a single space.  When
+ * `wscnt` is non-zero, a space is written first whenever a random draw
+ * reduced modulo `llen` falls below `wscnt`.  Returns 1 on success, 0 if a
+ * write fails.
+ */
+static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
+{
+    if (wscnt > 0) {
+        unsigned draw = test_random() % llen;
+
+        if (draw < wscnt && !memout(mem, ' ', llen, pos))
+            return 0;
+    }
+
+    return memout(mem, c, llen, pos);
+}
+
+/*
+ * Write the base64 form of `buf[0..buflen-1]` to `mem`.
+ *
+ * If `encoded` is not NULL it is written verbatim instead and all other
+ * parameters are ignored.  Otherwise output lines hold approximately `llen`
+ * characters, spaces are randomly inserted ahead of some code points
+ * (roughly `wscnt` per line), and `trunc` additional 'A' code points are
+ * appended after the data and any padding.  A last line that is only partly
+ * filled is terminated with a newline.
+ *
+ * Returns 1 on success, 0 if a write to `mem` fails.
+ */
+static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
+                  int trunc, unsigned llen, unsigned wscnt, BIO *mem)
+{
+    static const unsigned char b64[65] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    char quad[4];
+    char eol = '\n';
+    int col = 0;
+    int i;
+
+    /* Verbatim input bypasses the encoder entirely */
+    if (encoded != NULL) {
+        int vlen = strlen(encoded);
+
+        return BIO_write(mem, encoded, vlen) == vlen;
+    }
+
+    /*
+     * Consume up to three octets at a time.  A short final group is
+     * zero-extended to 24 bits and its unused trailing code points are
+     * replaced with '=' padding.
+     */
+    while (buflen > 0) {
+        unsigned take = buflen < 3 ? buflen : 3;
+        unsigned long v = (unsigned long)buf[0] << 16;
+
+        if (take > 1)
+            v |= (unsigned long)buf[1] << 8;
+        if (take > 2)
+            v |= buf[2];
+
+        quad[0] = b64[v >> 18];
+        quad[1] = b64[(v >> 12) & 0x3f];
+        quad[2] = take > 1 ? b64[(v >> 6) & 0x3f] : '=';
+        quad[3] = take > 2 ? b64[v & 0x3f] : '=';
+
+        for (i = 0; i < 4; i++) {
+            if (memoutws(mem, quad[i], wscnt, llen, &col) == 0)
+                return 0;
+        }
+
+        buf += take;
+        buflen -= take;
+    }
+
+    /* Extra code points that leave the final group incomplete */
+    for (; trunc > 0; --trunc) {
+        if (memoutws(mem, 'A', wscnt, llen, &col) == 0)
+            return 0;
+    }
+
+    /* Terminate a partly filled last line */
+    if (col > 0)
+        return BIO_write(mem, &eol, 1) == 1;
+
+    return 1;
+}
+
+/*
+ * Assemble a complete test input in a memory BIO: the optional `prefix`
+ * line, the encoded payload produced by encode(), and the optional `suffix`
+ * line.  A newline is written after each non-empty prefix or suffix.
+ *
+ * On success the accumulated text is detached from the BIO and stored in
+ * `*out`, and its length is returned.  The caller in this file releases it
+ * with OPENSSL_free().  Returns -1 on failure, leaving `*out` untouched.
+ */
+static int genb64(char *prefix, char *suffix, unsigned const char *buf,
+                  unsigned buflen, int trunc, char *encoded, unsigned llen,
+                  unsigned wscnt, char **out)
+{
+    static const char eol = '\n';
+    int plen = strlen(prefix);
+    int slen = strlen(suffix);
+    int total;
+    BUF_MEM *bm;
+    BIO *mem;
+
+    if ((mem = BIO_new(BIO_s_mem())) == NULL)
+        return -1;
+
+    if (*prefix != '\0') {
+        if (BIO_write(mem, prefix, plen) != plen
+            || BIO_write(mem, &eol, 1) != 1)
+            goto err;
+    }
+
+    if (encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0)
+        goto err;
+
+    if (*suffix != '\0') {
+        if (BIO_write(mem, suffix, slen) != slen
+            || BIO_write(mem, &eol, 1) != 1)
+            goto err;
+    }
+
+    /*
+     * Take over the data buffer: clear the pointer held by the BUF_MEM, and
+     * mark the BIO as not owning the BUF_MEM, so that freeing the BIO and
+     * then the BUF_MEM leaves the data itself alive for the caller.
+     */
+    BIO_get_mem_ptr(mem, &bm);
+    *out = bm->data;
+    total = bm->length;
+    bm->data = NULL;
+    (void) BIO_set_close(mem, BIO_NOCLOSE);
+    BIO_free(mem);
+    BUF_MEM_free(bm);
+
+    return total;
+
+ err:
+    BIO_free(mem);
+    return -1;
+}
+
+/*
+ * Run one decode scenario.
+ *
+ * The raw payload (random octets, or zeros when `t->encoded` supplies the
+ * encoded form verbatim) is turned into test input by genb64(), using line
+ * length `llen` and whitespace count `wscnt`.  The input is written to a
+ * memory BIO and read back through a base64 filter BIO.  When `t->retry` is
+ * set, the memory BIO returns EOF_RETURN when it runs dry, and the input is
+ * supplied in two halves with a read attempted after each.
+ *
+ * Returns 0 when the outcome matches expectations: a negative final read for
+ * inputs that must be rejected, otherwise exactly the original payload
+ * followed by a final read of 0.  Returns -1 on any mismatch or setup
+ * failure.
+ */
+static int test_bio_base64_run(test_case *t, int llen, int wscnt)
+{
+    unsigned char *raw = NULL;
+    unsigned char *out = NULL;
+    unsigned out_len;
+    char *encoded = NULL;
+    int elen;
+    BIO *bio = NULL, *b64 = NULL;
+    int n, n1, n2;
+    int off;
+    int expect_error;
+    int ret = -1;
+
+    /*
+     * Pre-encoded data always encodes NUL octets.  If all we care about is the
+     * length, and not the payload, use random bytes.
+     */
+    if (t->encoded != NULL)
+        raw = OPENSSL_zalloc(t->bytes);
+    else
+        raw = genbytes(t->bytes);
+
+    if (raw == NULL && t->bytes > 0) {
+        TEST_error("out of memory");
+        goto end;
+    }
+
+    /* Leave generous slack, so that overlong output can be detected */
+    out_len = t->bytes + 1024;
+    if ((out = OPENSSL_malloc(out_len)) == NULL) {
+        TEST_error("out of memory");
+        goto end;
+    }
+
+    elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
+                  llen, wscnt, &encoded);
+    if (elen < 0
+        || (bio = BIO_new(BIO_s_mem())) == NULL
+        || (b64 = BIO_new(BIO_f_base64())) == NULL) {
+        TEST_error("out of memory");
+        goto end;
+    }
+
+    /* From here on the chain owns `bio`; BIO_free_all(b64) releases both */
+    BIO_push(b64, bio);
+    if (t->no_nl)
+        BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
+
+    BIO_set_mem_eof_return(bio, t->retry ? EOF_RETURN : 0);
+
+    /*
+     * When the source BIO is retriable, exercise retries by writing the
+     * input to the underlying BIO in two steps (the first half, then the
+     * rest) and trying to decode some data after each write.
+     */
+    n1 = t->retry ? elen / 2 : elen;
+    if (n1 > 0 && BIO_write(bio, encoded, n1) != n1) {
+        TEST_error("Failed to write test input");
+        goto end;
+    }
+
+    n = BIO_read(b64, out, out_len);
+
+    if (n1 < elen) {
+        /* Append the rest of the input, and read again */
+        if (BIO_write(bio, encoded + n1, elen - n1) != elen - n1) {
+            TEST_error("Failed to write test input");
+            goto end;
+        }
+        if (n > 0) {
+            n2 = BIO_read(b64, out + n, out_len - n);
+            if (n2 > 0)
+                n += n2;
+        } else if (n == EOF_RETURN) {
+            n = BIO_read(b64, out, out_len);
+        }
+    }
+
+    /* Turn retry-related negative results to normal (0) EOF */
+    if (n == EOF_RETURN)
+        n = 0;
+
+    /* Turn off retries */
+    if (t->retry)
+        BIO_set_mem_eof_return(bio, 0);
+
+    /* Should not happen, given extra space in out_len */
+    if (n >= (int)out_len) {
+        TEST_error("Unexpectedly long decode output");
+        goto end;
+    }
+
+    /*
+     * Perform the last read, checking its result.  After a failed earlier
+     * read `n` is negative and nothing valid has been stored in `out`, so
+     * the read starts at the beginning of the buffer, never before it.
+     */
+    off = n > 0 ? n : 0;
+    ret = BIO_read(b64, out + off, out_len - off);
+
+    /*
+     * Expect an error to be detected with:
+     *
+     * - truncated groups,
+     * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
+     *   input
+     * - non-base64 prefixes in NO_NL mode
+     *
+     * Otherwise, check the decoded content
+     */
+    expect_error = t->trunc > 0
+        || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
+        || (t->no_nl && *t->prefix);
+
+    if (expect_error) {
+        if (ret < 0) {
+            ret = 0;
+        } else {
+            TEST_error("Final read result was non-negative");
+            ret = -1;
+        }
+    } else if (ret != 0
+               || n != (int)t->bytes
+               || (n > 0 && memcmp(raw, out, n) != 0)) {
+        TEST_error("Failed to decode expected data");
+        ret = -1;
+    }
+
+ end:
+    if (b64 != NULL)
+        BIO_free_all(b64);
+    else
+        BIO_free(bio);
+    OPENSSL_free(out);
+    OPENSSL_free(raw);
+    OPENSSL_free(encoded);
+
+    return ret;
+}
+
+/* Print a one-line summary of a single run of test case `t` to stderr */
+static void report_case(test_case *t, unsigned llen, unsigned wscnt, int ok)
+{
+    fprintf(stderr, "bio_base64_test: ok=%d", ok);
+    if (*t->prefix)
+        fprintf(stderr, ", prefix='%s'", t->prefix);
+    if (t->encoded)
+        fprintf(stderr, ", data='%s'", t->encoded);
+    else
+        fprintf(stderr, ", datalen=%u", t->bytes);
+    if (t->trunc)
+        fprintf(stderr, ", trunc=%d", t->trunc);
+    if (*t->suffix)
+        fprintf(stderr, ", suffix='%s'", t->suffix);
+    fprintf(stderr, ", linelen=%u", llen);
+    fprintf(stderr, ", wscount=%u", wscnt);
+    if (t->retry)
+        fprintf(stderr, ", retriable");
+    if (t->no_nl)
+        fprintf(stderr, ", oneline");
+    fputc('\n', stderr);
+}
+
+/*
+ * Run test case `t` over the configured line lengths and, for each line
+ * length, over the whitespace counts smaller than half that length.
+ *
+ * Returns 1 if every run succeeded, 0 otherwise.  When `verbose` is non-zero
+ * a summary line is printed to stderr after each run; the "ok" value shown is
+ * cumulative over the runs so far.
+ */
+static int generic_case(test_case *t, int verbose)
+{
+    /*
+     * Use a longer line for NO_NL tests, in particular, eventually exceeding
+     * 1k bytes.
+     */
+    const int extra = t->no_nl ? 64 : 0;
+    int passed = 1;
+    size_t i, j;
+
+    for (i = 0; linelengths[i] > 0; i++) {
+        const unsigned ll = linelengths[i];
+
+        for (j = 0; wscnts[j] * 2 < ll; j++) {
+            if (test_bio_base64_run(t, ll + extra, wscnts[j]) != 0)
+                passed = 0;
+
+            if (verbose)
+                report_case(t, ll, wscnts[j], passed);
+
+            /* Verbatim input is unaffected by line length or whitespace */
+            if (t->encoded != NULL)
+                return passed;
+        }
+
+        /*
+         * Still longer lines make no difference once the data is sure to fit
+         * on a single line.
+         */
+        if (ll > t->bytes + (t->bytes >> 1))
+            break;
+    }
+
+    return passed;
+}
+
+/*
+ * Divide `i` by `m`: the quotient is stored in `*q` and the remainder is
+ * returned.  Used to peel successive mixed-radix digits off a test index.
+ */
+static int quotrem(int i, unsigned int m, int *q)
+{
+    unsigned int quot = i / m;
+    unsigned int rem = i % m;
+
+    *q = quot;
+    return rem;
+}
+
+/*
+ * Generated test number `idx`.
+ *
+ * The index is a mixed-radix number whose digits select, least significant
+ * first: the base input length (radix NLEN), the combined prefix/suffix
+ * variant and pad case (radix NVARPAD, split further with radix NPAD),
+ * whether the source BIO is retriable (radix 2) and whether
+ * BIO_FLAGS_BASE64_NO_NL is set (radix 2).
+ *
+ * Returns 1 on success, 0 on failure or when `idx` is out of range.
+ */
+static int test_bio_base64_generated(int idx)
+{
+    test_case t;
+    int variant;
+    int lencase;
+    int padcase;
+    int q = idx;
+
+    lencase = quotrem(q, NLEN, &q);
+    variant = quotrem(q, NVARPAD, &q);
+    padcase = quotrem(variant, NPAD, &variant);
+    t.retry = quotrem(q, 2, &q);
+    t.no_nl = quotrem(q, 2, &q);
+
+    /* Anything left over means the index had more digits than expected */
+    if (q != 0) {
+        TEST_error("Test index out of range: %d", idx);
+        return 0;
+    }
+
+    t.prefix = prefixes[variant];
+    t.encoded = NULL;
+    t.bytes  = lengths[lencase];
+    t.trunc = 0;
+
+    /* Pad cases 1-2 add octets; pad cases 3-5 add 1-3 unpadded characters */
+    if (padcase && padcase < 3)
+        t.bytes  += padcase;
+    else if (padcase >= 3)
+        t.trunc = padcase - 2;
+    t.suffix = suffixes[variant];
+
+    if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
+        TEST_error("Unexpected suffix test after padding");
+        return 0;
+    }
+
+    return generic_case(&t, 0);
+}
+
+/*
+ * Fixed-input regression test number `idx`.
+ *
+ * The input is a skipped prefix followed by verbatim encoded data whose
+ * second and later lines total one byte less than the prefix with its
+ * newline.  Bit 0 of `idx` selects a retriable source BIO and bit 1 selects
+ * BIO_FLAGS_BASE64_NO_NL.
+ *
+ * Returns 1 on success, 0 on failure or when `idx` is out of range.
+ */
+static int test_bio_base64_corner_case_bug(int idx)
+{
+    test_case t;
+    int q = idx;
+
+    t.retry = quotrem(q, 2, &q);
+    t.no_nl = quotrem(q, 2, &q);
+
+    if (q != 0) {
+        TEST_error("Test index out of range: %d", idx);
+        return 0;
+    }
+
+    /* 9 bytes of skipped non-base64 input + newline */
+    t.prefix = "#foo\n#bar";
+
+    /* 9 bytes on 2nd and subsequent lines */
+    t.encoded = "A\nAAA\nAAAA\n";
+    t.suffix = "";
+
+    /* Expected decode length */
+    t.bytes = 6;
+    t.trunc = 0;    /* ignored */
+
+    return generic_case(&t, 0);
+}
+
+/*
+ * Test harness entry point: fill in the long junk prefix and register the
+ * generated and corner-case tests.  Always returns 1.
+ */
+int setup_tests(void)
+{
+    /* "#ooo...o": '#' first, then 'o' filler, then the NUL terminator */
+    gunk[0] = '#';
+    memset(gunk + 1, 'o', sizeof(gunk) - 2);
+    gunk[sizeof(gunk) - 1] = '\0';
+
+    /*
+     * The generated tests enumerate the product of:
+     *
+     * - NLEN base lengths of random raw input (0, 3, 48, 192, 768 and 1536
+     *   octets), which encode to 0, 4, 64, 256, 1024 and 2048 base64
+     *   characters, not counting line breaks and ignored whitespace.
+     *
+     * - NVAR prefix/suffix variants: no prefix or suffix, a short junk
+     *   prefix, a long gunk prefix (larger than the 1k internal BIO buffer),
+     *   a soft EOF suffix, and a junk suffix (for which an error is
+     *   expected).
+     *
+     * - NPAD tail cases: zero, one or two additional octets that may involve
+     *   padding, or else (truncation) 1, 2 or 3 extra characters with
+     *   missing padding.  Only the first four variants are combined with all
+     *   of these; the junk suffix variant is tested without extra octets or
+     *   truncation.
+     *
+     * - A non-retriable or a retriable underlying BIO.
+     *
+     * - BIO_FLAGS_BASE64_NO_NL clear or set.  With the flag set, the junk
+     *   and gunk prefixes are expected to produce an error, while the soft
+     *   EOF suffix is still accepted.
+     *
+     * Internally, each test may loop over a range of encoded line lengths
+     * and average whitespace "densities".
+     */
+    ADD_ALL_TESTS(test_bio_base64_generated, NLEN * NVARPAD * 2 * 2);
+
+    /*
+     * Corner case in the original code that skips ignored input, hit when
+     * the ignored length is one byte longer than the total of the second and
+     * later lines of valid input in the first 1k bytes of input.  There are
+     * no content variants; only the BIO retry status and the oneline flag
+     * vary.
+     */
+    ADD_ALL_TESTS(test_bio_base64_corner_case_bug, 2 * 2);
+
+    return 1;
+}