summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/conf/NOTES1
-rw-r--r--sys/conf/files4
-rw-r--r--sys/conf/options1
-rw-r--r--sys/geom/bde/g_bde.c282
-rw-r--r--sys/geom/bde/g_bde.h150
-rw-r--r--sys/geom/bde/g_bde_crypt.c356
-rw-r--r--sys/geom/bde/g_bde_lock.c311
-rw-r--r--sys/geom/bde/g_bde_work.c731
8 files changed, 1836 insertions, 0 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 81df5c3b3ad1..02e1d9f1080a 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -115,6 +115,7 @@ options PQ_CACHESIZE=512 # color for 512k/16k cache
options INCLUDE_CONFIG_FILE # Include this file in kernel
options GEOM_AES
+options GEOM_BDE
options GEOM_BSD
options GEOM_GPT
options GEOM_MBR
diff --git a/sys/conf/files b/sys/conf/files
index 2bace6d1bf26..c003bc3c66af 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -796,6 +796,10 @@ fs/umapfs/umap_vnops.c optional umapfs
fs/unionfs/union_subr.c optional unionfs
fs/unionfs/union_vfsops.c optional unionfs
fs/unionfs/union_vnops.c optional unionfs
+geom/bde/g_bde.c optional geom_bde
+geom/bde/g_bde_crypt.c optional geom_bde
+geom/bde/g_bde_lock.c optional geom_bde
+geom/bde/g_bde_work.c optional geom_bde
geom/geom_aes.c optional geom_aes
geom/geom_bsd.c optional geom_bsd
geom/geom_ctl.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 8480b1cf0682..031184935fb2 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -88,6 +88,7 @@ GDB_REMOTE_CHAT opt_ddb.h
GDBSPEED opt_ddb.h
NO_GEOM opt_geom.h
GEOM_AES opt_geom.h
+GEOM_BDE opt_geom.h
GEOM_BSD opt_geom.h
GEOM_GPT opt_geom.h
GEOM_MBR opt_geom.h
diff --git a/sys/geom/bde/g_bde.c b/sys/geom/bde/g_bde.c
new file mode 100644
index 000000000000..51fd77977004
--- /dev/null
+++ b/sys/geom/bde/g_bde.c
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+
+#define BDE_CLASS_NAME "BDE"
+
+static void
+g_bde_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_bde_softc *sc;
+
+ gp = bp->bio_to->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ sc = gp->softc;
+ switch (bp->bio_cmd) {
+ case BIO_DELETE:
+ case BIO_READ:
+ case BIO_WRITE:
+ g_bde_start1(bp);
+ break;
+ case BIO_GETATTR:
+ case BIO_SETATTR:
+ if (g_handleattr_off_t(bp, "GEOM::mediasize", sc->mediasize))
+ return;
+ if (g_handleattr_int(bp, "GEOM::sectorsize", sc->sectorsize))
+ return;
+ g_io_deliver(bp, EOPNOTSUPP);
+ break;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+ return;
+}
+
+static void
+g_bde_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct g_provider *pp;
+ struct g_bde_softc *sc;
+ int error;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_orphan(%p/%s)", cp, cp->provider->name);
+ g_topology_assert();
+ KASSERT(cp->provider->error != 0,
+ ("g_bde_orphan with error == 0"));
+
+ gp = cp->geom;
+ sc = gp->softc;
+ gp->flags |= G_GEOM_WITHER;
+ error = cp->provider->error;
+ LIST_FOREACH(pp, &gp->provider, provider)
+ g_orphan_provider(pp, error);
+ bzero(sc, sizeof(struct g_bde_softc)); /* destroy evidence */
+ return;
+}
+
+static int
+g_bde_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+
+ gp = pp->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) {
+ de++;
+ dr++;
+ }
+ /* ... and let go of it on last close */
+ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) {
+ de--;
+ dr--;
+ }
+ return (g_access_rel(cp, dr, dw, de));
+}
+
+static int
+g_bde_create(struct g_createargs *ga)
+{
+	struct g_geom *gp = NULL;	/* NULL until located or created below */
+	struct g_consumer *cp;
+	struct g_provider *pp;
+	struct g_bde_key *kp;
+	int error;
+	u_int sectorsize;
+	off_t mediasize;
+	struct g_bde_softc *sc;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_create(%d)", ga->flag);
+	g_topology_assert();
+	if (ga->flag == 1) {
+		/*
+		 * Orderly detachment.
+		 */
+		if (ga->geom != NULL) {
+			gp = ga->geom;
+		} else if (ga->provider != NULL) {
+			if (ga->provider->geom->class == ga->class) {
+				gp = ga->provider->geom;
+			} else {
+				LIST_FOREACH(cp, &ga->provider->consumers,
+				    consumers) {
+					if (cp->geom->class == ga->class) {
+						gp = cp->geom;
+						break;
+					}
+				}
+			}
+			if (gp == NULL)
+				return (EINVAL);
+		} else {
+			return (EINVAL);
+		}
+		KASSERT(gp != NULL, ("NULL geom"));
+		pp = LIST_FIRST(&gp->provider);
+		KASSERT(pp != NULL, ("NULL provider"));
+		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
+			return (EBUSY);
+		g_orphan_provider(pp, ENXIO);
+		sc = gp->softc;
+		cp = LIST_FIRST(&gp->consumer);
+		KASSERT(cp != NULL, ("NULL consumer"));
+		sc->dead = 1;
+		wakeup(sc);
+		error = g_access_rel(cp, -1, -1, -1);
+		KASSERT(error == 0, ("error on close"));
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		g_topology_unlock();
+		while (sc->dead != 2 && !LIST_EMPTY(&pp->consumers))
+			tsleep(sc, PRIBIO, "g_bdedie", hz);
+		g_topology_lock();
+		g_destroy_provider(pp);
+		mtx_destroy(&sc->worklist_mutex);
+		bzero(&sc->key, sizeof sc->key);
+		g_free(sc);
+		g_destroy_geom(gp);
+		return (0);
+	}
+
+	if (ga->flag != 0)
+		return (EOPNOTSUPP);
+
+	if (ga->provider == NULL)
+		return (EINVAL);
+	/*
+	 * Attach
+	 */
+	gp = g_new_geomf(ga->class, "%s.bde", ga->provider->name);
+	gp->start = g_bde_start;
+	gp->orphan = g_bde_orphan;
+	gp->access = g_bde_access;
+	gp->spoiled = g_std_spoiled;
+	cp = g_new_consumer(gp);
+	g_attach(cp, ga->provider);
+	error = g_access_rel(cp, 1, 1, 1);
+	if (error) {
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		g_destroy_geom(gp);
+		return (error);
+	}
+	g_topology_unlock();
+	while (1) {
+		error = g_getattr("GEOM::sectorsize", cp, &sectorsize);
+		if (error)
+			break;
+		error = g_getattr("GEOM::mediasize", cp, &mediasize);
+		if (error)
+			break;
+		sc = g_malloc(sizeof(struct g_bde_softc), M_WAITOK | M_ZERO);
+		gp->softc = sc;
+		sc->geom = gp;
+		sc->consumer = cp;
+
+		error = g_bde_decrypt_lock(sc, ga->ptr,
+		    (u_char *)ga->ptr + 256, mediasize, sectorsize, NULL);
+		bzero(sc->arc4_sbox, sizeof sc->arc4_sbox);
+		if (error)
+			break;
+		kp = &sc->key;
+
+		/* Initialize helper-fields */
+		kp->keys_per_sector = kp->sectorsize / G_BDE_SKEYLEN;
+		kp->zone_cont = kp->keys_per_sector * kp->sectorsize;
+		kp->zone_width = kp->zone_cont + kp->sectorsize;
+		kp->media_width = kp->sectorN - kp->sector0 -
+		    G_BDE_MAXKEYS * kp->sectorsize;
+
+		/* Our external parameters */
+		sc->zone_cont = kp->zone_cont;
+		sc->mediasize = g_bde_max_sector(kp);
+		sc->sectorsize = kp->sectorsize;
+
+		TAILQ_INIT(&sc->freelist);
+		TAILQ_INIT(&sc->worklist);
+		mtx_init(&sc->worklist_mutex, "g_bde_worklist", NULL, MTX_DEF);
+		mtx_lock(&Giant);
+		/* XXX: error check */
+		kthread_create(g_bde_worker, gp, &sc->thread, 0, 0,
+		    "g_bde %s", gp->name);
+		mtx_unlock(&Giant);
+		g_topology_lock();
+		pp = g_new_providerf(gp, gp->name);
+		pp->mediasize = sc->mediasize;
+		g_error_provider(pp, 0);
+		g_topology_unlock();
+		break;
+	}
+	g_topology_lock();
+	if (error == 0) {
+		ga->geom = gp;
+		return (0);
+	} else {
+		g_access_rel(cp, -1, -1, -1);
+	}
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	if (gp->softc != NULL)
+		g_free(gp->softc);
+	g_destroy_geom(gp);
+	return (error);
+}
+
+static struct g_class g_bde_class = {
+ BDE_CLASS_NAME,
+ NULL,
+ g_bde_create,
+ G_CLASS_INITIALIZER
+};
+
+DECLARE_GEOM_CLASS(g_bde_class, g_bde);
diff --git a/sys/geom/bde/g_bde.h b/sys/geom/bde/g_bde.h
new file mode 100644
index 000000000000..df924e420f10
--- /dev/null
+++ b/sys/geom/bde/g_bde.h
@@ -0,0 +1,150 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* These are quite, but not entirely unlike constants. */
+#define G_BDE_MKEYLEN (2048/8)
+#define G_BDE_SKEYBITS 128
+#define G_BDE_SKEYLEN (G_BDE_SKEYBITS/8)
+#define G_BDE_KKEYBITS 128
+#define G_BDE_KKEYLEN (G_BDE_KKEYBITS/8)
+#define G_BDE_MAXKEYS 4
+#define G_BDE_LOCKSIZE 384
+
+/* This just needs to be "large enough" */
+#define G_BDE_KEYBYTES 304
+
+struct g_bde_work;
+struct g_bde_softc;
+
+struct g_bde_sector {
+ struct g_bde_work *owner;
+ struct g_bde_softc *softc;
+ off_t offset;
+ u_int size;
+ u_int ref;
+ void *data;
+ TAILQ_ENTRY(g_bde_sector) list;
+ u_char valid;
+ u_char malloc;
+ enum {JUNK, IO, VALID} state;
+ int error;
+};
+
+struct g_bde_work {
+ struct mtx mutex;
+ off_t offset;
+ off_t length;
+ void *data;
+ struct bio *bp;
+ struct g_bde_softc *softc;
+ off_t so;
+ off_t kso;
+ u_int ko;
+ struct g_bde_sector *sp;
+ struct g_bde_sector *ksp;
+ TAILQ_ENTRY(g_bde_work) list;
+ enum {SETUP, WAIT, FINISH} state;
+ int error;
+};
+
+struct g_bde_key {
+ uint64_t sector0;
+ /* Physical byte offset of first byte used */
+ uint64_t sectorN;
+ /* Physical byte offset of first byte not used */
+ uint64_t keyoffset;
+ uint64_t lsector[G_BDE_MAXKEYS];
+ /* Physical offsets */
+ uint32_t sectorsize;
+ uint32_t flags;
+ uint8_t hash[16];
+ uint8_t spare[48];
+ uint8_t key[G_BDE_MKEYLEN];
+ /* Non-stored help-fields */
+ uint64_t zone_width; /* On-disk width of zone */
+ uint64_t zone_cont; /* Payload width of zone */
+ uint64_t media_width; /* Non-magic width of zone */
+ u_int keys_per_sector;
+};
+
+struct g_bde_softc {
+ off_t mediasize;
+ u_int sectorsize;
+ uint64_t zone_cont;
+ struct g_geom *geom;
+ struct g_consumer *consumer;
+ TAILQ_HEAD(, g_bde_sector) freelist;
+ TAILQ_HEAD(, g_bde_work) worklist;
+ struct mtx worklist_mutex;
+ struct proc *thread;
+ struct g_bde_key key;
+ u_char arc4_sbox[256];
+ u_char arc4_i, arc4_j;
+ int dead;
+ u_int nwork;
+ u_int nsect;
+ u_int ncache;
+};
+
+/* g_bde_crypt.c */
+void g_bde_crypt_delete(struct g_bde_work *wp);
+void g_bde_crypt_read(struct g_bde_work *wp);
+void g_bde_crypt_write(struct g_bde_work *wp);
+
+/* g_bde_key.c */
+void g_bde_zap_key(struct g_bde_softc *sc);
+int g_bde_get_key(struct g_bde_softc *sc, void *ptr, int len);
+int g_bde_init_keybytes(struct g_bde_softc *sc, char *passp, int len);
+
+/* g_bde_lock .c */
+void g_bde_encode_lock(struct g_bde_key *gl, u_char *ptr);
+void g_bde_decode_lock(struct g_bde_key *gl, u_char *ptr);
+u_char g_bde_arc4(struct g_bde_softc *sc);
+void g_bde_arc4_seq(struct g_bde_softc *sc, void *ptr, u_int len);
+void g_bde_arc4_seed(struct g_bde_softc *sc, void *ptr, u_int len);
+int g_bde_keyloc_encrypt(struct g_bde_softc *sc, void *input, void *output);
+int g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, void *output);
+int g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *sbox, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey);
+
+/* g_bde_math .c */
+uint64_t g_bde_max_sector(struct g_bde_key *lp);
+void g_bde_map_sector(struct g_bde_key *lp, uint64_t isector, uint64_t *osector, uint64_t *ksector, u_int *koffset);
+
+/* g_bde_work.c */
+void g_bde_start1(struct bio *bp);
+void g_bde_worker(void *arg);
+
diff --git a/sys/geom/bde/g_bde_crypt.c b/sys/geom/bde/g_bde_crypt.c
new file mode 100644
index 000000000000..c649e23d3122
--- /dev/null
+++ b/sys/geom/bde/g_bde_crypt.c
@@ -0,0 +1,356 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the functions responsible for the crypto, keying
+ * and mapping operations on the I/O requests.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/libkern.h>
+#include <sys/md5.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+#include <crypto/rijndael/rijndael.h>
+
+/*
+ * These four functions wrap the raw Rijndael functions and make sure we
+ * explode if something fails which shouldn't.
+ */
+
+static void
+AES_init(cipherInstance *ci)
+{
+ int error;
+
+ error = rijndael_cipherInit(ci, MODE_CBC, NULL);
+ KASSERT(error > 0, ("rijndael_cipherInit %d", error));
+}
+
+static void
+AES_makekey(keyInstance *ki, int dir, u_int len, void *key)
+{
+ int error;
+
+ error = rijndael_makeKey(ki, dir, len, key);
+ KASSERT(error > 0, ("rijndael_makeKey %d", error));
+}
+
+static void
+AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockEncrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockEncrypt %d", error));
+}
+
+static void
+AES_decrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockDecrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockDecrypt %d", error));
+}
+
+/*
+ * Derive kkey from mkey + sector offset.
+ *
+ * Security objective: Derive a potentially very large number of distinct skeys
+ * from the comparatively small key material in our mkey, in such a way that
+ * if one, more or even many of the kkeys are compromised, this does not
+ * significantly help an attack on other kkeys and in particular does not
+ * weaken or compromise the mkey.
+ *
+ * We do this by cherry-picking characters out of the mkey, feeding these to
+ * MD5 with the sector offset in the middle and using the MD5 hash as kkey.
+ *
+ * The MD5 only acts as a "diode" against brute-force reversal, it offers no
+ * protection if the input to MD5 is predictable or insufficiently uncorrelated
+ * from sector to sector.
+ *
+ * The amount of entropy in a sector number is very low, and the amount of
+ * entropy between two sector numbers is even lower, (only slightly higher than
+ * one bit), so we rely heavily on the mkey to make the cherry picking non-
+ * linear and irreversible.
+ *
+ * This strong dependency on the mkey is very desirable, but the low amount
+ * of entropy from the sector number means that the algorithm is vulnerable
+ * to mkeys which have a lumpy histogram of byte values or little entropy.
+ *
+ * If you read this comment in order to find a weak spot or the best way to
+ * attack GBDE, you have probably come to the right place. Good luck.
+ */
+
+static void
+g_bde_kkey(struct g_bde_softc *sc, keyInstance *ki, int dir, off_t sector)
+{
+ u_int u, v, w, t;
+ MD5_CTX ct;
+ u_char buf[16], c;
+
+ MD5Init(&ct);
+ w = sector /= sc->sectorsize;
+ v = w % 211; /* A prime slightly smaller than G_BDE_MKEYLEN */
+ u = w % 19; /* A small prime */
+ for (t = 0; t < G_BDE_SKEYLEN; t++) {
+ u %= G_BDE_MKEYLEN;
+ v %= G_BDE_MKEYLEN;
+ c = sc->key.key[u] ^ sc->key.key[v];
+ MD5Update(&ct, &c, 1);
+ v += c + t;
+ u += sc->key.key[c];
+ if (w & 1)
+ v += 13; /* A small prime */
+ else
+ u += 131; /* A prime roughly G_BDE_MKEYLEN / 2 */
+ if (t == G_BDE_SKEYLEN / 2)
+ MD5Update(&ct, (void *)&sector, sizeof sector);
+ }
+ w = v = u - 0;
+ MD5Update(&ct, (void *)&sector, sizeof sector);
+ MD5Final(buf, &ct);
+ bzero(&ct, sizeof ct);
+ AES_makekey(ki, dir, G_BDE_KKEYBITS, buf);
+ bzero(buf, sizeof buf);
+}
+
+/*
+ * Encryption work for read operation.
+ *
+ * Security objective: Find the kkey, find the skey, decrypt the sector data.
+ */
+
+void
+g_bde_crypt_read(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+	u_char *d;
+	u_int n;
+	off_t o;
+	u_char skey[G_BDE_SKEYLEN];
+	keyInstance ki;
+	cipherInstance ci;
+
+
+	AES_init(&ci);
+	sc = wp->softc;
+	o = 0;
+	for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+		d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+		g_bde_kkey(sc, &ki, DIR_DECRYPT, wp->offset + o);
+		AES_decrypt(&ci, &ki, d, skey, sizeof skey);
+		d = (u_char *)wp->data + o;
+		AES_makekey(&ki, DIR_DECRYPT, G_BDE_SKEYBITS, skey);
+		AES_decrypt(&ci, &ki, d, d, sc->sectorsize);
+	}
+	bzero(skey, sizeof skey);
+	bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);	/* was sizeof ci: wrong size left key schedule bytes unzeroed */
+}
+
+/*
+ * Encryption work for write operation.
+ *
+ * Security objective: Create random skey, encrypt sector data,
+ * encrypt skey with the kkey.
+ */
+
+void
+g_bde_crypt_write(struct g_bde_work *wp)
+{
+	u_char *s, *d;
+	struct g_bde_softc *sc;
+	u_int n;
+	off_t o;
+	u_char skey[G_BDE_SKEYLEN];
+	keyInstance ki;
+	cipherInstance ci;
+
+	sc = wp->softc;
+	AES_init(&ci);
+	o = 0;
+	for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+
+		s = (u_char *)wp->data + o;
+		d = (u_char *)wp->sp->data + o;
+		arc4rand(&skey, sizeof skey, 0);
+		AES_makekey(&ki, DIR_ENCRYPT, G_BDE_SKEYBITS, skey);
+		AES_encrypt(&ci, &ki, s, d, sc->sectorsize);
+
+		d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+		g_bde_kkey(sc, &ki, DIR_ENCRYPT, wp->offset + o);
+		AES_encrypt(&ci, &ki, skey, d, sizeof skey);
+		bzero(skey, sizeof skey);
+	}
+	bzero(skey, sizeof skey);
+	bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);	/* was sizeof ci: wrong size left key schedule bytes unzeroed */
+}
+
+/*
+ * Encryption work for delete operation.
+ *
+ * Security objective: Write random data to the sectors.
+ *
+ * XXX: At a hit in performance we would trash the encrypted skey as well.
+ * XXX: This would add frustration to the cleaning lady attack by making
+ * XXX: deletes look like writes.
+ */
+
+void
+g_bde_crypt_delete(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+ u_char *d;
+ off_t o;
+
+ sc = wp->softc;
+ d = wp->sp->data;
+ /*
+ * Do not unroll this loop!
+ * Our zone may be significantly wider than the amount of random
+ * bytes arc4rand likes to give in one reseeding, whereas our
+ * sectorsize is far more likely to be in the same range.
+ */
+ for (o = 0; o < wp->length; o += sc->sectorsize) {
+ arc4rand(d, sc->sectorsize, 0);
+ d += sc->sectorsize;
+ }
+ /*
+ * Having written a long random sequence to disk here, we want to
+ * force a reseed, to avoid weakening the next time we use random
+ * data for something important.
+ */
+ arc4rand(&o, sizeof o, 1);
+}
+
+/*
+ * Calculate the total payload size of the encrypted device.
+ *
+ * Security objectives: none.
+ *
+ * This function needs to agree with g_bde_map_sector() about things.
+ */
+
+uint64_t
+g_bde_max_sector(struct g_bde_key *kp)
+{
+ uint64_t maxsect;
+
+ maxsect = kp->media_width;
+ maxsect /= kp->zone_width;
+ maxsect *= kp->zone_cont;
+ return (maxsect);
+}
+
+/*
+ * Convert an unencrypted side offset to offsets on the encrypted side.
+ *
+ * Security objective: Make it harder to identify what sectors contain what
+ * on a "cold" disk image.
+ *
+ * We do this by adding the "keyoffset" from the lock to the physical sector
+ * number modulus the available number of sectors, since all physical sectors
+ * presumably look the same cold, this should be enough.
+ *
+ * Shuffling things further is an option, but the incremental frustration is
+ * not currently deemed worth the run-time performance hit resulting from the
+ * increased number of disk arm movements it would incur.
+ *
+ * This function offers nothing but a trivial diversion for an attacker able
+ * to do "the cleaning lady attack" in its current static mapping form.
+ */
+
+void
+g_bde_map_sector(struct g_bde_key *kp,
+ uint64_t isector,
+ uint64_t *osector,
+ uint64_t *ksector,
+ u_int *koffset)
+{
+
+ u_int zone, zoff, zidx, u;
+ uint64_t os;
+
+ /* find which zone and the offset and index in it */
+ zone = isector / kp->zone_cont;
+ zoff = isector % kp->zone_cont;
+ zidx = zoff / kp->sectorsize;
+
+ /* Find physical sector address */
+ os = zone * kp->zone_width + zoff;
+ os += kp->keyoffset;
+ os %= kp->media_width - (G_BDE_MAXKEYS * kp->sectorsize);
+ os += kp->sector0;
+
+ /* Compensate for lock sectors */
+ for (u = 0; u < G_BDE_MAXKEYS; u++)
+ if (os >= kp->lsector[u])
+ os += kp->sectorsize;
+
+ *osector = os;
+
+ /* The key sector is the last in this zone. */
+ os = (1 + zone) * kp->zone_width - kp->sectorsize;
+ os += kp->keyoffset;
+ os %= kp->media_width - (G_BDE_MAXKEYS * kp->sectorsize);
+ os += kp->sector0;
+
+ for (u = 0; u < G_BDE_MAXKEYS; u++)
+ if (os >= kp->lsector[u])
+ os += kp->sectorsize;
+ *ksector = os;
+
+ *koffset = zidx * G_BDE_SKEYLEN;
+
+#if 0
+ printf("off %jd %jd %jd %u\n",
+ (intmax_t)isector,
+ (intmax_t)*osector,
+ (intmax_t)*ksector,
+ *koffset);
+#endif
+}
diff --git a/sys/geom/bde/g_bde_lock.c b/sys/geom/bde/g_bde_lock.c
new file mode 100644
index 000000000000..e58683f9712a
--- /dev/null
+++ b/sys/geom/bde/g_bde_lock.c
@@ -0,0 +1,311 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains routines which operate on the lock sectors, both
+ * for the kernel and the userland program gbde(1).
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/stdint.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/md5.h>
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#define g_free(foo) free(foo)
+#endif
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+#include <crypto/rijndael/rijndael.h>
+
+/*
+ * Encode/Decode the lock structure in byte-sequence format.
+ *
+ * Security objectives: none.
+ *
+ * C-structure packing and byte-endianness depends on architecture, compiler
+ * and compiler options. We therefore explicitly encode and decode struct
+ * g_bde_key using an invariant byte-sequence format.
+ *
+ */
+
+void
+g_bde_encode_lock(struct g_bde_key *gl, u_char *ptr)
+{
+
+ bcopy(gl->hash, ptr + 0, sizeof gl->hash);
+ g_enc_le8(ptr + 16, gl->sector0);
+ g_enc_le8(ptr + 24, gl->sectorN);
+ g_enc_le8(ptr + 32, gl->keyoffset);
+ g_enc_le4(ptr + 40, gl->sectorsize);
+ g_enc_le4(ptr + 44, gl->flags);
+ g_enc_le8(ptr + 48, gl->lsector[0]);
+ g_enc_le8(ptr + 56, gl->lsector[1]);
+ g_enc_le8(ptr + 64, gl->lsector[2]);
+ g_enc_le8(ptr + 72, gl->lsector[3]);
+ bcopy(gl->spare, ptr + 80, sizeof gl->spare);
+ bcopy(gl->key, ptr + 128, sizeof gl->key);
+}
+
+void
+g_bde_decode_lock(struct g_bde_key *gl, u_char *ptr)
+{
+ bcopy(ptr + 0, gl->hash, sizeof gl->hash);
+ gl->sector0 = g_dec_le8(ptr + 16);
+ gl->sectorN = g_dec_le8(ptr + 24);
+ gl->keyoffset = g_dec_le8(ptr + 32);
+ gl->sectorsize = g_dec_le4(ptr + 40);
+ gl->flags = g_dec_le4(ptr + 44);
+ gl->lsector[0] = g_dec_le8(ptr + 48);
+ gl->lsector[1] = g_dec_le8(ptr + 56);
+ gl->lsector[2] = g_dec_le8(ptr + 64);
+ gl->lsector[3] = g_dec_le8(ptr + 72);
+ bcopy(ptr + 80, gl->spare, sizeof gl->spare);
+ bcopy(ptr + 128, gl->key, sizeof gl->key);
+}
+
+/*
+ * Generate key-material used for protecting lock sectors.
+ *
+ * Security objectives: from the pass-phrase provided by the user, produce a
+ * reproducible stream of bits/bytes which resemble pseudo-random bits.
+ *
+ * This is the stream-cipher algorithm called ARC4. See for instance the
+ * description in "Applied Cryptography" by Bruce Schneier.
+ */
+
+u_char
+g_bde_arc4(struct g_bde_softc *sc)
+{
+ u_char c;
+
+ sc->arc4_j += sc->arc4_sbox[++sc->arc4_i];
+ c = sc->arc4_sbox[sc->arc4_i];
+ sc->arc4_sbox[sc->arc4_i] = sc->arc4_sbox[sc->arc4_j];
+ sc->arc4_sbox[sc->arc4_j] = c;
+ c = sc->arc4_sbox[sc->arc4_i] + sc->arc4_sbox[sc->arc4_j];
+ c = sc->arc4_sbox[c];
+ return (c);
+}
+
+void
+g_bde_arc4_seq(struct g_bde_softc *sc, void *ptr, u_int len)
+{
+ u_char *p;
+
+ p = ptr;
+ while (len--)
+ *p++ = g_bde_arc4(sc);
+}
+
+void
+g_bde_arc4_seed(struct g_bde_softc *sc, void *ptr, u_int len)
+{
+ u_char k[256], *p, c;
+ u_int i;
+
+ p = ptr;
+ sc->arc4_i = 0;
+ bzero(k, sizeof k);
+ while(len--)
+ k[sc->arc4_i++] ^= *p++;
+
+ sc->arc4_j = 0;
+ for (i = 0; i < 256; i++)
+ sc->arc4_sbox[i] = i;
+ for (i = 0; i < 256; i++) {
+ sc->arc4_j += sc->arc4_sbox[i] + k[i];
+ c = sc->arc4_sbox[i];
+ sc->arc4_sbox[i] = sc->arc4_sbox[sc->arc4_j];
+ sc->arc4_sbox[sc->arc4_j] = c;
+ }
+ sc->arc4_i = 0;
+ sc->arc4_j = 0;
+}
+
+/*
+ * Encrypt/Decrypt the metadata address with key-material.
+ */
+
+int
+g_bde_keyloc_encrypt(struct g_bde_softc *sc, void *input, void *output)
+{
+ u_char *p;
+ u_char buf[16], buf1[16];
+ u_int i;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ p = input;
+ g_bde_arc4_seq(sc, buf, sizeof buf);
+ for (i = 0; i < sizeof buf; i++)
+ buf1[i] = p[i] ^ buf[i];
+ g_bde_arc4_seq(sc, buf, sizeof buf);
+ rijndael_makeKey(&ki, DIR_ENCRYPT, G_BDE_KKEYBITS, buf);
+ rijndael_blockEncrypt(&ci, &ki, buf1, 16 * 8, output);
+ bzero(&ci, sizeof ci);
+ return (0);
+}
+
+int
+g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, void *output)
+{
+ u_char *p;
+ u_char buf1[16], buf2[16];
+ u_int i;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ g_bde_arc4_seq(sc, buf1, sizeof buf1);
+ g_bde_arc4_seq(sc, buf2, sizeof buf2);
+ rijndael_makeKey(&ki, DIR_DECRYPT, G_BDE_KKEYBITS, buf2);
+ rijndael_blockDecrypt(&ci, &ki, input, 16 * 8, output);
+ p = output;
+ for (i = 0; i < sizeof buf1; i++)
+ p[i] ^= buf1[i];
+ bzero(&ci, sizeof ci);
+ return (0);
+}
+
+/*
+ * Encode/Decode lock sectors.
+ */
+
+int
+g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *sbox, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey)
+{
+ u_char *buf, k1buf[16], k2buf[G_BDE_LOCKSIZE], k3buf[16], *q;
+ struct g_bde_key *gl;
+ uint64_t off[2];
+ int error, m, i;
+ MD5_CTX c;
+ keyInstance ki;
+ cipherInstance ci;
+
+ rijndael_cipherInit(&ci, MODE_CBC, NULL);
+ bcopy(sbox, sc->arc4_sbox, 256);
+ sc->arc4_i = 0;
+ sc->arc4_j = 0;
+ gl = &sc->key;
+ error = g_bde_keyloc_decrypt(sc, meta, off);
+ if (error)
+ return(error);
+
+ if (off[0] + G_BDE_LOCKSIZE > (uint64_t)mediasize) {
+ bzero(off, sizeof off);
+ return (ESRCH);
+ }
+ off[1] = 0;
+ m = 1;
+ if (off[0] % sectorsize > sectorsize - G_BDE_LOCKSIZE)
+ m++;
+ buf = g_read_data(sc->consumer,
+ off[0] - (off[0] % sectorsize),
+ m * sectorsize, &error);
+ if (buf == NULL) {
+ off[0] = 0;
+ return(error);
+ }
+
+ q = buf + off[0] % sectorsize;
+
+ off[1] = 0;
+ for (i = 0; i < (int)sizeof(*gl); i++)
+ off[1] += q[i];
+
+ if (off[1] == 0) {
+ off[0] = 0;
+ g_free(buf);
+ return (ESRCH);
+ }
+
+ g_bde_arc4_seq(sc, k1buf, sizeof k1buf);
+ g_bde_arc4_seq(sc, k2buf, sizeof k2buf);
+ g_bde_arc4_seq(sc, k3buf, sizeof k3buf);
+
+ MD5Init(&c);
+ MD5Update(&c, "0000", 4); /* XXX: for future versioning */
+ MD5Update(&c, k1buf, 16);
+ MD5Final(k1buf, &c);
+
+ rijndael_makeKey(&ki, DIR_DECRYPT, 128, k3buf);
+ bzero(k3buf, sizeof k3buf);
+ rijndael_blockDecrypt(&ci, &ki, q, G_BDE_LOCKSIZE * 8, q);
+
+ for (i = 0; i < G_BDE_LOCKSIZE; i++)
+ q[i] ^= k2buf[i];
+ bzero(k2buf, sizeof k2buf);
+
+ if (bcmp(q, k1buf, sizeof k1buf)) {
+ bzero(k1buf, sizeof k1buf);
+ bzero(buf, sectorsize * m);
+ g_free(buf);
+ off[0] = 0;
+ return (ESRCH);
+ }
+ bzero(k1buf, sizeof k1buf);
+
+ g_bde_decode_lock(gl, q);
+ bzero(buf, sectorsize * m);
+ g_free(buf);
+
+ off[1] = 0;
+ for (i = 0; i < (int)sizeof(gl->key); i++)
+ off[1] += gl->key[i];
+
+ if (off[1] == 0) {
+ off[0] = 0;
+ return (ENOENT);
+ }
+ for (i = 0; i < G_BDE_MAXKEYS; i++)
+ if (nkey != NULL && off[0] == gl->lsector[i])
+ *nkey = i;
+
+ return (0);
+}
diff --git a/sys/geom/bde/g_bde_work.c b/sys/geom/bde/g_bde_work.c
new file mode 100644
index 000000000000..6f337fa3d00f
--- /dev/null
+++ b/sys/geom/bde/g_bde_work.c
@@ -0,0 +1,731 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the state-engine which makes things happen in the
+ * right order.
+ *
+ * Outline:
+ * 1) g_bde_start1()
+ * Break the struct bio into multiple work packets one per zone.
+ * 2) g_bde_start2()
+ * Setup the necessary sector buffers and start those read operations
+ * which we can start at this time and put the item on the work-list.
+ * 3) g_bde_worker()
+ * Scan the work-list for items which are ready for crypto processing
+ * and call the matching crypto function in g_bde_crypt.c and schedule
+ * any writes needed. Read operations finish here by releasing the
+ * sector buffers and delivering the original bio request.
+ * 4) g_bde_write_done()
+ * Release sector buffers and deliver the original bio request.
+ *
+ * Because of the C-scope rules, the functions are almost perfectly in the
+ * opposite order in this source file.
+ *
+ * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
+ * XXX: additional states to this state-engine. Since no hardware available
+ * XXX: at this time has AES support, implementing this has been postponed
+ * XXX: until such time as it would result in a benefit.
+ */
+
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+/*
+ * Forward declarations for the file-private sector helpers.
+ * NOTE(review): the first parameter of g_bde_delete_sector is a softc
+ * despite being named "wp" here; the definition calls it "sc".
+ */
+static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
+static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
+static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
+static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
+static int g_bde_start_read(struct g_bde_sector *sp);
+
+/*
+ * Work item allocation.
+ *
+ * C++ would call these constructors and destructors.
+ */
+static u_int g_bde_nwork;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
+
+/*
+ * Allocate and enroll a new work packet for this instance.  The packet
+ * starts life in SETUP state and is appended to the softc worklist.
+ * Returns NULL when the non-sleeping allocation fails.
+ */
+static struct g_bde_work *
+g_bde_new_work(struct g_bde_softc *sc)
+{
+	struct g_bde_work *wp;
+
+	wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
+	if (wp != NULL) {
+		wp->state = SETUP;
+		wp->softc = sc;
+		g_bde_nwork++;
+		sc->nwork++;
+		TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
+	}
+	return (wp);
+}
+
+/*
+ * Retire a work packet: unhook it from the worklist, drop the global
+ * and per-instance bookkeeping counters, and free it.
+ */
+static void
+g_bde_delete_work(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc = wp->softc;
+
+	TAILQ_REMOVE(&sc->worklist, wp, list);
+	sc->nwork--;
+	g_bde_nwork--;
+	g_free(wp);
+}
+
+/*
+ * Sector buffer allocation
+ *
+ * These two functions allocate and free back variable sized sector buffers
+ */
+
+static u_int g_bde_nsect;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
+
+/*
+ * Free a sector buffer.  Only buffers whose data area was allocated by
+ * g_bde_new_sector() (sp->malloc != 0) own their data area and free it;
+ * others borrow the data pointer (e.g. from the original bio).
+ *
+ * Marked static to match the forward declaration above; the definition
+ * previously omitted the storage class and relied on the earlier static
+ * declaration for its internal linkage.
+ */
+static void
+g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
+{
+
+	g_bde_nsect--;
+	sc->nsect--;
+	if (sp->malloc)
+		g_free(sp->data);
+	g_free(sp);
+}
+
+/*
+ * Allocate a sector buffer.  When len is zero the caller attaches its
+ * own data pointer afterwards (sp->malloc stays 0, so the data area is
+ * not freed by g_bde_delete_sector()); otherwise a zeroed buffer of
+ * len bytes is allocated and owned by the sector.  Returns NULL on
+ * allocation failure.
+ *
+ * Marked static to match the forward declaration above, keeping this
+ * helper out of the kernel's global namespace.
+ */
+static struct g_bde_sector *
+g_bde_new_sector(struct g_bde_work *wp, u_int len)
+{
+	struct g_bde_sector *sp;
+
+	sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
+	if (sp == NULL)
+		return (sp);
+	if (len > 0) {
+		sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
+		if (sp->data == NULL) {
+			g_free(sp);
+			return (NULL);
+		}
+		sp->malloc = 1;
+	}
+	g_bde_nsect++;
+	wp->softc->nsect++;
+	sp->size = len;
+	sp->softc = wp->softc;
+	sp->ref = 1;
+	sp->owner = wp;
+	sp->offset = wp->so;
+	sp->state = JUNK;
+	return (sp);
+}
+
+/*
+ * Skey sector cache.
+ *
+ * Nothing prevents two separate I/O requests from addressing the same zone
+ * and thereby needing the same skey sector. We therefore need to sequence
+ * I/O operations to the skey sectors. A certain amount of caching is also
+ * desirable, although the extent of benefit from this is not at this point
+ * determined.
+ *
+ * XXX: GEOM may be able to grow a generic caching facility at some point
+ * XXX: to support such needs.
+ */
+
+static u_int g_bde_ncache;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
+
+/*
+ * Find or allocate the skey sector buffer for "offset".
+ *
+ * The freelist doubles as an LRU cache keyed on offset: a lookup hit
+ * bumps the refcount (the first referencer becomes owner); on a miss
+ * the least recently used idle sector is recycled, or a fresh cache
+ * sector is allocated.  In either case the sector ends up at the tail
+ * (most recently used) and is recorded as wp->ksp.
+ *
+ * NOTE(review): ncache is incremented before g_bde_new_sector() is
+ * known to succeed, so a failed allocation leaves the counters high —
+ * confirm against the KASSERTs at worker shutdown.  On failure the
+ * trailing KASSERT panics debug kernels; otherwise NULL is returned
+ * and callers must cope.
+ */
+static struct g_bde_sector *
+g_bde_get_sector(struct g_bde_work *wp, off_t offset)
+{
+	struct g_bde_sector *sp;
+	struct g_bde_softc *sc;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
+	sc = wp->softc;
+	/* Cache lookup by offset. */
+	TAILQ_FOREACH(sp, &sc->freelist, list) {
+		if (sp->offset == offset)
+			break;
+	}
+	if (sp != NULL) {
+		sp->ref++;
+		KASSERT(sp->offset == offset, ("wrong offset"));
+		KASSERT(sp->softc == wp->softc, ("wrong softc"));
+		/* First referencer becomes the owner. */
+		if (sp->ref == 1)
+			sp->owner = wp;
+	} else {
+		/* Miss: recycle the LRU sector if it is idle. */
+		if (!TAILQ_EMPTY(&sc->freelist))
+			sp = TAILQ_FIRST(&sc->freelist);
+		if (sp != NULL && sp->ref > 0)
+			sp = NULL;
+		if (sp == NULL) {
+			g_bde_ncache++;
+			sc->ncache++;
+			sp = g_bde_new_sector(wp, sc->sectorsize);
+			if (sp != NULL) {
+				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+				sp->malloc = 2;	/* marks a cache sector */
+			}
+		}
+		if (sp != NULL) {
+			/* (Re)initialize the sector for the new offset. */
+			sp->offset = offset;
+			sp->softc = wp->softc;
+			sp->ref = 1;
+			sp->owner = wp;
+			sp->state = JUNK;
+			sp->error = 0;
+		}
+	}
+	if (sp != NULL) {
+		/* Move to the MRU position. */
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+	}
+	wp->ksp = sp;
+	KASSERT(sp != NULL, ("get_sector failed"));
+	return(sp);
+}
+
+/*
+ * Drop a work packet's reference on a cached skey sector.
+ *
+ * If other work packets still reference the sector, move it to the MRU
+ * (tail) end of the freelist and hand ownership to the next waiting
+ * packet.  If the last reference goes away and the sector saw an I/O
+ * error, invalidate it and park it at the head so it is recycled first.
+ *
+ * Fix: the remove/insert-head pair was unconditional, so in the
+ * "still referenced" path the sector was first moved to the tail and
+ * then immediately yanked back to the head, destroying the LRU order
+ * and mis-filing error-free idle sectors.  The head-insertion belongs
+ * only in the error-invalidation branch.
+ */
+static void
+g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp2;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
+	KASSERT(sp->malloc == 2, ("Wrong sector released"));
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("NULL sp->softc"));
+	KASSERT(wp == sp->owner, ("Releasing, not owner"));
+	sp->owner = NULL;
+	wp->ksp = NULL;
+	sp->ref--;
+	if (sp->ref > 0) {
+		/* Keep it hot and pass ownership to a waiting packet. */
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+		TAILQ_FOREACH(wp2, &sc->worklist, list) {
+			if (wp2->ksp == sp) {
+				KASSERT(wp2 != wp, ("Self-reowning"));
+				sp->owner = wp2;
+				wakeup(sp->softc);
+				break;
+			}
+		}
+		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
+	} else if (sp->error != 0) {
+		/* Invalidate and put at the head for immediate reuse. */
+		sp->offset = ~0;
+		sp->error = 0;
+		sp->state = JUNK;
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
+	}
+}
+
+/*
+ * Shed a fraction of the skey sector cache.  Frees up to
+ * (ncache / fraction) + 1 idle (unreferenced) cache sectors, zeroing
+ * their key material before releasing the memory.
+ */
+static void
+g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
+{
+	struct g_bde_sector *sp;
+	int i, n;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
+	n = sc->ncache / fraction + 1;
+	for (i = 0; i < n; i++) {
+		TAILQ_FOREACH(sp, &sc->freelist, list) {
+			if (sp->ref != 0)
+				continue;
+			TAILQ_REMOVE(&sc->freelist, sp, list);
+			g_bde_ncache--;
+			sc->ncache--;
+			bzero(sp->data, sp->size);
+			g_bde_delete_sector(sc, sp);
+			break;
+		}
+	}
+}
+
+/*
+ * Obtain the skey sector for "offset" and, when we own it and it is
+ * not already valid, kick off the read.  Returns NULL if neither the
+ * sector nor the read bio could be obtained.
+ */
+static struct g_bde_sector *
+g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
+{
+	struct g_bde_sector *sp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
+	sp = g_bde_get_sector(wp, offset);
+	/* Nothing to start if we got no sector, don't own it, or it's hot. */
+	if (sp == NULL || sp->owner != wp || sp->state == VALID)
+		return (sp);
+	if (g_bde_start_read(sp) != 0) {
+		g_bde_release_sector(wp, sp);
+		sp = NULL;
+	}
+	return (sp);
+}
+
+/*
+ * Contribute to the completion of the original bio request.
+ *
+ * We have no simple way to tell how many bits the original bio request has
+ * been segmented into, so the easiest way to determine when we can deliver
+ * it is to keep track of the number of bytes we have completed. We keep
+ * track of any errors underway and latch onto the first one.
+ *
+ * We always report "nothing done" in case of error, because random bits here
+ * and there may be completed and returning a number of completed bytes does
+ * not convey any useful information about which bytes they were. If some
+ * piece of broken code somewhere interprets this to mean that nothing has
+ * changed on the underlying media they deserve the lossage headed for them.
+ *
+ * A single mutex per g_bde instance is used to prevent contention.
+ */
+
+/*
+ * Account "bytes" of progress (and any error) against the original bio
+ * and deliver it once the whole length has been contributed.  The first
+ * error seen is latched; on error, completion is reported as zero (see
+ * the block comment above).
+ *
+ * Fix: removed the local "sc", which was assigned from bp->bio_driver1
+ * but never used.
+ */
+static void
+g_bde_contribute(struct bio *bp, off_t bytes, int error)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
+	     bp, (intmax_t)bytes, error);
+	if (bp->bio_error == 0)
+		bp->bio_error = error;
+	bp->bio_completed += bytes;
+	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
+	if (bp->bio_completed == bp->bio_length) {
+		if (bp->bio_error != 0)
+			bp->bio_completed = 0;
+		g_io_deliver(bp, bp->bio_error);
+	}
+}
+
+/*
+ * A write operation has finished. When we have all expected cows in the
+ * barn close the door and call it a day.
+ */
+
+/*
+ * Biodone routine for writes issued by g_bde_start_write().
+ *
+ * A BIO_DELETE packet has a single write outstanding, so its completion
+ * finishes the packet.  A BIO_WRITE packet has two writes outstanding
+ * (payload sector and skey sector); the packet completes when the
+ * payload sector has been deleted and the skey sector has gone VALID,
+ * regardless of which write finishes first.
+ */
+static void
+g_bde_write_done(struct bio *bp)
+{
+	struct g_bde_sector *sp;
+	struct g_bde_work *wp;
+	struct g_bde_softc *sc;
+
+	sp = bp->bio_caller1;
+	sc = bp->bio_caller2;
+	mtx_lock(&sc->worklist_mutex);
+	KASSERT(sp != NULL, ("NULL sp"));
+	KASSERT(sc != NULL, ("NULL sc"));
+	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
+	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
+	sp->error = bp->bio_error;
+	g_destroy_bio(bp);
+	wp = sp->owner;
+	/* Latch the first error seen on this work packet. */
+	if (wp->error == 0)
+		wp->error = sp->error;
+
+	if (wp->bp->bio_cmd == BIO_DELETE) {
+		KASSERT(sp == wp->sp, ("trashed delete op"));
+		g_bde_contribute(wp->bp, wp->length, wp->error);
+		g_bde_delete_sector(sc, sp);
+		g_bde_delete_work(wp);
+		mtx_unlock(&sc->worklist_mutex);
+		return;
+	}
+
+	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
+	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
+	if (wp->sp == sp) {
+		/* Payload write done; its buffer can go now. */
+		g_bde_delete_sector(sc, wp->sp);
+		wp->sp = NULL;
+	} else {
+		/* Skey sector write done; cached contents now valid. */
+		sp->state = VALID;
+	}
+	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
+		/* Both halves done: deliver upwards and clean up. */
+		g_bde_contribute(wp->bp, wp->length, wp->error);
+		g_bde_release_sector(wp, wp->ksp);
+		g_bde_delete_work(wp);
+	}
+	mtx_unlock(&sc->worklist_mutex);
+	return;
+}
+
+/*
+ * Send a write request for the given sector down the pipeline.
+ */
+
+/*
+ * Build and submit a BIO_WRITE for the sector's full buffer.  The
+ * sector is marked busy (IO) and completion is handled by
+ * g_bde_write_done().  Returns ENOMEM if no bio could be allocated.
+ */
+static int
+g_bde_start_write(struct g_bde_sector *sp)
+{
+	struct g_bde_softc *sc;
+	struct bio *wbp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
+	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
+	wbp = g_new_bio();
+	if (wbp == NULL)
+		return (ENOMEM);
+	wbp->bio_cmd = BIO_WRITE;
+	wbp->bio_done = g_bde_write_done;
+	wbp->bio_caller1 = sp;
+	wbp->bio_caller2 = sc;
+	wbp->bio_offset = sp->offset;
+	wbp->bio_data = sp->data;
+	wbp->bio_length = sp->size;
+	sp->state = IO;
+	g_io_request(wbp, sc->consumer);
+	return (0);
+}
+
+/*
+ * A read operation has finished. Mark the sector no longer iobusy and
+ * wake up the worker thread and let it do its thing.
+ */
+
+/*
+ * Biodone routine for sector reads: record the error, mark the sector
+ * valid and prod the worker thread.
+ */
+static void
+g_bde_read_done(struct bio *bp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_sector *sp;
+
+	sp = bp->bio_caller1;
+	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
+	sc = bp->bio_caller2;
+	mtx_lock(&sc->worklist_mutex);
+	sp->error = bp->bio_error;
+	sp->state = VALID;
+	wakeup(sc);		/* the worker sleeps on the softc */
+	g_destroy_bio(bp);
+	mtx_unlock(&sc->worklist_mutex);
+}
+
+/*
+ * Send a read request for the given sector down the pipeline.
+ */
+
+/*
+ * Build and submit a BIO_READ for the sector's full buffer.  The
+ * sector is marked busy (IO) and completion is handled by
+ * g_bde_read_done().  Returns ENOMEM if no bio could be allocated.
+ */
+static int
+g_bde_start_read(struct g_bde_sector *sp)
+{
+	struct g_bde_softc *sc;
+	struct bio *rbp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
+	rbp = g_new_bio();
+	if (rbp == NULL)
+		return (ENOMEM);
+	rbp->bio_cmd = BIO_READ;
+	rbp->bio_done = g_bde_read_done;
+	rbp->bio_caller1 = sp;
+	rbp->bio_caller2 = sc;
+	rbp->bio_offset = sp->offset;
+	rbp->bio_data = sp->data;
+	rbp->bio_length = sp->size;
+	sp->state = IO;
+	g_io_request(rbp, sc->consumer);
+	return (0);
+}
+
+/*
+ * The worker thread.
+ *
+ * The up/down path of GEOM is not allowed to sleep or do any major work
+ * so we use this thread to do the actual crypto operations and to push
+ * the state engine onwards.
+ *
+ * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
+ * XXX: using a thread here is probably not needed.
+ */
+
+void
+g_bde_worker(void *arg)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp;
+	struct g_geom *gp;
+	int busy, error;
+
+	gp = arg;
+	sc = gp->softc;
+
+	/* The mutex is held across the scan, dropped around crypto calls. */
+	mtx_lock(&sc->worklist_mutex);
+	for (;;) {
+		busy = 0;
+		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
+		TAILQ_FOREACH(wp, &sc->worklist, list) {
+			KASSERT(wp != NULL, ("NULL wp"));
+			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+			if (wp->state != WAIT)
+				continue;	/* Not interesting here */
+
+			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
+			KASSERT(wp->sp != NULL, ("NULL wp->sp"));
+
+			/* The skey sector must be ours and fully read in. */
+			if (wp->ksp != NULL) {
+				if (wp->ksp->owner != wp)
+					continue;
+				if (wp->ksp->state == IO)
+					continue;
+				KASSERT(wp->ksp->state == VALID,
+				    ("Illegal sector state (JUNK ?)"));
+			}
+
+			/* For reads, wait until the payload has arrived. */
+			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
+				continue;
+
+			/* A failed skey read fails the whole packet. */
+			if (wp->ksp != NULL && wp->ksp->error != 0) {
+				g_bde_contribute(wp->bp, wp->length,
+				    wp->ksp->error);
+				g_bde_delete_sector(sc, wp->sp);
+				g_bde_release_sector(wp, wp->ksp);
+				g_bde_delete_work(wp);
+				busy++;
+				break;
+			}
+			switch(wp->bp->bio_cmd) {
+			case BIO_READ:
+				/* Decrypt in place, then finish the packet. */
+				if (wp->ksp != NULL && wp->sp->error == 0) {
+					mtx_unlock(&sc->worklist_mutex);
+					g_bde_crypt_read(wp);
+					mtx_lock(&sc->worklist_mutex);
+				}
+				g_bde_contribute(wp->bp, wp->length,
+				    wp->sp->error);
+				g_bde_delete_sector(sc, wp->sp);
+				if (wp->ksp != NULL)
+					g_bde_release_sector(wp, wp->ksp);
+				g_bde_delete_work(wp);
+				break;
+			case BIO_WRITE:
+				wp->state = FINISH;
+				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
+				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
+				mtx_unlock(&sc->worklist_mutex);
+				g_bde_crypt_write(wp);
+				mtx_lock(&sc->worklist_mutex);
+				/*
+				 * NOTE(review): ENOMEM returns from the two
+				 * writes are ignored, which would stall the
+				 * packet — confirm intended.
+				 */
+				g_bde_start_write(wp->sp);
+				g_bde_start_write(wp->ksp);
+				break;
+			case BIO_DELETE:
+				wp->state = FINISH;
+				mtx_unlock(&sc->worklist_mutex);
+				g_bde_crypt_delete(wp);
+				mtx_lock(&sc->worklist_mutex);
+				g_bde_start_write(wp->sp);
+				break;
+			}
+			/* We did something: rescan the list from the top. */
+			busy++;
+			break;
+		}
+		if (!busy) {
+			/*
+			 * We don't look for our death-warrant until we are
+			 * idle. Shouldn't make a difference in practice.
+			 */
+			if (sc->dead)
+				break;
+			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
+			error = msleep(sc, &sc->worklist_mutex,
+			    PRIBIO, "g_bde", hz);
+			if (error == EWOULDBLOCK) {
+				/*
+				 * Lose our skey cache in an orderly fashion.
+				 * The exact rate can be tuned to be less
+				 * aggressive if this is desirable. 10% per
+				 * second means that the cache is gone in a
+				 * few minutes.
+				 */
+				g_bde_purge_sector(sc, 10);
+			}
+		}
+	}
+	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
+	/* Drop the whole cache before checking the books balance. */
+	g_bde_purge_sector(sc, 1);
+	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
+	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
+	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
+	mtx_unlock(&sc->worklist_mutex);
+	sc->dead = 2;
+	wakeup(sc);
+	mtx_lock(&Giant);
+	kthread_exit(0);
+}
+
+/*
+ * g_bde_start1 has chopped the incoming request up so all the requests
+ * we see here are inside a single zone. Map the data and key locations
+ * grab the buffers we need and fire off the first volley of read requests.
+ */
+
+static void
+g_bde_start2(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+
+	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
+	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
+	sc = wp->softc;
+	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+	/* Resolve payload sector, skey sector and key offsets for this zone. */
+	g_bde_map_sector(&sc->key, wp->offset, &wp->so, &wp->kso, &wp->ko);
+	if (wp->bp->bio_cmd == BIO_READ) {
+		/* Borrow the caller's buffer and read payload + skey sector. */
+		wp->sp = g_bde_new_sector(wp, 0);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+		wp->sp->size = wp->length;
+		wp->sp->data = wp->data;
+		if (g_bde_start_read(wp->sp) != 0) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_sector(sc, wp->sp);
+			g_bde_delete_work(wp);
+			return;
+		}
+		g_bde_read_sector(sc, wp, wp->kso);
+		/*
+		 * NOTE(review): unlike the write path below, a failed skey
+		 * allocation here only latches wp->error and still enters
+		 * WAIT state — confirm the worker completes such packets.
+		 */
+		if (wp->ksp == NULL)
+			wp->error = ENOMEM;
+	} else if (wp->bp->bio_cmd == BIO_DELETE) {
+		/* Deletes only need a scratch payload sector to overwrite. */
+		wp->sp = g_bde_new_sector(wp, wp->length);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+	} else if (wp->bp->bio_cmd == BIO_WRITE) {
+		/* Writes need a scratch sector and the current skey sector. */
+		wp->sp = g_bde_new_sector(wp, wp->length);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+		g_bde_read_sector(sc, wp, wp->kso);
+		if (wp->ksp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_sector(sc, wp->sp);
+			g_bde_delete_work(wp);
+			return;
+		}
+	} else {
+		KASSERT(0 == 1,
+		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
+	}
+
+	/* Hand the packet to the worker thread. */
+	wp->state = WAIT;
+	wakeup(sc);
+}
+
+/*
+ * Split the incoming bio on zone boundaries and submit the resulting
+ * work structures to g_bde_start2().
+ */
+
+/*
+ * Chop the incoming bio on zone boundaries and submit one work packet
+ * per fragment via g_bde_start2().  Completion accounting is handled by
+ * g_bde_contribute(), so partially submitted requests finish correctly.
+ *
+ * Fix: the first-fragment path read wp->length *after* calling
+ * g_bde_start2(), but g_bde_start2() deletes the work packet on
+ * allocation failure, making that a use-after-free.  Account for the
+ * fragment before the call, as the loop below already does.
+ */
+void
+g_bde_start1(struct bio *bp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp;
+	off_t zone_start, left;
+	caddr_t p;
+
+	sc = bp->bio_to->geom->softc;
+	bp->bio_driver1 = sc;
+
+	mtx_lock(&sc->worklist_mutex);
+	zone_start = bp->bio_offset - bp->bio_offset % sc->zone_cont;
+	wp = g_bde_new_work(sc);
+	if (wp == NULL) {
+		g_io_deliver(bp, ENOMEM);
+		mtx_unlock(&sc->worklist_mutex);
+		return;
+	}
+	left = bp->bio_length;
+	p = bp->bio_data;
+
+	/* Do the first and possibly only fragment */
+	wp->bp = bp;
+	wp->offset = bp->bio_offset;
+	wp->data = p;
+	wp->length = zone_start + sc->zone_cont - wp->offset;
+	if (wp->length >= left) {
+		/* Only this one fragment needed */
+		wp->length = left;
+		g_bde_start2(wp);
+		mtx_unlock(&sc->worklist_mutex);
+		return;
+	}
+
+	/*
+	 * Submit the first fragment.  Update the counters before the
+	 * call: g_bde_start2() may free wp on failure.
+	 */
+	left -= wp->length;
+	p += wp->length;
+	g_bde_start2(wp);
+
+	/* Do the subsequent fragments */
+	while (left > 0) {
+		wp = g_bde_new_work(sc);
+		if (wp == NULL) {
+			g_bde_contribute(bp, left, ENOMEM);
+			mtx_unlock(&sc->worklist_mutex);
+			return;
+		}
+		zone_start += sc->zone_cont;
+		wp->bp = bp;
+		wp->offset = zone_start;
+		wp->data = p;
+		if (left > sc->zone_cont)
+			wp->length = sc->zone_cont;
+		else
+			wp->length = left;
+		left -= wp->length;
+		p += wp->length;
+		g_bde_start2(wp);
+	}
+	mtx_unlock(&sc->worklist_mutex);
+}