diff options
Diffstat (limited to 'sys/geom/stripe/g_stripe.c')
| -rw-r--r-- | sys/geom/stripe/g_stripe.c | 1276 |
1 files changed, 1276 insertions, 0 deletions
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c new file mode 100644 index 000000000000..ba1953f036d3 --- /dev/null +++ b/sys/geom/stripe/g_stripe.c @@ -0,0 +1,1276 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/bio.h> +#include <sys/sbuf.h> +#include <sys/sysctl.h> +#include <sys/malloc.h> +#include <vm/uma.h> +#include <geom/geom.h> +#include <geom/geom_dbg.h> +#include <geom/stripe/g_stripe.h> + +FEATURE(geom_stripe, "GEOM striping support"); + +static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); + +static uma_zone_t g_stripe_zone; + +static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); +static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, + struct g_geom *gp); + +static g_taste_t g_stripe_taste; +static g_ctl_req_t g_stripe_config; +static g_dumpconf_t g_stripe_dumpconf; +static g_init_t g_stripe_init; +static g_fini_t g_stripe_fini; + +struct g_class g_stripe_class = { + .name = G_STRIPE_CLASS_NAME, + .version = G_VERSION, + .ctlreq = g_stripe_config, + .taste = g_stripe_taste, + .destroy_geom = g_stripe_destroy_geom, + .init = g_stripe_init, + .fini = g_stripe_fini +}; + +SYSCTL_DECL(_kern_geom); +static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "GEOM_STRIPE stuff"); +static u_int g_stripe_debug = 0; +SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, + "Debug level"); +static int g_stripe_fast = 0; +SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast, + CTLFLAG_RWTUN, &g_stripe_fast, 0, + "Fast, but memory-consuming, mode"); +static u_long g_stripe_maxmem; +SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0, + "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); +static u_int g_stripe_fast_failed = 0; +SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, + &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); + +/* + * Greatest Common Divisor. + */ +static u_int +gcd(u_int a, u_int b) +{ + u_int c; + + while (b != 0) { + c = a; + a = b; + b = (c % b); + } + return (a); +} + +/* + * Least Common Multiple. + */ +static u_int +lcm(u_int a, u_int b) +{ + + return ((a * b) / gcd(a, b)); +} + +static void +g_stripe_init(struct g_class *mp __unused) +{ + + g_stripe_maxmem = maxphys * 100; + TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem); + g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL, + NULL, NULL, 0, 0); + g_stripe_maxmem -= g_stripe_maxmem % maxphys; + uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys); +} + +static void +g_stripe_fini(struct g_class *mp __unused) +{ + + uma_zdestroy(g_stripe_zone); +} + +/* + * Return the number of valid disks. + */ +static u_int +g_stripe_nvalid(struct g_stripe_softc *sc) +{ + u_int i, no; + + no = 0; + for (i = 0; i < sc->sc_ndisks; i++) { + if (sc->sc_disks[i] != NULL) + no++; + } + + return (no); +} + +static void +g_stripe_remove_disk(struct g_consumer *cp) +{ + struct g_stripe_softc *sc; + + g_topology_assert(); + KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); + sc = (struct g_stripe_softc *)cp->geom->softc; + KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); + + if (cp->private == NULL) { + G_STRIPE_DEBUG(0, "Disk %s removed from %s.", + cp->provider->name, sc->sc_name); + cp->private = (void *)(uintptr_t)-1; + } + + if (sc->sc_provider != NULL) { + G_STRIPE_DEBUG(0, "Device %s deactivated.", + sc->sc_provider->name); + g_wither_provider(sc->sc_provider, ENXIO); + sc->sc_provider = NULL; + } + + if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) + return; + sc->sc_disks[cp->index] = NULL; + cp->index = 0; + g_detach(cp); + g_destroy_consumer(cp); + /* If there are no valid disks anymore, remove device. */ + if (LIST_EMPTY(&sc->sc_geom->consumer)) + g_stripe_destroy(sc, 1); +} + +static void +g_stripe_orphan(struct g_consumer *cp) +{ + struct g_stripe_softc *sc; + struct g_geom *gp; + + g_topology_assert(); + gp = cp->geom; + sc = gp->softc; + if (sc == NULL) + return; + + g_stripe_remove_disk(cp); +} + +static int +g_stripe_access(struct g_provider *pp, int dr, int dw, int de) +{ + struct g_consumer *cp1, *cp2, *tmp; + struct g_stripe_softc *sc __diagused; + struct g_geom *gp; + int error; + + g_topology_assert(); + gp = pp->geom; + sc = gp->softc; + KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); + + /* On first open, grab an extra "exclusive" bit */ + if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) + de++; + /* ... and let go of it on last close */ + if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) + de--; + + LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { + error = g_access(cp1, dr, dw, de); + if (error != 0) + goto fail; + if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && + cp1->private != NULL) { + g_stripe_remove_disk(cp1); /* May destroy geom. */ + } + } + return (0); + +fail: + LIST_FOREACH(cp2, &gp->consumer, consumer) { + if (cp1 == cp2) + break; + g_access(cp2, -dr, -dw, -de); + } + return (error); +} + +static void +g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, + off_t length, int mode) +{ + off_t stripesize; + size_t len; + + stripesize = sc->sc_stripesize; + len = (size_t)(stripesize - (offset & (stripesize - 1))); + do { + bcopy(src, dst, len); + if (mode) { + dst += len + stripesize * (sc->sc_ndisks - 1); + src += len; + } else { + dst += len; + src += len + stripesize * (sc->sc_ndisks - 1); + } + length -= len; + KASSERT(length >= 0, + ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", + (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); + if (length > stripesize) + len = stripesize; + else + len = length; + } while (length > 0); +} + +static void +g_stripe_done(struct bio *bp) +{ + struct g_stripe_softc *sc; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { + g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, + bp->bio_length, 1); + bp->bio_data = bp->bio_caller1; + bp->bio_caller1 = NULL; + } + mtx_lock(&sc->sc_lock); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; + pbp->bio_inbed++; + if (pbp->bio_children == pbp->bio_inbed) { + mtx_unlock(&sc->sc_lock); + if (pbp->bio_driver1 != NULL) + uma_zfree(g_stripe_zone, pbp->bio_driver1); + if (bp->bio_cmd == BIO_SPEEDUP) + pbp->bio_completed = pbp->bio_length; + g_io_deliver(pbp, pbp->bio_error); + } else + mtx_unlock(&sc->sc_lock); + g_destroy_bio(bp); +} + +static int +g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) +{ + TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); + struct g_stripe_softc *sc; + char *addr, *data = NULL; + struct bio *cbp; + off_t stripesize; + u_int nparts = 0; + int error; + + sc = bp->bio_to->geom->softc; + + addr = bp->bio_data; + stripesize = sc->sc_stripesize; + + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + nparts++; + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + cbp->bio_caller1 = NULL; + cbp->bio_length = length; + cbp->bio_caller2 = sc->sc_disks[no]; + + /* offset -= offset % stripesize; */ + offset -= offset & (stripesize - 1); + addr += length; + length = bp->bio_length - length; + for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { + if (no > sc->sc_ndisks - 1) { + no = 0; + offset += stripesize; + } + if (nparts >= sc->sc_ndisks) { + cbp = TAILQ_NEXT(cbp, bio_queue); + if (cbp == NULL) + cbp = TAILQ_FIRST(&queue); + nparts++; + /* + * Update bio structure. + */ + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. + */ + cbp->bio_length += MIN(stripesize, length); + if (cbp->bio_caller1 == NULL) { + cbp->bio_caller1 = cbp->bio_data; + cbp->bio_data = NULL; + if (data == NULL) { + data = uma_zalloc(g_stripe_zone, + M_NOWAIT); + if (data == NULL) { + error = ENOMEM; + goto failure; + } + } + } + } else { + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + nparts++; + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + cbp->bio_caller1 = NULL; + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. + */ + cbp->bio_length = MIN(stripesize, length); + cbp->bio_caller2 = sc->sc_disks[no]; + } + } + if (data != NULL) + bp->bio_driver1 = data; + /* + * Fire off all allocated requests! + */ + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + struct g_consumer *cp; + + TAILQ_REMOVE(&queue, cbp, bio_queue); + cp = cbp->bio_caller2; + cbp->bio_caller2 = NULL; + cbp->bio_to = cp->provider; + if (cbp->bio_caller1 != NULL) { + cbp->bio_data = data; + if (bp->bio_cmd == BIO_WRITE) { + g_stripe_copy(sc, cbp->bio_caller1, data, + cbp->bio_offset, cbp->bio_length, 0); + } + data += cbp->bio_length; + } + G_STRIPE_LOGREQ(cbp, "Sending request."); + g_io_request(cbp, cp); + } + return (0); +failure: + if (data != NULL) + uma_zfree(g_stripe_zone, data); + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); + if (cbp->bio_caller1 != NULL) { + cbp->bio_data = cbp->bio_caller1; + cbp->bio_caller1 = NULL; + } + bp->bio_children--; + g_destroy_bio(cbp); + } + return (error); +} + +static int +g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) +{ + TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); + struct g_stripe_softc *sc; + off_t stripesize; + struct bio *cbp; + char *addr; + int error; + + sc = bp->bio_to->geom->softc; + + stripesize = sc->sc_stripesize; + + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + /* + * Fill in the component buf structure. + */ + if (bp->bio_length == length) + cbp->bio_done = g_std_done; /* Optimized lockless case. */ + else + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + cbp->bio_length = length; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; + cbp->bio_caller2 = sc->sc_disks[no]; + + /* offset -= offset % stripesize; */ + offset -= offset & (stripesize - 1); + if (bp->bio_cmd != BIO_DELETE) + addr += length; + length = bp->bio_length - length; + for (no++; length > 0; no++, length -= stripesize) { + if (no > sc->sc_ndisks - 1) { + no = 0; + offset += stripesize; + } + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. + */ + cbp->bio_length = MIN(stripesize, length); + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; + + cbp->bio_caller2 = sc->sc_disks[no]; + + if (bp->bio_cmd != BIO_DELETE) + addr += stripesize; + } + /* + * Fire off all allocated requests! + */ + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + struct g_consumer *cp; + + TAILQ_REMOVE(&queue, cbp, bio_queue); + cp = cbp->bio_caller2; + cbp->bio_caller2 = NULL; + cbp->bio_to = cp->provider; + G_STRIPE_LOGREQ(cbp, "Sending request."); + g_io_request(cbp, cp); + } + return (0); +failure: + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); + bp->bio_children--; + g_destroy_bio(cbp); + } + return (error); +} + +static void +g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) +{ + struct bio_queue_head queue; + struct g_consumer *cp; + struct bio *cbp; + u_int no; + + bioq_init(&queue); + for (no = 0; no < sc->sc_ndisks; no++) { + cbp = g_clone_bio(bp); + if (cbp == NULL) { + for (cbp = bioq_first(&queue); cbp != NULL; + cbp = bioq_first(&queue)) { + bioq_remove(&queue, cbp); + g_destroy_bio(cbp); + } + if (bp->bio_error == 0) + bp->bio_error = ENOMEM; + g_io_deliver(bp, bp->bio_error); + return; + } + bioq_insert_tail(&queue, cbp); + cbp->bio_done = g_stripe_done; + cbp->bio_caller2 = sc->sc_disks[no]; + cbp->bio_to = sc->sc_disks[no]->provider; + } + for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { + bioq_remove(&queue, cbp); + G_STRIPE_LOGREQ(cbp, "Sending request."); + cp = cbp->bio_caller2; + cbp->bio_caller2 = NULL; + g_io_request(cbp, cp); + } +} + +static void +g_stripe_start(struct bio *bp) +{ + off_t offset, start, length, nstripe, stripesize; + struct g_stripe_softc *sc; + u_int no; + int error, fast = 0; + + sc = bp->bio_to->geom->softc; + /* + * If sc == NULL, provider's error should be set and g_stripe_start() + * should not be called at all. + */ + KASSERT(sc != NULL, + ("Provider's error should be set (error=%d)(device=%s).", + bp->bio_to->error, bp->bio_to->name)); + + G_STRIPE_LOGREQ(bp, "Request received."); + + switch (bp->bio_cmd) { + case BIO_READ: + case BIO_WRITE: + case BIO_DELETE: + break; + case BIO_SPEEDUP: + case BIO_FLUSH: + g_stripe_pushdown(sc, bp); + return; + case BIO_GETATTR: + if (!strcmp(bp->bio_attribute, "GEOM::candelete")) { + int val = (sc->sc_flags & G_STRIPE_FLAG_CANDELETE) != 0; + g_handleattr(bp, "GEOM::candelete", &val, sizeof(val)); + return; + } + /* otherwise: To which provider it should be delivered? */ + default: + g_io_deliver(bp, EOPNOTSUPP); + return; + } + + stripesize = sc->sc_stripesize; + + /* + * Calculations are quite messy, but fast I hope. + */ + + /* Stripe number. */ + /* nstripe = bp->bio_offset / stripesize; */ + nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; + /* Disk number. */ + no = nstripe % sc->sc_ndisks; + /* Start position in stripe. */ + /* start = bp->bio_offset % stripesize; */ + start = bp->bio_offset & (stripesize - 1); + /* Start position in disk. */ + /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ + offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; + /* Length of data to operate. */ + length = MIN(bp->bio_length, stripesize - start); + + /* + * Do use "fast" mode when: + * 1. "Fast" mode is ON. + * and + * 2. Request size is less than or equal to maxphys, + * which should always be true. + * and + * 3. Request size is bigger than stripesize * ndisks. If it isn't, + * there will be no need to send more than one I/O request to + * a provider, so there is nothing to optmize. + * and + * 4. Request is not unmapped. + * and + * 5. It is not a BIO_DELETE. + */ + if (g_stripe_fast && bp->bio_length <= maxphys && + bp->bio_length >= stripesize * sc->sc_ndisks && + (bp->bio_flags & BIO_UNMAPPED) == 0 && + bp->bio_cmd != BIO_DELETE) { + fast = 1; + } + error = 0; + if (fast) { + error = g_stripe_start_fast(bp, no, offset, length); + if (error != 0) + g_stripe_fast_failed++; + } + /* + * Do use "economic" when: + * 1. "Economic" mode is ON. + * or + * 2. "Fast" mode failed. It can only fail if there is no memory. + */ + if (!fast || error != 0) + error = g_stripe_start_economic(bp, no, offset, length); + if (error != 0) { + if (bp->bio_error == 0) + bp->bio_error = error; + g_io_deliver(bp, bp->bio_error); + } +} + +static void +g_stripe_check_and_run(struct g_stripe_softc *sc) +{ + struct g_provider *dp; + off_t mediasize, ms; + u_int no, sectorsize = 0; + + g_topology_assert(); + if (g_stripe_nvalid(sc) != sc->sc_ndisks) + return; + + sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", + sc->sc_name); + sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; + if (g_stripe_fast == 0) + sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; + /* + * Find the smallest disk. + */ + mediasize = sc->sc_disks[0]->provider->mediasize; + if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) + mediasize -= sc->sc_disks[0]->provider->sectorsize; + mediasize -= mediasize % sc->sc_stripesize; + sectorsize = sc->sc_disks[0]->provider->sectorsize; + for (no = 1; no < sc->sc_ndisks; no++) { + dp = sc->sc_disks[no]->provider; + ms = dp->mediasize; + if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) + ms -= dp->sectorsize; + ms -= ms % sc->sc_stripesize; + if (ms < mediasize) + mediasize = ms; + sectorsize = lcm(sectorsize, dp->sectorsize); + + /* A provider underneath us doesn't support unmapped */ + if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { + G_STRIPE_DEBUG(1, "Cancelling unmapped " + "because of %s.", dp->name); + sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; + } + } + sc->sc_provider->sectorsize = sectorsize; + sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; + sc->sc_provider->stripesize = sc->sc_stripesize; + sc->sc_provider->stripeoffset = 0; + g_error_provider(sc->sc_provider, 0); + + G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); +} + +static int +g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) +{ + struct g_provider *pp; + u_char *buf; + int error; + + g_topology_assert(); + + error = g_access(cp, 1, 0, 0); + if (error != 0) + return (error); + pp = cp->provider; + g_topology_unlock(); + buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, + &error); + g_topology_lock(); + g_access(cp, -1, 0, 0); + if (buf == NULL) + return (error); + + /* Decode metadata. */ + stripe_metadata_decode(buf, md); + g_free(buf); + + return (0); +} + +/* + * Add disk to given device. + */ +static int +g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) +{ + struct g_consumer *cp, *fcp; + struct g_geom *gp; + int error; + + g_topology_assert(); + /* Metadata corrupted? */ + if (no >= sc->sc_ndisks) + return (EINVAL); + + /* Check if disk is not already attached. */ + if (sc->sc_disks[no] != NULL) + return (EEXIST); + + gp = sc->sc_geom; + fcp = LIST_FIRST(&gp->consumer); + + cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; + cp->private = NULL; + cp->index = no; + error = g_attach(cp, pp); + if (error != 0) { + g_destroy_consumer(cp); + return (error); + } + + if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { + error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); + if (error != 0) { + g_detach(cp); + g_destroy_consumer(cp); + return (error); + } + } + if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { + struct g_stripe_metadata md; + + /* Reread metadata. */ + error = g_stripe_read_metadata(cp, &md); + if (error != 0) + goto fail; + + if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || + strcmp(md.md_name, sc->sc_name) != 0 || + md.md_id != sc->sc_id) { + G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); + goto fail; + } + } + + sc->sc_disks[no] = cp; + + /* cascade candelete */ + error = g_access(cp, 1, 0, 0); + if (error == 0) { + int can_delete; + + error = g_getattr("GEOM::candelete", cp, &can_delete); + if (error == 0 && can_delete != 0) + sc->sc_flags |= G_STRIPE_FLAG_CANDELETE; + G_STRIPE_DEBUG(1, "Provider %s candelete %i.", pp->name, + can_delete); + g_access(cp, -1, 0, 0); + } + + G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); + g_stripe_check_and_run(sc); + + return (0); +fail: + if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) + g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); + g_detach(cp); + g_destroy_consumer(cp); + return (error); +} + +static struct g_geom * +g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, + u_int type) +{ + struct g_stripe_softc *sc; + struct g_geom *gp; + u_int no; + + g_topology_assert(); + G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, + md->md_id); + + /* Two disks is minimum. */ + if (md->md_all < 2) { + G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); + return (NULL); + } +#if 0 + /* Stripe size have to be grater than or equal to sector size. */ + if (md->md_stripesize < sectorsize) { + G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); + return (NULL); + } +#endif + /* Stripe size have to be power of 2. */ + if (!powerof2(md->md_stripesize)) { + G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); + return (NULL); + } + + /* Check for duplicate unit */ + LIST_FOREACH(gp, &mp->geom, geom) { + sc = gp->softc; + if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { + G_STRIPE_DEBUG(0, "Device %s already configured.", + sc->sc_name); + return (NULL); + } + } + gp = g_new_geom(mp, md->md_name); + sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); + gp->start = g_stripe_start; + gp->spoiled = g_stripe_orphan; + gp->orphan = g_stripe_orphan; + gp->access = g_stripe_access; + gp->dumpconf = g_stripe_dumpconf; + + sc->sc_id = md->md_id; + sc->sc_stripesize = md->md_stripesize; + sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); + sc->sc_ndisks = md->md_all; + sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, + M_STRIPE, M_WAITOK | M_ZERO); + for (no = 0; no < sc->sc_ndisks; no++) + sc->sc_disks[no] = NULL; + sc->sc_type = type; + mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); + + gp->softc = sc; + sc->sc_geom = gp; + sc->sc_provider = NULL; + + G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); + + return (gp); +} + +static int +g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) +{ + struct g_provider *pp; + struct g_consumer *cp, *cp1; + struct g_geom *gp; + + g_topology_assert(); + + if (sc == NULL) + return (ENXIO); + + pp = sc->sc_provider; + if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { + if (force) { + G_STRIPE_DEBUG(0, "Device %s is still open, so it " + "can't be definitely removed.", pp->name); + } else { + G_STRIPE_DEBUG(1, + "Device %s is still open (r%dw%de%d).", pp->name, + pp->acr, pp->acw, pp->ace); + return (EBUSY); + } + } + + gp = sc->sc_geom; + LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { + g_stripe_remove_disk(cp); + if (cp1 == NULL) + return (0); /* Recursion happened. */ + } + if (!LIST_EMPTY(&gp->consumer)) + return (EINPROGRESS); + + gp->softc = NULL; + KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", + gp->name)); + free(sc->sc_disks, M_STRIPE); + mtx_destroy(&sc->sc_lock); + free(sc, M_STRIPE); + G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); + g_wither_geom(gp, ENXIO); + return (0); +} + +static int +g_stripe_destroy_geom(struct gctl_req *req __unused, + struct g_class *mp __unused, struct g_geom *gp) +{ + struct g_stripe_softc *sc; + + sc = gp->softc; + return (g_stripe_destroy(sc, 0)); +} + +static struct g_geom * +g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) +{ + struct g_stripe_metadata md; + struct g_stripe_softc *sc; + struct g_consumer *cp; + struct g_geom *gp; + int error; + + g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); + g_topology_assert(); + + /* Skip providers that are already open for writing. */ + if (pp->acw > 0) + return (NULL); + + G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); + + gp = g_new_geom(mp, "stripe:taste"); + gp->start = g_stripe_start; + gp->access = g_stripe_access; + gp->orphan = g_stripe_orphan; + cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; + error = g_attach(cp, pp); + if (error == 0) { + error = g_stripe_read_metadata(cp, &md); + g_detach(cp); + } + g_destroy_consumer(cp); + g_destroy_geom(gp); + if (error != 0) + return (NULL); + gp = NULL; + + if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) + return (NULL); + if (md.md_version > G_STRIPE_VERSION) { + printf("geom_stripe.ko module is too old to handle %s.\n", + pp->name); + return (NULL); + } + /* + * Backward compatibility: + */ + /* There was no md_provider field in earlier versions of metadata. */ + if (md.md_version < 2) + bzero(md.md_provider, sizeof(md.md_provider)); + /* There was no md_provsize field in earlier versions of metadata. */ + if (md.md_version < 3) + md.md_provsize = pp->mediasize; + + if (md.md_provider[0] != '\0' && + !g_compare_names(md.md_provider, pp->name)) + return (NULL); + if (md.md_provsize != pp->mediasize) + return (NULL); + + /* + * Let's check if device already exists. + */ + sc = NULL; + LIST_FOREACH(gp, &mp->geom, geom) { + sc = gp->softc; + if (sc == NULL) + continue; + if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) + continue; + if (strcmp(md.md_name, sc->sc_name) != 0) + continue; + if (md.md_id != sc->sc_id) + continue; + break; + } + if (gp != NULL) { + G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); + error = g_stripe_add_disk(sc, pp, md.md_no); + if (error != 0) { + G_STRIPE_DEBUG(0, + "Cannot add disk %s to %s (error=%d).", pp->name, + gp->name, error); + return (NULL); + } + } else { + gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); + if (gp == NULL) { + G_STRIPE_DEBUG(0, "Cannot create device %s.", + md.md_name); + return (NULL); + } + sc = gp->softc; + G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); + error = g_stripe_add_disk(sc, pp, md.md_no); + if (error != 0) { + G_STRIPE_DEBUG(0, + "Cannot add disk %s to %s (error=%d).", pp->name, + gp->name, error); + g_stripe_destroy(sc, 1); + return (NULL); + } + } + + return (gp); +} + +static void +g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) +{ + u_int attached, no; + struct g_stripe_metadata md; + struct g_provider *pp; + struct g_stripe_softc *sc; + struct g_geom *gp; + struct sbuf *sb; + off_t *stripesize; + const char *name; + char param[16]; + int *nargs; + + g_topology_assert(); + nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); + if (nargs == NULL) { + gctl_error(req, "No '%s' argument.", "nargs"); + return; + } + if (*nargs <= 2) { + gctl_error(req, "Too few arguments."); + return; + } + + strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); + md.md_version = G_STRIPE_VERSION; + name = gctl_get_asciiparam(req, "arg0"); + if (name == NULL) { + gctl_error(req, "No 'arg%u' argument.", 0); + return; + } + strlcpy(md.md_name, name, sizeof(md.md_name)); + md.md_id = arc4random(); + md.md_no = 0; + md.md_all = *nargs - 1; + stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); + if (stripesize == NULL) { + gctl_error(req, "No '%s' argument.", "stripesize"); + return; + } + md.md_stripesize = (uint32_t)*stripesize; + bzero(md.md_provider, sizeof(md.md_provider)); + /* This field is not important here. */ + md.md_provsize = 0; + + /* Check all providers are valid */ + for (no = 1; no < *nargs; no++) { + snprintf(param, sizeof(param), "arg%u", no); + pp = gctl_get_provider(req, param); + if (pp == NULL) + return; + } + + gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); + if (gp == NULL) { + gctl_error(req, "Can't configure %s.", md.md_name); + return; + } + + sc = gp->softc; + sb = sbuf_new_auto(); + sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); + for (attached = 0, no = 1; no < *nargs; no++) { + snprintf(param, sizeof(param), "arg%u", no); + pp = gctl_get_provider(req, param); + if (pp == NULL) { + name = gctl_get_asciiparam(req, param); + MPASS(name != NULL); + sbuf_printf(sb, " %s", name); + continue; + } + if (g_stripe_add_disk(sc, pp, no - 1) != 0) { + G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", + no, pp->name, gp->name); + sbuf_printf(sb, " %s", pp->name); + continue; + } + attached++; + } + sbuf_finish(sb); + if (md.md_all != attached) { + g_stripe_destroy(gp->softc, 1); + gctl_error(req, "%s", sbuf_data(sb)); + } + sbuf_delete(sb); +} + +static struct g_stripe_softc * +g_stripe_find_device(struct g_class *mp, const char *name) +{ + struct g_stripe_softc *sc; + struct g_geom *gp; + + LIST_FOREACH(gp, &mp->geom, geom) { + sc = gp->softc; + if (sc == NULL) + continue; + if (strcmp(sc->sc_name, name) == 0) + return (sc); + } + return (NULL); +} + +static void +g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) +{ + struct g_stripe_softc *sc; + int *force, *nargs, error; + const char *name; + char param[16]; + u_int i; + + g_topology_assert(); + + nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); + if (nargs == NULL) { + gctl_error(req, "No '%s' argument.", "nargs"); + return; + } + if (*nargs <= 0) { + gctl_error(req, "Missing device(s)."); + return; + } + force = gctl_get_paraml(req, "force", sizeof(*force)); + if (force == NULL) { + gctl_error(req, "No '%s' argument.", "force"); + return; + } + + for (i = 0; i < (u_int)*nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_asciiparam(req, param); + if (name == NULL) { + gctl_error(req, "No 'arg%u' argument.", i); + return; + } + sc = g_stripe_find_device(mp, name); + if (sc == NULL) { + gctl_error(req, "No such device: %s.", name); + return; + } + error = g_stripe_destroy(sc, *force); + if (error != 0) { + gctl_error(req, "Cannot destroy device %s (error=%d).", + sc->sc_name, error); + return; + } + } +} + +static void +g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) +{ + uint32_t *version; + + g_topology_assert(); + + version = gctl_get_paraml(req, "version", sizeof(*version)); + if (version == NULL) { + gctl_error(req, "No '%s' argument.", "version"); + return; + } + if (*version != G_STRIPE_VERSION) { + gctl_error(req, "Userland and kernel parts are out of sync."); + return; + } + + if (strcmp(verb, "create") == 0) { + g_stripe_ctl_create(req, mp); + return; + } else if (strcmp(verb, "destroy") == 0 || + strcmp(verb, "stop") == 0) { + g_stripe_ctl_destroy(req, mp); + return; + } + + gctl_error(req, "Unknown verb."); +} + +static void +g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, + struct g_consumer *cp, struct g_provider *pp) +{ + struct g_stripe_softc *sc; + + sc = gp->softc; + if (sc == NULL) + return; + if (pp != NULL) { + /* Nothing here. */ + } else if (cp != NULL) { + sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, + (u_int)cp->index); + } else { + sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); + sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent, + (uintmax_t)sc->sc_stripesize); + sbuf_printf(sb, "%s<Type>", indent); + switch (sc->sc_type) { + case G_STRIPE_TYPE_AUTOMATIC: + sbuf_cat(sb, "AUTOMATIC"); + break; + case G_STRIPE_TYPE_MANUAL: + sbuf_cat(sb, "MANUAL"); + break; + default: + sbuf_cat(sb, "UNKNOWN"); + break; + } + sbuf_cat(sb, "</Type>\n"); + sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", + indent, sc->sc_ndisks, g_stripe_nvalid(sc)); + sbuf_printf(sb, "%s<State>", indent); + if (sc->sc_provider != NULL && sc->sc_provider->error == 0) + sbuf_cat(sb, "UP"); + else + sbuf_cat(sb, "DOWN"); + sbuf_cat(sb, "</State>\n"); + } +} + +DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); +MODULE_VERSION(geom_stripe, 0); |
