diff options
author | Lukas Ertl <le@FreeBSD.org> | 2004-09-30 12:57:35 +0000 |
---|---|---|
committer | Lukas Ertl <le@FreeBSD.org> | 2004-09-30 12:57:35 +0000 |
commit | c3aadfb9d6bc31a2a8cf9d67be72f7ec39df08e0 (patch) | |
tree | 6a80fd9dd1fc6bc23ae20575319c461f4e1c54ec /sys/geom/vinum | |
parent | af9cb375e833beec6dfd66419f061e9d874162d3 (diff) | |
download | src-c3aadfb9d6bc31a2a8cf9d67be72f7ec39df08e0.tar.gz src-c3aadfb9d6bc31a2a8cf9d67be72f7ec39df08e0.zip |
Notes
Diffstat (limited to 'sys/geom/vinum')
-rw-r--r-- | sys/geom/vinum/geom_vinum_init.c | 118 | ||||
-rw-r--r-- | sys/geom/vinum/geom_vinum_list.c | 26 | ||||
-rw-r--r-- | sys/geom/vinum/geom_vinum_plex.c | 16 | ||||
-rw-r--r-- | sys/geom/vinum/geom_vinum_raid5.c | 114 | ||||
-rw-r--r-- | sys/geom/vinum/geom_vinum_raid5.h | 2 | ||||
-rw-r--r-- | sys/geom/vinum/geom_vinum_var.h | 1 |
6 files changed, 261 insertions, 16 deletions
diff --git a/sys/geom/vinum/geom_vinum_init.c b/sys/geom/vinum/geom_vinum_init.c index 60c408c68396..46d9d51b9699 100644 --- a/sys/geom/vinum/geom_vinum_init.c +++ b/sys/geom/vinum/geom_vinum_init.c @@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$"); int gv_init_plex(struct gv_plex *); int gv_init_sd(struct gv_sd *); void gv_init_td(void *); +void gv_rebuild_plex(struct gv_plex *); +void gv_rebuild_td(void *); void gv_start_plex(struct gv_plex *); void gv_start_vol(struct gv_volume *); void gv_sync(struct gv_volume *); @@ -117,8 +119,12 @@ gv_start_plex(struct gv_plex *p) v = p->vol_sc; if ((v != NULL) && (v->plexcount > 1)) gv_sync(v); - else if (p->org == GV_PLEX_RAID5) - gv_init_plex(p); + else if (p->org == GV_PLEX_RAID5) { + if (p->state == GV_PLEX_DEGRADED) + gv_rebuild_plex(p); + else + gv_init_plex(p); + } return; } @@ -142,7 +148,9 @@ gv_start_vol(struct gv_volume *v) case GV_PLEX_DOWN: gv_init_plex(p); break; - case GV_PLEX_DEGRADED: /* XXX not yet */ + case GV_PLEX_DEGRADED: + gv_rebuild_plex(p); + break; default: return; } @@ -191,6 +199,22 @@ gv_sync(struct gv_volume *v) } } +void +gv_rebuild_plex(struct gv_plex *p) +{ + struct gv_sync_args *sync; + + if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom)) + return; + + sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); + sync->to = p; + sync->syncsize = GV_DFLT_SYNCSIZE; + + kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s", + p->name); +} + int gv_init_plex(struct gv_plex *p) { @@ -225,6 +249,94 @@ gv_init_sd(struct gv_sd *s) return (0); } +/* This thread is responsible for rebuilding a degraded RAID5 plex. */ +void +gv_rebuild_td(void *arg) +{ + struct bio *bp; + struct gv_plex *p; + struct g_consumer *cp; + struct gv_sync_args *sync; + u_char *buf; + off_t i; + int error; + + buf = NULL; + bp = NULL; + + sync = arg; + p = sync->to; + p->synced = 0; + p->flags |= GV_PLEX_SYNCING; + cp = p->consumer; + + g_topology_lock(); + error = g_access(cp, 1, 1, 0); + if (error) { + g_topology_unlock(); + printf("GEOM_VINUM: rebuild of %s failed to access consumer: " + "%d\n", p->name, error); + kthread_exit(error); + } + g_topology_unlock(); + + buf = g_malloc(sync->syncsize, M_WAITOK); + + printf("GEOM_VINUM: rebuild of %s started\n", p->name); + i = 0; + for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) { +/* + if (i + sync->syncsize > p->size) + sync->syncsize = p->size - i; +*/ + bp = g_new_bio(); + if (bp == NULL) { + printf("GEOM_VINUM: rebuild of %s failed creating bio: " + "out of memory\n", p->name); + break; + } + bp->bio_cmd = BIO_WRITE; + bp->bio_done = NULL; + bp->bio_data = buf; + bp->bio_cflags |= GV_BIO_REBUILD; + bp->bio_offset = i; + bp->bio_length = p->stripesize; + + /* Schedule it down ... */ + g_io_request(bp, cp); + + /* ... and wait for the result. */ + error = biowait(bp, "gwrite"); + if (error) { + printf("GEOM_VINUM: rebuild of %s failed at offset %jd " + "errno: %d\n", p->name, i, error); + break; + } + g_destroy_bio(bp); + bp = NULL; + } + + if (bp != NULL) + g_destroy_bio(bp); + if (buf != NULL) + g_free(buf); + + g_topology_lock(); + g_access(cp, -1, -1, 0); + gv_save_config_all(p->vinumconf); + g_topology_unlock(); + + p->flags &= ~GV_PLEX_SYNCING; + p->synced = 0; + + /* Successful initialization. */ + if (!error) + printf("GEOM_VINUM: rebuild of %s finished\n", p->name); + + g_free(sync); + kthread_exit(error); +} + void gv_sync_td(void *arg) { diff --git a/sys/geom/vinum/geom_vinum_list.c b/sys/geom/vinum/geom_vinum_list.c index f70cffb5f842..ca95ba74e7d2 100644 --- a/sys/geom/vinum/geom_vinum_list.c +++ b/sys/geom/vinum/geom_vinum_list.c @@ -365,9 +365,15 @@ gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags) (intmax_t)s->size, (intmax_t)s->size / MEGABYTE); sbuf_printf(sb, "\t\tState: %s\n", gv_sdstate(s->state)); - if (s->state == GV_SD_INITIALIZING) { - sbuf_printf(sb, "\t\tInitialized: %16jd bytes " - "(%d%%)\n", (intmax_t)s->initialized, + if (s->state == GV_SD_INITIALIZING || + s->state == GV_SD_REVIVING) { + if (s->state == GV_SD_INITIALIZING) + sbuf_printf(sb, "\t\tInitialized: "); + else + sbuf_printf(sb, "\t\tRevived: "); + + sbuf_printf(sb, "%16jd bytes (%d%%)\n", + (intmax_t)s->initialized, (int)((s->initialized * 100) / s->size)); } @@ -377,20 +383,20 @@ gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags) gv_roughlength(s->plex_offset, 1)); } - if (s->state == GV_SD_REVIVING) { - /* XXX */ - } - sbuf_printf(sb, "\t\tDrive %s (%s) at offset %jd (%s)\n", s->drive, s->drive_sc == NULL ? "*missing*" : s->drive_sc->name, (intmax_t)s->drive_offset, gv_roughlength(s->drive_offset, 1)); } else { - /* XXX reviving and initializing... */ sbuf_printf(sb, "S %-21s State: ", s->name); - if (s->state == GV_SD_INITIALIZING) { - sbuf_printf(sb, "I %d%%\t", + if (s->state == GV_SD_INITIALIZING || + s->state == GV_SD_REVIVING) { + if (s->state == GV_SD_INITIALIZING) + sbuf_printf(sb, "I "); + else + sbuf_printf(sb, "R "); + sbuf_printf(sb, "%d%%\t", (int)((s->initialized * 100) / s->size)); } else { sbuf_printf(sb, "%s\t", gv_sdstate(s->state)); diff --git a/sys/geom/vinum/geom_vinum_plex.c b/sys/geom/vinum/geom_vinum_plex.c index 494ec2c0ba10..7ce5b08a7a5e 100644 --- a/sys/geom/vinum/geom_vinum_plex.c +++ b/sys/geom/vinum/geom_vinum_plex.c @@ -295,7 +295,9 @@ gv_plex_worker(void *arg) /* A completed request. */ if (bp->bio_cflags & GV_BIO_DONE) { g_free(bq); - if (bp->bio_cflags & GV_BIO_SYNCREQ) { + + if (bp->bio_cflags & GV_BIO_SYNCREQ || + bp->bio_cflags & GV_BIO_REBUILD) { s = bp->bio_to->private; if (bp->bio_error == 0) s->initialized += bp->bio_length; @@ -306,8 +308,11 @@ gv_plex_worker(void *arg) g_topology_unlock(); s->initialized = 0; } + } + + if (bp->bio_cflags & GV_BIO_SYNCREQ) g_std_done(bp); - } else + else gv_plex_completed_request(p, bp); /* * A sub-request that was hold back because it interfered with @@ -457,7 +462,12 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp) wp->bio = bp; TAILQ_INIT(&wp->bits); - err = gv_build_raid5_req(p, wp, bp, addr, boff, bcount); + if (bp->bio_cflags & GV_BIO_REBUILD) + err = gv_rebuild_raid5(p, wp, bp, addr, + boff, bcount); + else + err = gv_build_raid5_req(p, wp, bp, addr, + boff, bcount); /* * Building the sub-request failed, we probably need to diff --git a/sys/geom/vinum/geom_vinum_raid5.c b/sys/geom/vinum/geom_vinum_raid5.c index 62fb24685516..9ba02e85f896 100644 --- a/sys/geom/vinum/geom_vinum_raid5.c +++ b/sys/geom/vinum/geom_vinum_raid5.c @@ -77,6 +77,117 @@ gv_stripe_active(struct gv_plex *p, struct bio *bp) return (overlap); } +int +gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp, + caddr_t addr, off_t boff, off_t bcount) +{ + struct gv_sd *broken, *s; + struct gv_bioq *bq; + struct bio *cbp, *pbp; + off_t len_left, real_len, real_off, stripeend, stripeoff, stripestart; + + if (p == NULL || LIST_EMPTY(&p->subdisks)) + return (ENXIO); + + /* Offset of the start address from the start of the stripe. */ + stripeoff = boff % (p->stripesize * (p->sdcount - 1)); + KASSERT(stripeoff >= 0, ("gv_build_raid5_request: stripeoff < 0")); + + /* The offset of the stripe on this subdisk. */ + stripestart = (boff - stripeoff) / (p->sdcount - 1); + KASSERT(stripestart >= 0, ("gv_build_raid5_request: stripestart < 0")); + + stripeoff %= p->stripesize; + + /* The offset of the request on this subdisk. */ + real_off = stripestart + stripeoff; + + stripeend = stripestart + p->stripesize; + len_left = stripeend - real_off; + KASSERT(len_left >= 0, ("gv_build_raid5_request: len_left < 0")); + + /* Find the right subdisk. */ + broken = NULL; + LIST_FOREACH(s, &p->subdisks, in_plex) { + if (s->state != GV_SD_UP) + broken = s; + } + + /* Parity stripe not found. */ + if (broken == NULL) + return (ENXIO); + + switch (broken->state) { + case GV_SD_UP: + return (EINVAL); + + case GV_SD_STALE: + if (!(bp->bio_cflags & GV_BIO_REBUILD)) + return (ENXIO); + + printf("GEOM_VINUM: sd %s is reviving\n", broken->name); + gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE); + break; + + case GV_SD_REVIVING: + break; + + default: + /* All other subdisk states mean it's not accessible. */ + return (ENXIO); + } + + real_len = (bcount <= len_left) ? bcount : len_left; + wp->length = real_len; + wp->data = addr; + wp->lockbase = real_off; + + KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0")); + + /* Read all subdisks. */ + LIST_FOREACH(s, &p->subdisks, in_plex) { + /* Skip the broken subdisk. */ + if (s == broken) + continue; + + cbp = g_clone_bio(bp); + if (cbp == NULL) + return (ENOMEM); + cbp->bio_cmd = BIO_READ; + cbp->bio_data = g_malloc(real_len, M_WAITOK); + cbp->bio_cflags |= GV_BIO_MALLOC; + cbp->bio_offset = real_off; + cbp->bio_length = real_len; + cbp->bio_done = gv_plex_done; + cbp->bio_caller2 = s->consumer; + cbp->bio_driver1 = wp; + + GV_ENQUEUE(bp, cbp, pbp); + + bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO); + bq->bp = cbp; + TAILQ_INSERT_TAIL(&wp->bits, bq, queue); + } + + /* Write the parity data. */ + cbp = g_clone_bio(bp); + if (cbp == NULL) + return (ENOMEM); + cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO); + cbp->bio_cflags |= GV_BIO_MALLOC; + cbp->bio_offset = real_off; + cbp->bio_length = real_len; + cbp->bio_done = gv_plex_done; + cbp->bio_caller2 = broken->consumer; + cbp->bio_driver1 = wp; + cbp->bio_cflags |= GV_BIO_REBUILD; + wp->parity = cbp; + + p->synced = boff; + + return (0); +} + /* Build a request group to perform (part of) a RAID5 request. */ int gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp, @@ -166,6 +277,9 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp, KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0")); + if ((p->flags & GV_PLEX_SYNCING) && (boff + real_len < p->synced)) + type = REQ_TYPE_NORMAL; + switch (bp->bio_cmd) { case BIO_READ: /* diff --git a/sys/geom/vinum/geom_vinum_raid5.h b/sys/geom/vinum/geom_vinum_raid5.h index 8074f4273c10..212f6c65f831 100644 --- a/sys/geom/vinum/geom_vinum_raid5.h +++ b/sys/geom/vinum/geom_vinum_raid5.h @@ -67,6 +67,8 @@ struct gv_raid5_packet { int gv_stripe_active(struct gv_plex *, struct bio *); int gv_build_raid5_req(struct gv_plex *, struct gv_raid5_packet *, struct bio *, caddr_t, off_t, off_t); +int gv_rebuild_raid5(struct gv_plex *, struct gv_raid5_packet *, + struct bio *, caddr_t, off_t, off_t); void gv_raid5_worker(void *); void gv_plex_done(struct bio *); diff --git a/sys/geom/vinum/geom_vinum_var.h b/sys/geom/vinum/geom_vinum_var.h index 99c1c377cea6..196f7f8d50f0 100644 --- a/sys/geom/vinum/geom_vinum_var.h +++ b/sys/geom/vinum/geom_vinum_var.h @@ -113,6 +113,7 @@ #define GV_BIO_ONHOLD 0x04 #define GV_BIO_SYNCREQ 0x08 #define GV_BIO_SUCCEED 0x10 +#define GV_BIO_REBUILD 0x20 /* * hostname is 256 bytes long, but we don't need to shlep multiple copies in |