summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Greg Lehey <grog@FreeBSD.org> 1999-01-21 00:40:32 +0000
committer Greg Lehey <grog@FreeBSD.org> 1999-01-21 00:40:32 +0000
commit 6d930639c333ddcdec7f1d0fcfa01e09b1ba36ab (patch)
tree fa828e3b7eb7d35375c6c275733820dcf6acabfb
parent 2178c45997177ec75af11ed332e4d0e7fc2711f9 (diff)
Notes
-rw-r--r-- sys/dev/vinum/vinumstate.c 869
1 files changed, 456 insertions, 413 deletions
diff --git a/sys/dev/vinum/vinumstate.c b/sys/dev/vinum/vinumstate.c
index eea83e148f71..8d44fecfcdf2 100644
--- a/sys/dev/vinum/vinumstate.c
+++ b/sys/dev/vinum/vinumstate.c
@@ -33,7 +33,7 @@
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
- * $Id: state.c,v 1.5 1998/12/28 04:56:23 peter Exp $
+ * $Id: vinumstate.c,v 2.10 1999/01/17 06:19:23 grog Exp grog $
*/
#define REALLYKERNEL
@@ -44,7 +44,7 @@
/* Update drive state */
/* Return 1 if the state changes, otherwise 0 */
int
-set_drive_state(int driveno, enum drivestate state, int flags)
+set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
{
struct drive *drive = &DRIVE[driveno];
int oldstate = drive->state;
@@ -53,35 +53,25 @@ set_drive_state(int driveno, enum drivestate state, int flags)
if (drive->state == drive_unallocated) /* no drive to do anything with, */
return 0;
- if (state != oldstate) { /* don't change it if it's not different */
- if (state == drive_down) { /* the drive's going down */
- if ((flags & setstate_force) || (drive->opencount == 0)) { /* we can do it */
- /* We can't call close() from an interrupt
- * context. Instead, we do it when we
- * next call strategy(). This will change
- * when the vinum daemon comes onto the scene */
- if (!(flags & setstate_noupdate)) /* we can close it */
- close_drive(drive);
- } else
- return 0; /* don't do it */
- }
- drive->state = state; /* set the state */
- printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
- if (((drive->state == drive_up)
- || ((drive->state == drive_coming_up)))
+ if (newstate != oldstate) { /* don't change it if it's not different */
+ if ((newstate == drive_down) /* the drive's going down */
+ &&(!(flags & setstate_force))
+ && (drive->opencount != 0)) /* we can't do it */
+ return 0; /* don't do it */
+ drive->state = newstate; /* set the state */
+ if (drive->label.name[0] != '\0') /* we have a name, */
+ printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
+ if ((drive->state == drive_up)
&& (drive->vp == NULL)) /* should be open, but we're not */
- init_drive(drive); /* which changes the state again */
- if ((state != oldstate) /* state has changed */
- &&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */
+ init_drive(drive, 1); /* which changes the state again */
+ if (newstate != oldstate) { /* state has changed */
for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */
if (SD[sdno].driveno == driveno) /* belongs to this drive */
- set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */
+ update_sd_state(sdno); /* update the state */
}
}
- if (flags & setstate_noupdate) /* don't update now, */
- vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
- else
- save_config(); /* yes: save the updated configuration */
+ if ((flags & setstate_configuring) == 0) /* configuring? */
+ save_config(); /* no: save the updated configuration now */
return 1;
}
return 0;
@@ -95,24 +85,27 @@ set_drive_state(int driveno, enum drivestate state, int flags)
* only) and internally.
*/
int
-set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
+set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
{
struct sd *sd = &SD[sdno];
+ struct plex *plex;
+ struct volume *vol;
int oldstate = sd->state;
int status = 1; /* status to return */
- if (state == oldstate)
- return 0; /* no change */
-
- if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
+ if ((newstate == oldstate)
+ || (sd->state == sd_unallocated)) /* no subdisk to do anything with, */
return 0;
if (sd->driveoffset < 0) { /* not allocated space */
sd->state = sd_down;
- if (state != sd_down)
+ if (newstate != sd_down) {
+ if (sd->plexno >= 0)
+ sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */
return -1;
+ }
} else { /* space allocated */
- switch (state) {
+ switch (newstate) {
case sd_down:
if ((!flags & setstate_force) /* but gently */
&&(sd->plexno >= 0)) /* and we're attached to a plex, */
@@ -125,35 +118,82 @@ set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
switch (sd->state) {
case sd_crashed:
case sd_down: /* been down, no data lost */
- if ((sd->plexno) /* we're associated with a plex */
+ if ((sd->plexno >= 0) /* we're associated with a plex */
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
- break;
+ break; /* do it */
/* XXX Get this right: make sure that other plexes in
* the volume cover this address space, otherwise
- * we make this one sd_up */
+ * we make this one sd_up.
+ *
+ * Do we even want this any more?
+ */
sd->state = sd_reborn; /* here it is again */
- printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
+ printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(newstate));
status = -1;
break;
case sd_init: /* brand new */
if (flags & setstate_configuring) /* we're doing this while configuring */
break;
- sd->state = sd_empty; /* nothing in it */
- printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
- status = -1;
- break;
-
- case sd_initializing:
- break; /* go on and do it */
+ /* otherwise it's like being empty */
+ /* FALLTHROUGH */
case sd_empty:
- if ((sd->plexno) /* we're associated with a plex */
+ if ((sd->plexno >= 0) /* we're associated with a plex */
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
break;
- return 0; /* can't do it */
+ /* Otherwise it's just out of date */
+ /* FALLTHROUGH */
+
+ case sd_stale: /* out of date info, need reviving */
+ case sd_obsolete:
+ /* 1. If the subdisk is not part of a plex, bring it up, don't revive.
+
+ * 2. If the subdisk is part of a one-plex volume or an unattached plex,
+ * and it's not RAID-5, we *can't revive*. The subdisk doesn't
+ * change its state.
+ *
+ * 3. If the subdisk is part of a one-plex volume or an unattached plex,
+ * and it's RAID-5, but more than one subdisk is down, we *still
+ * can't revive*. The subdisk doesn't change its state.
+ *
+ * 4. If the subdisk is part of a multi-plex volume, we'll change to
+ * reviving and let the revive routines find out whether it will work
+ * or not. If they don't, the revive stops with an error message,
+ * but the state doesn't change (FWIW).*/
+ if (sd->plexno < 0) /* no plex associated, */
+ break; /* bring it up */
+ plex = &PLEX[sd->plexno];
+ if (plex->volno >= 0) /* have a volume */
+ vol = &VOL[plex->volno];
+ else
+ vol = NULL;
+ if (((vol == NULL) /* no volume */ ||(vol->plexes == 1)) /* or only one plex in volume */
+ &&((plex->organization != plex_raid5) /* and it's not a RAID-5 plex */
+ ||(plex->sddowncount > 1))) /* or more than one subdisk is down, */
+ return 0; /* can't do it */
+ sd->state = sd_reviving; /* put in reviving state */
+ sd->revived = 0; /* nothing done yet */
+ status = EAGAIN; /* need to repeat */
+ break;
+
+ /* XXX This is silly. We need to be able to
+ * bring the subdisk up when it's finished
+ * initializing, but not from the user. We
+ * use the same ioctl in each case, but Vinum(8)
+ * doesn't supply the -f flag, so we use that
+ * to decide whether to do it or not */
+ case sd_initializing:
+ if (flags & setstate_force)
+ break; /* do it if we have to */
+ return 0; /* no */
+
+ case sd_reviving:
+ if (flags & setstate_force) /* insist, */
+ break;
+ return EAGAIN; /* no, try again */
default: /* can't do it */
/* There's no way to bring subdisks up directly from
@@ -168,186 +208,359 @@ set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
return 0; /* don't do it */
}
}
- sd->state = state;
- printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
- if ((flags & setstate_norecurse) == 0)
- set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
- if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
- if (setstate_noupdate) /* we can't update now, */
- vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
- else
- save_config();
- }
+ if (status == 1) { /* we can do it, */
+ sd->state = newstate;
+ printf("vinum: %s is %s\n", sd->name, sd_state(sd->state));
+ } else /* we don't get here with status 0 */
+ printf("vinum: %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(newstate));
+ if (sd->plexno >= 0) /* we belong to a plex */
+ update_plex_state(sd->plexno); /* update plex state */
+ if ((flags & setstate_configuring) == 0) /* save config now */
+ save_config();
return status;
}
-/* Called from request routines when they find
- * a subdisk which is not kosher. Decide whether
- * it warrants changing the state. Return
- * REQUEST_DOWN if we can't use the subdisk,
- * REQUEST_OK if we can. */
-enum requeststatus
-checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
+/* Set the state of a plex dependent on its subdisks.
+ * This time round, we'll let plex state just reflect
+ * aggregate subdisk state, so this becomes an order of
+ * magnitude less complicated. In particular, ignore
+ * the requested state.
+ */
+int
+set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
{
- struct plex *plex = &PLEX[sd->plexno];
- int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
+ struct plex *plex; /* point to our plex */
+ enum plexstate oldstate;
+ enum volplexstate vps; /* how do we compare with the other plexes? */
- /* first, see if the plex wants to be accessed */
- switch (plex->state) {
- case plex_reviving:
- /* When writing, we'll write anything that starts
- * up to the current revive pointer, but we'll
- * only accept a read which finishes before the
- * current revive pointer.
- */
- if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
- ||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
- if (writeop) { /* writing to a consistent down disk */
- if (DRIVE[sd->driveno].state == drive_up)
- set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
- else
- set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
- }
- return REQUEST_DOWN; /* that part of the plex is still down */
- } else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
- rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
- /* FALLTHROUGH */
-
- case plex_up:
- case plex_degraded:
- case plex_flaky:
- /* We can access the plex: let's see
- * how the subdisk feels */
- switch (sd->state) {
- case sd_up:
- return REQUEST_OK;
+ plex = &PLEX[plexno]; /* point to our plex */
+ oldstate = plex->state;
- case sd_reborn:
- if (writeop)
- return REQUEST_OK; /* always write to a reborn disk */
- /* Handle the mapping. We don't want to reject
- * a read request to a reborn subdisk if that's
- * all we have. XXX */
- return REQUEST_DOWN;
+ if ((plex->state == plex_unallocated) /* no plex to do anything with, */
+ ||((state == oldstate) /* or we're already there */
+ &&(state != plex_up))) /* and it's not up */
+ return 0;
- case sd_down:
- case sd_crashed:
- if (writeop) { /* writing to a consistent down disk */
- if (DRIVE[sd->driveno].state == drive_up)
- set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
- else
- set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
- }
- return REQUEST_DOWN; /* and it's down one way or another */
+ vps = vpstate(plex); /* how do we compare with the other plexes? */
- default:
- return REQUEST_DOWN;
- }
+ switch (state) {
+ /* We can't bring the plex up, even by force,
+ * unless it's ready. update_plex_state
+ * checks that */
+ case plex_up: /* bring the plex up */
+ update_plex_state(plex->plexno); /* it'll come up if it can */
+ break;
+
+ case plex_down: /* want to take it down */
+ if (((vps == volplex_onlyus) /* we're up and alone */
+ ||(vps == volplex_onlyusup)) /* or only we are up: others are down */
+ &&(!(flags & setstate_force))) /* and we don't want to use force */
+ return 0; /* can't do it */
+ plex->state = state; /* do it */
+ invalidate_subdisks(plex, sd_down); /* and down all up subdisks */
+ break;
+
+ /* This is only requested internally.
+ * Trust ourselves */
+ case plex_faulty:
+ plex->state = state; /* do it */
+ invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */
+ break;
+
+ case plex_initializing:
+ /* XXX consider what safeguards we need here */
+ if ((flags & setstate_force) == 0)
+ return 0;
+ plex->state = state; /* do it */
+ break;
+ /* What's this? */
default:
- return REQUEST_DOWN;
+ return 0;
+ }
+ if (plex->state != oldstate) /* we've changed, */
+ printf("vinum: %s is %s\n", plex->name, plex_state(plex->state)); /* tell them about it */
+ /* Now see what we have left, and whether
+ * we're taking the volume down */
+ if (plex->volno >= 0) /* we have a volume */
+ update_volume_state(plex->volno); /* update its state */
+ if ((flags & setstate_configuring) == 0) /* save config now */
+ save_config(); /* yes: save the updated configuration */
+ return 1;
+}
+
+/* Update the state of a volume dependent on its plexes. */
+int
+set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
+{
+ struct volume *vol = &VOL[volno]; /* point to our volume */
+
+ if ((vol->state == state) /* we're there already */
+ ||(vol->state == volume_unallocated)) /* or no volume to do anything with, */
+ return 0;
+
+ if (state == volume_up) /* want to come up */
+ update_volume_state(volno);
+ else if (state == volume_down) { /* want to go down */
+ if ((vol->opencount == 0) /* not open */
+ ||((flags & setstate_force) != 0)) { /* or we're forcing */
+ vol->state = volume_down;
+ printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
+ if ((flags & setstate_configuring) == 0) /* save config now */
+ save_config(); /* yes: save the updated configuration */
+ return 1;
+ }
}
+ return 0; /* no change */
}
+/* Set the state of a subdisk based on its environment */
void
-add_defective_region(struct plex *plex, off_t offset, size_t length)
+update_sd_state(int sdno)
{
-/* XXX get this ordered, and coalesce regions if necessary */
- if (++plex->defective_regions > plex->defective_region_count)
- EXPAND(plex->defective_region,
- struct plexregion,
- plex->defective_region_count,
- PLEX_REGION_TABLE_SIZE);
- plex->defective_region[plex->defective_regions - 1].offset = offset;
- plex->defective_region[plex->defective_regions - 1].length = length;
+ struct sd *sd;
+ struct drive *drive;
+ enum sdstate oldstate;
+
+ sd = &SD[sdno];
+ oldstate = sd->state;
+ drive = &DRIVE[sd->driveno];
+
+ if (drive->state == drive_up) {
+ switch (sd->state) {
+ case sd_down:
+ case sd_crashed:
+ sd->state = sd_reborn; /* back up again with no loss */
+ break;
+
+ default:
+ break;
+ }
+ } else { /* down or worse */
+ switch (sd->state) {
+ case sd_up:
+ case sd_reborn:
+ case sd_reviving:
+ sd->state = sd_crashed; /* lost our drive */
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (sd->state != oldstate) /* state has changed, */
+ printf("vinum: %s is %s\n", sd->name, sd_state(sd->state)); /* say so */
+ if (sd->plexno >= 0) /* we're part of a plex, */
+ update_plex_state(sd->plexno); /* update its state */
}
+/* Set the state of a plex based on its environment */
void
-add_unmapped_region(struct plex *plex, off_t offset, size_t length)
+update_plex_state(int plexno)
{
- if (++plex->unmapped_regions > plex->unmapped_region_count)
- EXPAND(plex->unmapped_region,
- struct plexregion,
- plex->unmapped_region_count,
- PLEX_REGION_TABLE_SIZE);
- plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
- plex->unmapped_region[plex->unmapped_regions - 1].length = length;
+ struct plex *plex; /* point to our plex */
+ enum plexstate oldstate;
+ enum volplexstate vps; /* how do we compare with the other plexes? */
+ enum sdstates statemap; /* get a map of the subdisk states */
+
+ plex = &PLEX[plexno]; /* point to our plex */
+ oldstate = plex->state;
+
+ vps = vpstate(plex); /* how do we compare with the other plexes? */
+ statemap = sdstatemap(plex); /* get a map of the subdisk states */
+
+ if (statemap == sd_upstate) /* all subdisks ready for action */
+ /* All the subdisks are up. This also means that
+ * they are consistent, so we can just bring
+ * the plex up */
+ plex->state = plex_up; /* go for it */
+ else if (statemap == sd_emptystate) { /* nothing done yet */
+ if (((vps & (volplex_otherup | volplex_onlyus)) == 0) /* nothing is up */ &&(plex->state == plex_init) /* we're brand spanking new */
+ &&(plex->volno >= 0) /* and we have a volume */
+ &&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
+ /* Conceptually, an empty plex does not contain valid data,
+ * but normally we'll see this state when we have just
+ * created a plex, and it's either consistent from earlier,
+ * or we don't care about the previous contents (we're going
+ * to create a file system or use it for swap).
+ *
+ * We need to do this in one swell foop: on the next call
+ * we will no longer be just empty.
+ *
+ * This code assumes that all the other plexes are also
+ * capable of coming up (i.e. all the sds are up), but
+ * that's OK: we'll come back to this function for the remaining
+ * plexes in the volume. */
+ struct volume *vol = &VOL[plex->volno];
+ int plexno;
+
+ for (plexno = 0; plexno < vol->plexes; plexno++)
+ PLEX[vol->plex[plexno]].state = plex_up;
+ } else if (vps & volplex_otherup == 0) { /* no other plexes up */
+ int sdno;
+
+ plex->state = plex_up; /* we can call that up */
+ for (sdno = 0; sdno < plex->subdisks; sdno++) { /* change the subdisks to up state */
+ SD[plex->sdnos[sdno]].state = sd_up;
+ printf("vinum: %s is up\n", SD[plex->sdnos[sdno]].name); /* tell them about it */
+ }
+ } else
+ plex->state = plex_faulty; /* no, it's down */
+ } else if (statemap & (sd_upstate | sd_rebornstate) == statemap) /* all up or reborn */
+ plex->state = plex_flaky;
+ else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */
+ plex->state = plex_corrupt; /* corrupt */
+ else if (statemap & sd_initstate) /* some subdisks initializing */
+ plex->state = plex_initializing;
+ else /* nothing at all up */
+ plex->state = plex_faulty;
+
+ if (plex->state != oldstate) /* state has changed, */
+ printf("vinum: %s is %s\n", plex->name, plex_state(plex->state)); /* tell them about it */
+ if (plex->volno >= 0) /* we're part of a volume, */
+ update_volume_state(plex->volno); /* update its state */
}
-/* Rebuild a plex free list and set state if
- * we have a configuration error */
+/* Set volume state based on its components */
void
-rebuild_plex_unmappedlist(struct plex *plex)
+update_volume_state(int volno)
{
- int sdno;
- struct sd *sd;
- int lastsdend = 0; /* end offset of last subdisk */
+ struct volume *vol; /* our volume */
+ int plexno;
+ enum volumestate oldstate;
+
+ vol = &VOL[volno]; /* point to our volume */
+ oldstate = vol->state;
- if (plex->unmapped_region != NULL) { /* we're going to rebuild it */
- Free(plex->unmapped_region);
- plex->unmapped_region = NULL;
- plex->unmapped_regions = 0;
- plex->unmapped_region_count = 0;
+ for (plexno = 0; plexno < vol->plexes; plexno++) {
+ struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
+ if (plex->state >= plex_corrupt) { /* something accessible, */
+ vol->state = volume_up;
+ break;
+ }
}
- if (plex->defective_region != NULL) {
- Free(plex->defective_region);
- plex->defective_region = NULL;
- plex->defective_regions = 0;
- plex->defective_region_count = 0;
+ if (plexno == vol->plexes) /* didn't find an up plex */
+ vol->state = volume_down;
+
+ if (vol->state != oldstate) { /* state changed */
+ printf("vinum: %s is %s\n", vol->name, volume_state(vol->state));
+ save_config(); /* save the updated configuration */
}
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- sd = &SD[plex->sdnos[sdno]];
- if (sd->plexoffset < lastsdend) { /* overlap */
- printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
- set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
- } else if (sd->plexoffset > lastsdend) /* gap */
- add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
- else if (sd->state < sd_reborn) /* this part defective */
- add_defective_region(plex, sd->plexoffset, sd->sectors);
- lastsdend = sd->plexoffset + sd->sectors;
+}
+
+/* Called from request routines when they find
+ * a subdisk which is not kosher. Decide whether
+ * it warrants changing the state. Return
+ * REQUEST_DOWN if we can't use the subdisk,
+ * REQUEST_OK if we can. */
+/* A prior version of this function checked the plex
+ * state as well. At the moment, consider plex states
+ * information for the user only. We'll ignore them
+ * and use the subdisk state only. The last version of
+ * this file with the old logic was 2.7. XXX */
+enum requeststatus
+checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
+{
+ struct plex *plex = &PLEX[sd->plexno];
+ int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
+
+ switch (sd->state) {
+ /* We shouldn't get called if the subdisk is up */
+ case sd_up:
+ return REQUEST_OK;
+
+ case sd_reviving:
+ /* Access to a reviving subdisk depends on the
+ * organization of the plex:
+
+ * - If it's concatenated, access the subdisk up to its current
+ * revive point. If we want to write to the subdisk overlapping the
+ * current revive block, set the conflict flag in the request, asking
+ * the caller to put the request on the wait list, which will be
+ * attended to by revive_block when it's done.
+ * - if it's striped, we can't do it (we could do some hairy
+ * calculations, but it's unlikely to work).
+ * - if it's RAID-5, we can do it as long as only one
+ * subdisk is down */
+ if (plex->state == plex_striped) /* plex is striped, */
+ return REQUEST_DOWN; /* can't access it now */
+ if (diskaddr > (sd->revived
+ + sd->plexoffset
+ + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */
+ return REQUEST_DOWN; /* don't take the sd down again... */
+ else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
+ if (writeop) {
+ rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
+ rq->sdno = sd->sdno; /* and which sd last caused it */
+ } else
+ return REQUEST_DOWN; /* can't read this yet */
+ }
+ return REQUEST_OK;
+
+ case sd_reborn:
+ if (writeop)
+ return REQUEST_OK; /* always write to a reborn disk */
+ else /* don't allow a read */
+ /* Handle the mapping. We don't want to reject
+ * a read request to a reborn subdisk if that's
+ * all we have. XXX */
+ return REQUEST_DOWN;
+
+ case sd_down:
+ if (writeop) /* writing to a consistent down disk */
+ set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
+ return REQUEST_DOWN; /* and it's down one way or another */
+
+ case sd_crashed:
+ if (writeop) /* writing to a consistent down disk */
+ set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
+ return REQUEST_DOWN; /* and it's down one way or another */
+
+ default:
+ return REQUEST_DOWN;
}
}
/* return a state map for the subdisks of a plex */
enum sdstates
-sdstatemap(struct plex *plex, int *sddowncount)
+sdstatemap(struct plex *plex)
{
int sdno;
enum sdstates statemap = 0; /* note the states we find */
- *sddowncount = 0; /* no subdisks down yet */
+ plex->sddowncount = 0; /* no subdisks down yet */
for (sdno = 0; sdno < plex->subdisks; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
switch (sd->state) {
case sd_empty:
statemap |= sd_emptystate;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_init:
statemap |= sd_initstate;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_down:
statemap |= sd_downstate;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_crashed:
statemap |= sd_crashedstate;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_obsolete:
statemap |= sd_obsolete;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_stale:
statemap |= sd_stalestate;
- (*sddowncount)++; /* another unusable subdisk */
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
case sd_reborn:
@@ -358,9 +571,16 @@ sdstatemap(struct plex *plex, int *sddowncount)
statemap |= sd_upstate;
break;
- default:
- statemap |= sd_otherstate;
+ case sd_initializing:
+ statemap |= sd_initstate;
+ (plex->sddowncount)++; /* another unusable subdisk */
break;
+
+ case sd_unallocated:
+ case sd_uninit:
+ case sd_reviving:
+ statemap |= sd_otherstate;
+ (plex->sddowncount)++; /* another unusable subdisk */
}
}
return statemap;
@@ -380,13 +600,25 @@ vpstate(struct plex *plex)
vol = &VOL[plex->volno]; /* point to our volume */
for (plexno = 0; plexno < vol->plexes; plexno++) {
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
- if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
+#if RAID5
+ if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
state |= volplex_onlyus; /* yes */
+#else
+ if (PLEX[vol->plex[plexno]].state >= plex_flaky) /* are we up? */
+ state |= volplex_onlyus; /* yes */
+#endif
} else {
- if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
+#if RAID5
+ if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
+ state |= volplex_otherup; /* and when they were up, they were up */
+ else
+ state |= volplex_alldown; /* and when they were down, they were down */
+#else
+ if (PLEX[vol->plex[plexno]].state >= plex_flaky) /* not us */
state |= volplex_otherup; /* and when they were up, they were up */
else
state |= volplex_alldown; /* and when they were down, they were down */
+#endif
}
}
return state; /* and when they were only halfway up */
@@ -401,240 +633,33 @@ allset(int a, int b)
return (a & b) == b;
}
-/* Update the state of a plex dependent on its subdisks.
- * Also rebuild the unmapped_region and defective_region table */
-int
-set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
-{
- int sddowncount = 0; /* number of down subdisks */
- struct plex *plex = &PLEX[plexno]; /* point to our plex */
- enum plexstate oldstate = plex->state;
- enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
- enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
-
- if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
- return 0; /* no change */
-
- if (plex->state == plex_unallocated) /* no plex to do anything with, */
- return 0;
-
- switch (state) {
- case plex_up:
- if ((plex->state == plex_initializing) /* we're initializing */
- &&(statemap != sd_upstate)) /* but SDs aren't up yet */
- return 0; /* do nothing */
-
- /* We don't really care what our state was before
- * if we want to come up. We rely entirely on the
- * state of our subdisks and our volume */
- switch (vps) {
- case volplex_onlyusdown:
- case volplex_alldown: /* another plex is down, and so are we */
- if (statemap == sd_upstate) { /* all subdisks ready for action */
- if ((plex->state == plex_init) /* we're brand spanking new */
- &&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
- /* Conceptually, an empty plex does not contain valid data,
- * but normally we'll see this state when we have just
- * created a plex, and it's either consistent from earlier,
- * or we don't care about the previous contents (we're going
- * to create a file system or use it for swap).
- *
- * We need to do this in one swell foop: on the next call
- * we will no longer be just empty.
- *
- * We'll still come back to this function for the remaining
- * plexes in the volume. They'll be up already, so that
- * doesn't change anything, but it's not worth the additional
- * code to stop doing it. */
- struct volume *vol = &VOL[plex->volno];
- int plexno;
-
- for (plexno = 0; plexno < vol->plexes; plexno++)
- PLEX[vol->plex[plexno]].state = plex_up;
- }
- plex->state = plex_up; /* bring up up, anyway */
- } else
- plex->state = plex_down;
- break;
-
- case volplex_onlyusup: /* only we are up: others are down */
- case volplex_onlyus: /* we're up and alone */
- if ((statemap == sd_upstate) /* subdisks all up */
- ||(statemap == sd_emptystate)) /* or all empty */
- plex->state = plex_up; /* go for it */
- else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
- plex->state = plex_flaky;
- else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
- plex->state = plex_degraded; /* so far no corruption */
- else
- plex->state = plex_faulty;
- break;
-
- case volplex_otherup: /* another plex is up */
- case volplex_otherupdown: /* other plexes are up and down */
- {
- int sdno;
- struct sd *sd;
-
- /* Is the data in all subdisks valid? */
- /* XXX At the moment, subdisks make false
- * claims about their validity. Replace this
- * when they tell the truth */
- /* No: we have invalid or down subdisks */
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* look at these subdisks more carefully */
- set_sd_state(plex->sdnos[sdno], /* try to get it up */
- sd_up,
- setstate_norecurse | setstate_noupdate);
- sd = &SD[plex->sdnos[sdno]]; /* point to subdisk */
- /* we can make a stale subdisk up here, because
- * we're in the process of bringing it up.
- * This wouldn't work in set_sd_state, because
- * it would allow bypassing the revive */
- if (((sd->state == sd_stale)
- || (sd->state == sd_obsolete))
- && (DRIVE[sd->driveno].state == drive_up))
- sd->state = sd_up;
- }
- statemap = sdstatemap(plex, &sddowncount); /* get the new state map */
- /* Do we need reborn? They should now all be up */
- if (statemap == (statemap & (sd_upstate | sd_rebornstate))) { /* got something we can use */
- plex->state = plex_reviving; /* we need reviving */
- return EAGAIN;
- } else
- plex->state = plex_down; /* still in error */
- }
- break;
-
- case volplex_allup: /* all plexes are up */
- case volplex_someup:
- if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
- break; /* no change */
- else
- plex->state = plex_degraded; /* we're not all there */
- }
-
- if (plex->state != oldstate)
- break;
- return 0; /* no change */
-
- case plex_down: /* want to take it down */
- if (((vps == volplex_onlyus) /* we're the only one up */
- ||(vps == volplex_onlyusup)) /* we're the only one up */
- &&(!(flags & setstate_force))) /* and we don't want to use force */
- return 0; /* can't do it */
- plex->state = state; /* do it */
- break;
-
- /* This is only requested by the driver.
- * Trust ourselves */
- case plex_faulty:
- plex->state = state; /* do it */
- break;
-
- case plex_initializing:
- /* XXX consider what safeguards we need here */
- if ((flags & setstate_force) == 0)
- return 0;
- plex->state = state; /* do it */
- break;
-
- /* What's this? */
- default:
- return 0;
- }
- printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
- /* Now see what we have left, and whether
- * we're taking the volume down */
- if (plex->volno >= 0) { /* we have a volume */
- struct volume *vol = &VOL[plex->volno];
-
- vps = vpstate(plex); /* get our combined state again */
- if ((flags & setstate_norecurse) == 0) { /* we can recurse */
- if ((vol->state == volume_up)
- && (vps == volplex_alldown)) /* and we're all down */
- set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
- else if ((vol->state == volume_down)
- && (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
- set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
- }
- }
- if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
- if (flags & setstate_noupdate) /* don't update now, */
- vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
- else
- save_config(); /* yes: save the updated configuration */
- }
- return 1;
-}
-
-/* Update the state of a plex dependent on its plexes.
- * Also rebuild the unmapped_region and defective_region table */
-int
-set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
+/* Invalidate the subdisks belonging to a plex: call set_sd_state (with setstate_force) on each subdisk that is reviving, reborn or up, asking for the given state; subdisks in any other state are skipped */
+void
+invalidate_subdisks(struct plex *plex, enum sdstate state)
{
- int plexno;
- enum plexstates {
- plex_downstate = 1, /* found a plex which is down */
- plex_degradedstate = 2, /* found a plex which is halfway up */
- plex_upstate = 4 /* found a plex which is completely up */
- };
-
- int plexstatemap = 0; /* note the states we find */
- struct volume *vol = &VOL[volno]; /* point to our volume */
-
- if (vol->state == state) /* we're there already */
- return 0; /* no change */
- if (vol->state == volume_unallocated) /* no volume to do anything with, */
- return 0;
-
- for (plexno = 0; plexno < vol->plexes; plexno++) {
- struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
- switch (plex->state) {
- case plex_degraded:
- case plex_flaky:
- case plex_reviving:
- plexstatemap |= plex_degradedstate;
- break;
+ int sdno; /* index into plex->sdnos */
- case plex_up:
- plexstatemap |= plex_upstate;
- break;
+ for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk of this plex */
+ struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
- default:
- plexstatemap |= plex_downstate;
+ switch (sd->state) { /* decide from the subdisk's current state */
+ case sd_unallocated:
+ case sd_uninit:
+ case sd_init:
+ case sd_initializing:
+ case sd_empty:
+ case sd_obsolete:
+ case sd_stale:
+ case sd_crashed:
+ case sd_down: /* all of the labels above share the break that follows: subdisk left as it is */
break;
- }
- }
- if (state == volume_up) { /* want to come up */
- if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */
- vol->state = volume_up; /* did it */
- printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
- if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
- if (flags & setstate_noupdate) /* don't update now, */
- vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
- else
- save_config(); /* yes: save the updated configuration */
- }
- return 1;
- }
- /* Here we should check whether we have enough
- * coverage for the complete volume. Writeme XXX */
- } else if (state == volume_down) { /* want to go down */
- if ((vol->opencount == 0) /* not open */
- ||(flags & setstate_force != 0)) { /* or we're forcing */
- vol->state = volume_down;
- printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
- if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */
- if (flags & setstate_noupdate) /* don't update now, */
- vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */
- else
- save_config(); /* yes: save the updated configuration */
- }
- return 1;
+ case sd_reviving:
+ case sd_reborn:
+ case sd_up: /* reviving, reborn or up: request the new state */
+ set_sd_state(plex->sdnos[sdno], state, setstate_force); /* return value is not examined */
}
}
- return 0; /* no change */
}
/* Start an object, in other words do what we can to get it up.
@@ -645,34 +670,56 @@ void
start_object(struct vinum_ioctl_msg *data)
{
int status;
- int realstatus; /* what we really have */
int objindex = data->index; /* data gets overwritten */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
+ enum setstateflags flags; /* flags handed to the set_*_state calls below */
+
+ if (data->force != 0) /* are we going to use force? */
+ flags = setstate_force; /* yes */
+ else
+ flags = setstate_none; /* no */
switch (data->type) {
case drive_object:
- status = set_drive_state(objindex, drive_up, setstate_none);
- realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
+ status = set_drive_state(objindex, drive_up, flags);
+ if (DRIVE[objindex].state != drive_up) /* reply EINVAL unless the drive really is up now */
+ ioctl_reply->error = EINVAL;
+ else
+ ioctl_reply->error = 0;
break;
case sd_object:
- status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
- realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
- break;
-
- case plex_object:
- if (PLEX[objindex].state == plex_reviving) { /* reviving, */
+ if (SD[objindex].state == sd_reviving) { /* already reviving, */
ioctl_reply->error = revive_block(objindex); /* revive another block */
ioctl_reply->msg[0] = '\0'; /* no comment */
return;
}
- status = set_plex_state(objindex, plex_up, setstate_none);
- realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
+ status = set_sd_state(objindex, sd_up, flags); /* set state */
+ if (status == EAGAIN) { /* first revive, */
+ ioctl_reply->error = revive_block(objindex); /* revive the first block */
+ ioctl_reply->error = EAGAIN; /* NOTE(review): this overwrites the revive_block() result stored on the previous line */
+ } else {
+ if (SD[objindex].state != sd_up) /* reply EINVAL unless the subdisk really is up now */
+ ioctl_reply->error = EINVAL;
+ else
+ ioctl_reply->error = 0;
+ }
+ break;
+
+ case plex_object:
+ status = set_plex_state(objindex, plex_up, flags);
+ if (PLEX[objindex].state != plex_up) /* reply EINVAL unless the plex really is up now */
+ ioctl_reply->error = EINVAL;
+ else
+ ioctl_reply->error = 0;
break;
case volume_object:
- status = set_volume_state(objindex, volume_up, setstate_none);
- realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
+ status = set_volume_state(objindex, volume_up, flags);
+ if (VOL[objindex].state != volume_up) /* reply EINVAL unless the volume really is up now */
+ ioctl_reply->error = EINVAL;
+ else
+ ioctl_reply->error = 0;
break;
default:
@@ -683,10 +730,6 @@ start_object(struct vinum_ioctl_msg *data)
/* There's no point in saying anything here:
* the userland program does it better */
ioctl_reply->msg[0] = '\0';
- if (realstatus == 0) /* couldn't do it */
- ioctl_reply->error = EINVAL;
- else
- ioctl_reply->error = 0;
}
/* Stop an object, in other words do what we can to get it down