diff options
| author | Greg Lehey <grog@FreeBSD.org> | 1999-01-21 00:40:32 +0000 |
|---|---|---|
| committer | Greg Lehey <grog@FreeBSD.org> | 1999-01-21 00:40:32 +0000 |
| commit | 6d930639c333ddcdec7f1d0fcfa01e09b1ba36ab (patch) | |
| tree | fa828e3b7eb7d35375c6c275733820dcf6acabfb | |
| parent | 2178c45997177ec75af11ed332e4d0e7fc2711f9 (diff) | |
Notes
| -rw-r--r-- | sys/dev/vinum/vinumstate.c | 869 |
1 files changed, 456 insertions, 413 deletions
diff --git a/sys/dev/vinum/vinumstate.c b/sys/dev/vinum/vinumstate.c index eea83e148f71..8d44fecfcdf2 100644 --- a/sys/dev/vinum/vinumstate.c +++ b/sys/dev/vinum/vinumstate.c @@ -33,7 +33,7 @@ * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: state.c,v 1.5 1998/12/28 04:56:23 peter Exp $ + * $Id: vinumstate.c,v 2.10 1999/01/17 06:19:23 grog Exp grog $ */ #define REALLYKERNEL @@ -44,7 +44,7 @@ /* Update drive state */ /* Return 1 if the state changes, otherwise 0 */ int -set_drive_state(int driveno, enum drivestate state, int flags) +set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags) { struct drive *drive = &DRIVE[driveno]; int oldstate = drive->state; @@ -53,35 +53,25 @@ set_drive_state(int driveno, enum drivestate state, int flags) if (drive->state == drive_unallocated) /* no drive to do anything with, */ return 0; - if (state != oldstate) { /* don't change it if it's not different */ - if (state == drive_down) { /* the drive's going down */ - if ((flags & setstate_force) || (drive->opencount == 0)) { /* we can do it */ - /* We can't call close() from an interrupt - * context. Instead, we do it when we - * next call strategy(). This will change - * when the vinum daemon comes onto the scene */ - if (!(flags & setstate_noupdate)) /* we can close it */ - close_drive(drive); - } else - return 0; /* don't do it */ - } - drive->state = state; /* set the state */ - printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state)); - if (((drive->state == drive_up) - || ((drive->state == drive_coming_up))) + if (newstate != oldstate) { /* don't change it if it's not different */ + if ((newstate == drive_down) /* the drive's going down */ + &&(!(flags & setstate_force)) + && (drive->opencount != 0)) /* we can't do it */ + return 0; /* don't do it */ + drive->state = newstate; /* set the state */ + if (drive->label.name[0] != '\0') /* we have a name, */ + printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state)); + if ((drive->state == drive_up) && (drive->vp == NULL)) /* should be open, but we're not */ - init_drive(drive); /* which changes the state again */ - if ((state != oldstate) /* state has changed */ - &&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */ + init_drive(drive, 1); /* which changes the state again */ + if (newstate != oldstate) { /* state has changed */ for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */ if (SD[sdno].driveno == driveno) /* belongs to this drive */ - set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */ + update_sd_state(sdno); /* update the state */ } } - if (flags & setstate_noupdate) /* don't update now, */ - vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */ - else - save_config(); /* yes: save the updated configuration */ + if ((flags & setstate_configuring) == 0) /* configuring? */ + save_config(); /* no: save the updated configuration now */ return 1; } return 0; @@ -95,24 +85,27 @@ set_drive_state(int driveno, enum drivestate state, int flags) * only) and internally. */ int -set_sd_state(int sdno, enum sdstate state, enum setstateflags flags) +set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) { struct sd *sd = &SD[sdno]; + struct plex *plex; + struct volume *vol; int oldstate = sd->state; int status = 1; /* status to return */ - if (state == oldstate) - return 0; /* no change */ - - if (sd->state == sd_unallocated) /* no subdisk to do anything with, */ + if ((newstate == oldstate) + || (sd->state == sd_unallocated)) /* no subdisk to do anything with, */ return 0; if (sd->driveoffset < 0) { /* not allocated space */ sd->state = sd_down; - if (state != sd_down) + if (newstate != sd_down) { + if (sd->plexno >= 0) + sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */ return -1; + } } else { /* space allocated */ - switch (state) { + switch (newstate) { case sd_down: if ((!flags & setstate_force) /* but gently */ &&(sd->plexno >= 0)) /* and we're attached to a plex, */ @@ -125,35 +118,82 @@ set_sd_state(int sdno, enum sdstate state, enum setstateflags flags) switch (sd->state) { case sd_crashed: case sd_down: /* been down, no data lost */ - if ((sd->plexno) /* we're associated with a plex */ + if ((sd->plexno >= 0) /* we're associated with a plex */ &&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */ ||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */ - break; + break; /* do it */ /* XXX Get this right: make sure that other plexes in * the volume cover this address space, otherwise - * we make this one sd_up */ + * we make this one sd_up. + * + * Do we even want this any more? + */ sd->state = sd_reborn; /* here it is again */ - printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state)); + printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(newstate)); status = -1; break; case sd_init: /* brand new */ if (flags & setstate_configuring) /* we're doing this while configuring */ break; - sd->state = sd_empty; /* nothing in it */ - printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state)); - status = -1; - break; - - case sd_initializing: - break; /* go on and do it */ + /* otherwise it's like being empty */ + /* FALLTHROUGH */ case sd_empty: - if ((sd->plexno) /* we're associated with a plex */ + if ((sd->plexno >= 0) /* we're associated with a plex */ &&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */ ||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */ break; - return 0; /* can't do it */ + /* Otherwise it's just out of date */ + /* FALLTHROUGH */ + + case sd_stale: /* out of date info, need reviving */ + case sd_obsolete: + /* 1. If the subdisk is not part of a plex, bring it up, don't revive. + + * 2. If the subdisk is part of a one-plex volume or an unattached plex, + * and it's not RAID-5, we *can't revive*. The subdisk doesn't + * change its state. + * + * 3. If the subdisk is part of a one-plex volume or an unattached plex, + * and it's RAID-5, but more than one subdisk is down, we *still + * can't revive*. The subdisk doesn't change its state. + * + * 4. If the subdisk is part of a multi-plex volume, we'll change to + * reviving and let the revive routines find out whether it will work + * or not. If they don't, the revive stops with an error message, + * but the state doesn't change (FWIW).*/ + if (sd->plexno < 0) /* no plex associated, */ + break; /* bring it up */ + plex = &PLEX[sd->plexno]; + if (plex->volno >= 0) /* have a volume */ + vol = &VOL[plex->volno]; + else + vol = NULL; + if (((vol == NULL) /* no volume */ ||(vol->plexes == 1)) /* or only one plex in volume */ + &&((plex->organization != plex_raid5) /* or it's a RAID-5 plex */ + ||(plex->sddowncount > 1))) /* with more than one subdisk down, */ + return 0; /* can't do it */ + sd->state = sd_reviving; /* put in reviving state */ + sd->revived = 0; /* nothing done yet */ + status = EAGAIN; /* need to repeat */ + break; + + /* XXX This is silly. We need to be able to + * bring the subdisk up when it's finished + * initializing, but not from the user. We + * use the same ioctl in each case, but Vinum(8) + * doesn't supply the -f flag, so we use that + * to decide whether to do it or not */ + case sd_initializing: + if (flags & setstate_force) + break; /* do it if we have to */ + return 0; /* no */ + + case sd_reviving: + if (flags & setstate_force) /* insist, */ + break; + return EAGAIN; /* no, try again */ default: /* can't do it */ /* There's no way to bring subdisks up directly from @@ -168,186 +208,359 @@ set_sd_state(int sdno, enum sdstate state, enum setstateflags flags) return 0; /* don't do it */ } } - sd->state = state; - printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state)); - if ((flags & setstate_norecurse) == 0) - set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */ - if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */ - if (setstate_noupdate) /* we can't update now, */ - vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */ - else - save_config(); - } + if (status == 1) { /* we can do it, */ + sd->state = newstate; + printf("vinum: %s is %s\n", sd->name, sd_state(sd->state)); + } else /* we don't get here with status 0 */ + printf("vinum: %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(newstate)); + if (sd->plexno >= 0) /* we belong to a plex */ + update_plex_state(sd->plexno); /* update plex state */ + if ((flags & setstate_configuring) == 0) /* save config now */ + save_config(); return status; } -/* Called from request routines when they find - * a subdisk which is not kosher. Decide whether - * it warrants changing the state. Return - * REQUEST_DOWN if we can't use the subdisk, - * REQUEST_OK if we can. */ -enum requeststatus -checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend) +/* Set the state of a plex dependent on its subdisks. + * This time round, we'll let plex state just reflect + * aggregate subdisk state, so this becomes an order of + * magnitude less complicated. In particular, ignore + * the requested state. + */ +int +set_plex_state(int plexno, enum plexstate state, enum setstateflags flags) { - struct plex *plex = &PLEX[sd->plexno]; - int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */ + struct plex *plex; /* point to our plex */ + enum plexstate oldstate; + enum volplexstate vps; /* how do we compare with the other plexes? */ - /* first, see if the plex wants to be accessed */ - switch (plex->state) { - case plex_reviving: - /* When writing, we'll write anything that starts - * up to the current revive pointer, but we'll - * only accept a read which finishes before the - * current revive pointer. - */ - if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */ - ||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */ - if (writeop) { /* writing to a consistent down disk */ - if (DRIVE[sd->driveno].state == drive_up) - set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ - else - set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ - } - return REQUEST_DOWN; /* that part of the plex is still down */ - } else if (diskend >= plex->revived) /* write finishes beyond revive pointer */ - rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */ - /* FALLTHROUGH */ - - case plex_up: - case plex_degraded: - case plex_flaky: - /* We can access the plex: let's see - * how the subdisk feels */ - switch (sd->state) { - case sd_up: - return REQUEST_OK; + plex = &PLEX[plexno]; /* point to our plex */ + oldstate = plex->state; - case sd_reborn: - if (writeop) - return REQUEST_OK; /* always write to a reborn disk */ - /* Handle the mapping. We don't want to reject - * a read request to a reborn subdisk if that's - * all we have. XXX */ - return REQUEST_DOWN; + if ((plex->state == plex_unallocated) /* or no plex to do anything with, */ + ||((state == oldstate) /* or we're already there */ + &&(state != plex_up))) /* and it's not up */ + return 0; - case sd_down: - case sd_crashed: - if (writeop) { /* writing to a consistent down disk */ - if (DRIVE[sd->driveno].state == drive_up) - set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ - else - set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ - } - return REQUEST_DOWN; /* and it's down one way or another */ + vps = vpstate(plex); /* how do we compare with the other plexes? */ - default: - return REQUEST_DOWN; - } + switch (state) { + /* We can't bring the plex up, even by force, + * unless it's ready. update_plex_state + * checks that */ + case plex_up: /* bring the plex up */ + update_plex_state(plex->plexno); /* it'll come up if it can */ + break; + + case plex_down: /* want to take it down */ + if (((vps == volplex_onlyus) /* we're the only one up */ + ||(vps == volplex_onlyusup)) /* we're the only one up */ + &&(!(flags & setstate_force))) /* and we don't want to use force */ + return 0; /* can't do it */ + plex->state = state; /* do it */ + invalidate_subdisks(plex, sd_down); /* and down all up subdisks */ + break; + + /* This is only requested internally. + * Trust ourselves */ + case plex_faulty: + plex->state = state; /* do it */ + invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */ + break; + + case plex_initializing: + /* XXX consider what safeguards we need here */ + if ((flags & setstate_force) == 0) + return 0; + plex->state = state; /* do it */ + break; + /* What's this? */ default: - return REQUEST_DOWN; + return 0; + } + if (plex->state != oldstate) /* we've changed, */ + printf("vinum: %s is %s\n", plex->name, plex_state(plex->state)); /* tell them about it */ + /* Now see what we have left, and whether + * we're taking the volume down */ + if (plex->volno >= 0) /* we have a volume */ + update_volume_state(plex->volno); /* update its state */ + if ((flags & setstate_configuring) == 0) /* save config now */ + save_config(); /* yes: save the updated configuration */ + return 1; +} + +/* Update the state of a plex dependent on its plexes. */ +int +set_volume_state(int volno, enum volumestate state, enum setstateflags flags) +{ + struct volume *vol = &VOL[volno]; /* point to our volume */ + + if ((vol->state == state) /* we're there already */ + ||(vol->state == volume_unallocated)) /* or no volume to do anything with, */ + return 0; + + if (state == volume_up) /* want to come up */ + update_volume_state(volno); + else if (state == volume_down) { /* want to go down */ + if ((vol->opencount == 0) /* not open */ + ||((flags & setstate_force) != 0)) { /* or we're forcing */ + vol->state = volume_down; + printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state)); + if ((flags & setstate_configuring) == 0) /* save config now */ + save_config(); /* yes: save the updated configuration */ + return 1; + } } + return 0; /* no change */ } +/* Set the state of a subdisk based on its environment */ void -add_defective_region(struct plex *plex, off_t offset, size_t length) +update_sd_state(int sdno) { -/* XXX get this ordered, and coalesce regions if necessary */ - if (++plex->defective_regions > plex->defective_region_count) - EXPAND(plex->defective_region, - struct plexregion, - plex->defective_region_count, - PLEX_REGION_TABLE_SIZE); - plex->defective_region[plex->defective_regions - 1].offset = offset; - plex->defective_region[plex->defective_regions - 1].length = length; + struct sd *sd; + struct drive *drive; + enum sdstate oldstate; + + sd = &SD[sdno]; + oldstate = sd->state; + drive = &DRIVE[sd->driveno]; + + if (drive->state == drive_up) { + switch (sd->state) { + case sd_down: + case sd_crashed: + sd->state = sd_reborn; /* back up again with no loss */ + break; + + default: + break; + } + } else { /* down or worse */ + switch (sd->state) { + case sd_up: + case sd_reborn: + case sd_reviving: + sd->state = sd_crashed; /* lost our drive */ + break; + + default: + break; + } + } + if (sd->state != oldstate) /* state has changed, */ + printf("vinum: %s is %s\n", sd->name, sd_state(sd->state)); /* say so */ + if (sd->plexno >= 0) /* we're part of a plex, */ + update_plex_state(sd->plexno); /* update its state */ } +/* Set the state of a plex based on its environment */ void -add_unmapped_region(struct plex *plex, off_t offset, size_t length) +update_plex_state(int plexno) { - if (++plex->unmapped_regions > plex->unmapped_region_count) - EXPAND(plex->unmapped_region, - struct plexregion, - plex->unmapped_region_count, - PLEX_REGION_TABLE_SIZE); - plex->unmapped_region[plex->unmapped_regions - 1].offset = offset; - plex->unmapped_region[plex->unmapped_regions - 1].length = length; + struct plex *plex; /* point to our plex */ + enum plexstate oldstate; + enum volplexstate vps; /* how do we compare with the other plexes? */ + enum sdstates statemap; /* get a map of the subdisk states */ + + plex = &PLEX[plexno]; /* point to our plex */ + oldstate = plex->state; + + vps = vpstate(plex); /* how do we compare with the other plexes? */ + statemap = sdstatemap(plex); /* get a map of the subdisk states */ + + if (statemap == sd_upstate) /* all subdisks ready for action */ + /* All the subdisks are up. This also means that + * they are consistent, so we can just bring + * the plex up */ + plex->state = plex_up; /* go for it */ + else if (statemap == sd_emptystate) { /* nothing done yet */ + if (((vps & (volplex_otherup | volplex_onlyus)) == 0) /* nothing is up */ &&(plex->state == plex_init) /* we're brand spanking new */ + &&(plex->volno >= 0) /* and we have a volume */ + &&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */ + /* Conceptually, an empty plex does not contain valid data, + * but normally we'll see this state when we have just + * created a plex, and it's either consistent from earlier, + * or we don't care about the previous contents (we're going + * to create a file system or use it for swap). + * + * We need to do this in one swell foop: on the next call + * we will no longer be just empty. + * + * This code assumes that all the other plexes are also + * capable of coming up (i.e. all the sds are up), but + * that's OK: we'll come back to this function for the remaining + * plexes in the volume. */ + struct volume *vol = &VOL[plex->volno]; + int plexno; + + for (plexno = 0; plexno < vol->plexes; plexno++) + PLEX[vol->plex[plexno]].state = plex_up; + } else if (vps & volplex_otherup == 0) { /* no other plexes up */ + int sdno; + + plex->state = plex_up; /* we can call that up */ + for (sdno = 0; sdno < plex->subdisks; sdno++) { /* change the subdisks to up state */ + SD[plex->sdnos[sdno]].state = sd_up; + printf("vinum: %s is up\n", SD[plex->sdnos[sdno]].name); /* tell them about it */ + } + } else + plex->state = plex_faulty; /* no, it's down */ + } else if (statemap & (sd_upstate | sd_rebornstate) == statemap) /* all up or reborn */ + plex->state = plex_flaky; + else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */ + plex->state = plex_corrupt; /* corrupt */ + else if (statemap & sd_initstate) /* some subdisks initializing */ + plex->state = plex_initializing; + else /* nothing at all up */ + plex->state = plex_faulty; + + if (plex->state != oldstate) /* state has changed, */ + printf("vinum: %s is %s\n", plex->name, plex_state(plex->state)); /* tell them about it */ + if (plex->volno >= 0) /* we're part of a volume, */ + update_volume_state(plex->volno); /* update its state */ } -/* Rebuild a plex free list and set state if - * we have a configuration error */ +/* Set volume state based on its components */ void -rebuild_plex_unmappedlist(struct plex *plex) +update_volume_state(int volno) { - int sdno; - struct sd *sd; - int lastsdend = 0; /* end offset of last subdisk */ + struct volume *vol; /* our volume */ + int plexno; + enum volumestate oldstate; + + vol = &VOL[volno]; /* point to our volume */ + oldstate = vol->state; - if (plex->unmapped_region != NULL) { /* we're going to rebuild it */ - Free(plex->unmapped_region); - plex->unmapped_region = NULL; - plex->unmapped_regions = 0; - plex->unmapped_region_count = 0; + for (plexno = 0; plexno < vol->plexes; plexno++) { + struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */ + if (plex->state >= plex_corrupt) { /* something accessible, */ + vol->state = volume_up; + break; + } } - if (plex->defective_region != NULL) { - Free(plex->defective_region); - plex->defective_region = NULL; - plex->defective_regions = 0; - plex->defective_region_count = 0; + if (plexno == vol->plexes) /* didn't find an up plex */ + vol->state = volume_down; + + if (vol->state != oldstate) { /* state changed */ + printf("vinum: %s is %s\n", vol->name, volume_state(vol->state)); + save_config(); /* save the updated configuration */ } - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; - if (sd->plexoffset < lastsdend) { /* overlap */ - printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name); - set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */ - } else if (sd->plexoffset > lastsdend) /* gap */ - add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend); - else if (sd->state < sd_reborn) /* this part defective */ - add_defective_region(plex, sd->plexoffset, sd->sectors); - lastsdend = sd->plexoffset + sd->sectors; +} + +/* Called from request routines when they find + * a subdisk which is not kosher. Decide whether + * it warrants changing the state. Return + * REQUEST_DOWN if we can't use the subdisk, + * REQUEST_OK if we can. */ +/* A prior version of this function checked the plex + * state as well. At the moment, consider plex states + * information for the user only. We'll ignore them + * and use the subdisk state only. The last version of + * this file with the old logic was 2.7. XXX */ +enum requeststatus +checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend) +{ + struct plex *plex = &PLEX[sd->plexno]; + int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */ + + switch (sd->state) { + /* We shouldn't get called if the subdisk is up */ + case sd_up: + return REQUEST_OK; + + case sd_reviving: + /* Access to a reviving subdisk depends on the + * organization of the plex: + + * - If it's concatenated, access the subdisk up to its current + * revive point. If we want to write to the subdisk overlapping the + * current revive block, set the conflict flag in the request, asking + * the caller to put the request on the wait list, which will be + * attended to by revive_block when it's done. + * - if it's striped, we can't do it (we could do some hairy + * calculations, but it's unlikely to work). + * - if it's RAID-5, we can do it as long as only one + * subdisk is down */ + if (plex->state == plex_striped) /* plex is striped, */ + return REQUEST_DOWN; /* can't access it now */ + if (diskaddr > (sd->revived + + sd->plexoffset + + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */ + return REQUEST_DOWN; /* don't take the sd down again... */ + else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */ + if (writeop) { + rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */ + rq->sdno = sd->sdno; /* and which sd last caused it */ + } else + return REQUEST_DOWN; /* can't read this yet */ + } + return REQUEST_OK; + + case sd_reborn: + if (writeop) + return REQUEST_OK; /* always write to a reborn disk */ + else /* don't allow a read */ + /* Handle the mapping. We don't want to reject + * a read request to a reborn subdisk if that's + * all we have. XXX */ + return REQUEST_DOWN; + + case sd_down: + if (writeop) /* writing to a consistent down disk */ + set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ + return REQUEST_DOWN; /* and it's down one way or another */ + + case sd_crashed: + if (writeop) /* writing to a consistent down disk */ + set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ + return REQUEST_DOWN; /* and it's down one way or another */ + + default: + return REQUEST_DOWN; } } /* return a state map for the subdisks of a plex */ enum sdstates -sdstatemap(struct plex *plex, int *sddowncount) +sdstatemap(struct plex *plex) { int sdno; enum sdstates statemap = 0; /* note the states we find */ - *sddowncount = 0; /* no subdisks down yet */ + plex->sddowncount = 0; /* no subdisks down yet */ for (sdno = 0; sdno < plex->subdisks; sdno++) { struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */ switch (sd->state) { case sd_empty: statemap |= sd_emptystate; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_init: statemap |= sd_initstate; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_down: statemap |= sd_downstate; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_crashed: statemap |= sd_crashedstate; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_obsolete: statemap |= sd_obsolete; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_stale: statemap |= sd_stalestate; - (*sddowncount)++; /* another unusable subdisk */ + (plex->sddowncount)++; /* another unusable subdisk */ break; case sd_reborn: @@ -358,9 +571,16 @@ sdstatemap(struct plex *plex, int *sddowncount) statemap |= sd_upstate; break; - default: - statemap |= sd_otherstate; + case sd_initializing: + statemap |= sd_initstate; + (plex->sddowncount)++; /* another unusable subdisk */ break; + + case sd_unallocated: + case sd_uninit: + case sd_reviving: + statemap |= sd_otherstate; + (plex->sddowncount)++; /* another unusable subdisk */ } } return statemap; @@ -380,13 +600,25 @@ vpstate(struct plex *plex) vol = &VOL[plex->volno]; /* point to our volume */ for (plexno = 0; plexno < vol->plexes; plexno++) { if (&PLEX[vol->plex[plexno]] == plex) { /* us */ - if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */ +#if RAID5 + if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */ state |= volplex_onlyus; /* yes */ +#else + if (PLEX[vol->plex[plexno]].state >= plex_flaky) /* are we up? */ + state |= volplex_onlyus; /* yes */ +#endif } else { - if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */ +#if RAID5 + if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */ + state |= volplex_otherup; /* and when they were up, they were up */ + else + state |= volplex_alldown; /* and when they were down, they were down */ +#else + if (PLEX[vol->plex[plexno]].state >= plex_flaky) /* not us */ state |= volplex_otherup; /* and when they were up, they were up */ else state |= volplex_alldown; /* and when they were down, they were down */ +#endif } } return state; /* and when they were only halfway up */ @@ -401,240 +633,33 @@ allset(int a, int b) return (a & b) == b; } -/* Update the state of a plex dependent on its subdisks. - * Also rebuild the unmapped_region and defective_region table */ -int -set_plex_state(int plexno, enum plexstate state, enum setstateflags flags) -{ - int sddowncount = 0; /* number of down subdisks */ - struct plex *plex = &PLEX[plexno]; /* point to our plex */ - enum plexstate oldstate = plex->state; - enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */ - enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */ - - if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */ - return 0; /* no change */ - - if (plex->state == plex_unallocated) /* no plex to do anything with, */ - return 0; - - switch (state) { - case plex_up: - if ((plex->state == plex_initializing) /* we're initializing */ - &&(statemap != sd_upstate)) /* but SDs aren't up yet */ - return 0; /* do nothing */ - - /* We don't really care what our state was before - * if we want to come up. We rely entirely on the - * state of our subdisks and our volume */ - switch (vps) { - case volplex_onlyusdown: - case volplex_alldown: /* another plex is down, and so are we */ - if (statemap == sd_upstate) { /* all subdisks ready for action */ - if ((plex->state == plex_init) /* we're brand spanking new */ - &&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */ - /* Conceptually, an empty plex does not contain valid data, - * but normally we'll see this state when we have just - * created a plex, and it's either consistent from earlier, - * or we don't care about the previous contents (we're going - * to create a file system or use it for swap). - * - * We need to do this in one swell foop: on the next call - * we will no longer be just empty. - * - * We'll still come back to this function for the remaining - * plexes in the volume. They'll be up already, so that - * doesn't change anything, but it's not worth the additional - * code to stop doing it. */ - struct volume *vol = &VOL[plex->volno]; - int plexno; - - for (plexno = 0; plexno < vol->plexes; plexno++) - PLEX[vol->plex[plexno]].state = plex_up; - } - plex->state = plex_up; /* bring up up, anyway */ - } else - plex->state = plex_down; - break; - - case volplex_onlyusup: /* only we are up: others are down */ - case volplex_onlyus: /* we're up and alone */ - if ((statemap == sd_upstate) /* subdisks all up */ - ||(statemap == sd_emptystate)) /* or all empty */ - plex->state = plex_up; /* go for it */ - else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */ - plex->state = plex_flaky; - else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */ - plex->state = plex_degraded; /* so far no corruption */ - else - plex->state = plex_faulty; - break; - - case volplex_otherup: /* another plex is up */ - case volplex_otherupdown: /* other plexes are up and down */ - { - int sdno; - struct sd *sd; - - /* Is the data in all subdisks valid? */ - /* XXX At the moment, subdisks make false - * claims about their validity. Replace this - * when they tell the truth */ - /* No: we have invalid or down subdisks */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* look at these subdisks more carefully */ - set_sd_state(plex->sdnos[sdno], /* try to get it up */ - sd_up, - setstate_norecurse | setstate_noupdate); - sd = &SD[plex->sdnos[sdno]]; /* point to subdisk */ - /* we can make a stale subdisk up here, because - * we're in the process of bringing it up. - * This wouldn't work in set_sd_state, because - * it would allow bypassing the revive */ - if (((sd->state == sd_stale) - || (sd->state == sd_obsolete)) - && (DRIVE[sd->driveno].state == drive_up)) - sd->state = sd_up; - } - statemap = sdstatemap(plex, &sddowncount); /* get the new state map */ - /* Do we need reborn? They should now all be up */ - if (statemap == (statemap & (sd_upstate | sd_rebornstate))) { /* got something we can use */ - plex->state = plex_reviving; /* we need reviving */ - return EAGAIN; - } else - plex->state = plex_down; /* still in error */ - } - break; - - case volplex_allup: /* all plexes are up */ - case volplex_someup: - if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */ - break; /* no change */ - else - plex->state = plex_degraded; /* we're not all there */ - } - - if (plex->state != oldstate) - break; - return 0; /* no change */ - - case plex_down: /* want to take it down */ - if (((vps == volplex_onlyus) /* we're the only one up */ - ||(vps == volplex_onlyusup)) /* we're the only one up */ - &&(!(flags & setstate_force))) /* and we don't want to use force */ - return 0; /* can't do it */ - plex->state = state; /* do it */ - break; - - /* This is only requested by the driver. - * Trust ourselves */ - case plex_faulty: - plex->state = state; /* do it */ - break; - - case plex_initializing: - /* XXX consider what safeguards we need here */ - if ((flags & setstate_force) == 0) - return 0; - plex->state = state; /* do it */ - break; - - /* What's this? */ - default: - return 0; - } - printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state)); - /* Now see what we have left, and whether - * we're taking the volume down */ - if (plex->volno >= 0) { /* we have a volume */ - struct volume *vol = &VOL[plex->volno]; - - vps = vpstate(plex); /* get our combined state again */ - if ((flags & setstate_norecurse) == 0) { /* we can recurse */ - if ((vol->state == volume_up) - && (vps == volplex_alldown)) /* and we're all down */ - set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */ - else if ((vol->state == volume_down) - && (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */ - set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */ - } - } - if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */ - if (flags & setstate_noupdate) /* don't update now, */ - vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */ - else - save_config(); /* yes: save the updated configuration */ - } - return 1; -} - -/* Update the state of a plex dependent on its plexes. - * Also rebuild the unmapped_region and defective_region table */ -int -set_volume_state(int volno, enum volumestate state, enum setstateflags flags) +/* Invalidate the subdisks belonging to a plex */ +void +invalidate_subdisks(struct plex *plex, enum sdstate state) { - int plexno; - enum plexstates { - plex_downstate = 1, /* found a plex which is down */ - plex_degradedstate = 2, /* found a plex which is halfway up */ - plex_upstate = 4 /* found a plex which is completely up */ - }; - - int plexstatemap = 0; /* note the states we find */ - struct volume *vol = &VOL[volno]; /* point to our volume */ - - if (vol->state == state) /* we're there already */ - return 0; /* no change */ - if (vol->state == volume_unallocated) /* no volume to do anything with, */ - return 0; - - for (plexno = 0; plexno < vol->plexes; plexno++) { - struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */ - switch (plex->state) { - case plex_degraded: - case plex_flaky: - case plex_reviving: - plexstatemap |= plex_degradedstate; - break; + int sdno; - case plex_up: - plexstatemap |= plex_upstate; - break; + for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ + struct sd *sd = &SD[plex->sdnos[sdno]]; - default: - plexstatemap |= plex_downstate; + switch (sd->state) { + case sd_unallocated: + case sd_uninit: + case sd_init: + case sd_initializing: + case sd_empty: + case sd_obsolete: + case sd_stale: + case sd_crashed: + case sd_down: break; - } - } - if (state == volume_up) { /* want to come up */ - if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */ - vol->state = volume_up; /* did it */ - printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state)); - if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */ - if (flags & setstate_noupdate) /* don't update now, */ - vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */ - else - save_config(); /* yes: save the updated configuration */ - } - return 1; - } - /* Here we should check whether we have enough - * coverage for the complete volume. Writeme XXX */ - } else if (state == volume_down) { /* want to go down */ - if ((vol->opencount == 0) /* not open */ - ||(flags & setstate_force != 0)) { /* or we're forcing */ - vol->state = volume_down; - printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state)); - if ((flags & (setstate_configuring | setstate_recursing)) == 0) { /* save config now */ - if (flags & setstate_noupdate) /* don't update now, */ - vinum_conf.flags |= VF_DIRTYCONFIG; /* wait until later */ - else - save_config(); /* yes: save the updated configuration */ - } - return 1; + case sd_reviving: + case sd_reborn: + case sd_up: + set_sd_state(plex->sdnos[sdno], state, setstate_force); } } - return 0; /* no change */ } /* Start an object, in other words do what we can to get it up. @@ -645,34 +670,56 @@ void start_object(struct vinum_ioctl_msg *data) { int status; - int realstatus; /* what we really have */ int objindex = data->index; /* data gets overwritten */ struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ + enum setstateflags flags; + + if (data->force != 0) /* are we going to use force? */ + flags = setstate_force; /* yes */ + else + flags = setstate_none; /* no */ switch (data->type) { case drive_object: - status = set_drive_state(objindex, drive_up, setstate_none); - realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */ + status = set_drive_state(objindex, drive_up, flags); + if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */ + ioctl_reply->error = EINVAL; + else + ioctl_reply->error = 0; break; case sd_object: - status = set_sd_state(objindex, sd_up, setstate_none); /* set state */ - realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */ - break; - - case plex_object: - if (PLEX[objindex].state == plex_reviving) { /* reviving, */ + if (SD[objindex].state == sd_reviving) { /* reviving, */ ioctl_reply->error = revive_block(objindex); /* revive another block */ ioctl_reply->msg[0] = '\0'; /* no comment */ return; } - status = set_plex_state(objindex, plex_up, setstate_none); - realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */ + status = set_sd_state(objindex, sd_up, flags); /* set state */ + if (status == EAGAIN) { /* first revive, */ + ioctl_reply->error = revive_block(objindex); /* revive the first block */ + ioctl_reply->error = EAGAIN; + } else { + if (SD[objindex].state != sd_up) /* set status on whether we really did it */ + ioctl_reply->error = EINVAL; + else + ioctl_reply->error = 0; + } + break; + + case plex_object: + status = set_plex_state(objindex, plex_up, flags); + if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */ + ioctl_reply->error = EINVAL; + else + ioctl_reply->error = 0; break; case volume_object: - status = set_volume_state(objindex, volume_up, setstate_none); - realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */ + status = set_volume_state(objindex, volume_up, flags); + if (VOL[objindex].state != volume_up) /* set status on whether we really did it */ + ioctl_reply->error = EINVAL; + else + ioctl_reply->error = 0; break; default: @@ -683,10 +730,6 @@ start_object(struct vinum_ioctl_msg *data) /* There's no point in saying anything here: * the userland program does it better */ ioctl_reply->msg[0] = '\0'; - if (realstatus == 0) /* couldn't do it */ - ioctl_reply->error = EINVAL; - else - ioctl_reply->error = 0; } /* Stop an object, in other words do what we can to get it down |
