diff options
| -rw-r--r-- | sys/dev/vinum/vinumrevive.c | 221 |
1 files changed, 122 insertions, 99 deletions
diff --git a/sys/dev/vinum/vinumrevive.c b/sys/dev/vinum/vinumrevive.c index 315432c9d471..530136234839 100644 --- a/sys/dev/vinum/vinumrevive.c +++ b/sys/dev/vinum/vinumrevive.c @@ -37,7 +37,7 @@ * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: vinumrevive.c,v 1.10 2000/01/03 03:40:54 grog Exp grog $ + * $Id: vinumrevive.c,v 1.13 2000/05/10 22:43:01 grog Exp grog $ * $FreeBSD$ */ @@ -52,6 +52,7 @@ * plex (which means a programming error if we get * here at all; FIXME). */ + int revive_block(int sdno) { @@ -156,7 +157,7 @@ revive_block(int sdno) /* Don't need that bp after all, we'll get a new one. */ bp->b_flags |= B_INVAL; brelse(bp); /* is this kosher? */ - bp = parityrebuild(plex, plexblkno, size, 0, &lock); /* do the grunt work */ + bp = parityrebuild(plex, plexblkno, size, rebuildparity, &lock, NULL); /* do the grunt work */ if (bp == NULL) /* no buffer space */ return ENOMEM; /* chicken out */ } else { /* data block */ @@ -182,6 +183,7 @@ revive_block(int sdno) /* Now write to the subdisk */ { bp->b_dev = VINUM_SD(sdno); /* create the device number */ + bp->b_flags &= ~B_DONE; /* no longer done */ bp->b_ioflags = BIO_ORDERED; /* and make this an ordered write */ bp->b_iocmd = BIO_WRITE; BUF_LOCKINIT(bp); /* get a lock for the buffer */ @@ -253,120 +255,128 @@ revive_block(int sdno) * called repeatedly from userland. */ void -parityops(struct vinum_ioctl_msg *data, enum parityop op) +parityops(struct vinum_ioctl_msg *data) { int plexno; struct plex *plex; int size; /* I/O transfer size, bytes */ - int i; int stripe; /* stripe number in plex */ int psd; /* parity subdisk number */ struct rangelock *lock; /* lock on stripe */ struct _ioctl_reply *reply; - u_int64_t *pstripep; /* pointer to our stripe counter */ + off_t pstripe; /* pointer to our stripe counter */ struct buf *pbp; + off_t errorloc; /* offset of parity error */ + enum parityop op; /* operation to perform */ - pbp = NULL; plexno = data->index; + op = data->op; + pbp = NULL; reply = (struct _ioctl_reply *) data; reply->error = EAGAIN; /* expect to repeat this call */ - reply->msg[0] = '\0'; plex = &PLEX[plexno]; if (!isparity(plex)) { /* not RAID-4 or RAID-5 */ reply->error = EINVAL; return; } - if (op == rebuildparity) /* point to our counter */ - pstripep = &plex->rebuildblock; - else - pstripep = &plex->checkblock; - stripe = *pstripep / plex->stripesize; /* stripe number */ + pstripe = data->offset; + stripe = pstripe / plex->stripesize; /* stripe number */ psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */ size = min(DEFAULT_REVIVE_BLOCKSIZE, /* one block at a time */ plex->stripesize << DEV_BSHIFT); - pbp = parityrebuild(plex, *pstripep, size, op == checkparity, &lock); /* do the grunt work */ - if (pbp == NULL) /* no buffer space */ + pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */ + if (pbp == NULL) { /* no buffer space */ + reply->error = ENOMEM; return; /* chicken out */ - + } /* * Now we have a result in the data buffer of * the parity buffer header, which we have kept. * Decide what to do with it. */ + reply->msg[0] = '\0'; /* until shown otherwise */ if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */ - if (op == checkparity) { - int *parity_buf; - int isize; - - parity_buf = (int *) pbp->b_data; - isize = pbp->b_bcount / sizeof(int); - for (i = 0; i < isize; i++) { - if (parity_buf[i] != 0) { - reply->error = EIO; - sprintf(reply->msg, - "Parity incorrect at offset 0x%lx\n", - (u_long) (*pstripep << DEV_BSHIFT) * (plex->subdisks - 1) - + i * sizeof(int)); - break; - } - } - } else { /* rebuildparity */ + if ((op == rebuildparity) + || (op == rebuildandcheckparity)) { pbp->b_iocmd = BIO_WRITE; pbp->b_resid = pbp->b_bcount; BUF_LOCKINIT(pbp); /* get a lock for the buffer */ BUF_LOCK(pbp, LK_EXCLUSIVE); /* and lock it */ - sdio(pbp); /* perform the I/O */ + sdio(pbp); /* write the parity block */ bufwait(pbp); } + if (((op == checkparity) + || (op == rebuildandcheckparity)) + && (errorloc != -1)) { + if (op == checkparity) + reply->error = EIO; + sprintf(reply->msg, + "Parity incorrect at offset 0x%llx\n", + errorloc); + } if (reply->error == EAGAIN) { /* still OK, */ - *pstripep += (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */ - if (*pstripep >= SD[plex->sdnos[0]].sectors) { /* finished */ - *pstripep = 0; + plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */ + if (pstripe >= SD[plex->sdnos[0]].sectors) { /* finished */ + plex->checkblock = 0; reply->error = 0; } } - if (pbp->b_ioflags & BIO_ERROR) - reply->error = pbp->b_error; - pbp->b_flags |= B_INVAL; - pbp->b_ioflags &= ~BIO_ERROR; - brelse(pbp); - } + if (pbp->b_ioflags & BIO_ERROR) + reply->error = pbp->b_error; + pbp->b_flags |= B_INVAL; + pbp->b_ioflags &= ~BIO_ERROR; + brelse(pbp); unlockrange(plexno, lock); } /* * Rebuild a parity stripe. Return pointer to - * parity bp. On return, the band is locked. The - * caller must unlock the band and release the - * buffer header. + * parity bp. On return, + * + * 1. The band is locked. The caller must unlock + * the band and release the buffer header. + * + * 2. All buffer headers except php have been + * released. The caller must release pbp. + * + * 3. For checkparity and rebuildandcheckparity, + * the parity is compared with the current + * parity block. If it's different, the + * offset of the error is returned to + * errorloc. The caller can set the value of + * the pointer to NULL if this is called for + * rebuilding parity. */ struct buf * parityrebuild(struct plex *plex, u_int64_t pstripe, int size, - int check, /* 1 if only checking */ - struct rangelock **lockp) + enum parityop op, + struct rangelock **lockp, + off_t * errorloc) { int error; int s; int sdno; int stripe; /* stripe number */ - int *parity_buf; /* the address supplied by geteblk */ + int *parity_buf; /* buffer address for current parity block */ + int *newparity_buf; /* and for new parity block */ int mysize; /* I/O transfer size for this transfer */ int isize; /* mysize in ints */ int i; int psd; /* parity subdisk number */ + int newpsd; /* and "subdisk number" of new parity */ struct buf **bpp; /* pointers to our bps */ struct buf *pbp; /* buffer header for parity stripe */ int *sbuf; + int bufcount; /* number of buffers we need */ stripe = pstripe / plex->stripesize; /* stripe number */ psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */ parity_buf = NULL; /* to keep the compiler happy */ error = 0; - pbp = NULL; /* * It's possible that the default transfer size @@ -380,38 +390,45 @@ parityrebuild(struct plex *plex, */ mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT); isize = mysize / (sizeof(int)); /* number of ints in the buffer */ - - bpp = (struct buf **) Malloc(plex->subdisks * sizeof(struct buf *)); /* array of pointers to bps */ - - /* First, issue requests for all subdisks in parallel */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - /* Get a buffer header and initialize it. */ - s = splbio(); - bpp[sdno] = geteblk(mysize); /* Get a buffer */ - if (bpp[sdno] == NULL) { - while (sdno-- > 0) { /* release the ones we got */ - bpp[sdno]->b_flags |= B_INVAL; - brelse(bpp[sdno]); /* give back our resources */ + bufcount = plex->subdisks + 1; /* sd buffers plus result buffer */ + newpsd = plex->subdisks; + bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */ + + /* First, build requests for all subdisks */ + for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */ + if ((sdno != psd) || (op != rebuildparity)) { + /* Get a buffer header and initialize it. */ + s = splbio(); + bpp[sdno] = geteblk(mysize); /* Get a buffer */ + if (bpp[sdno] == NULL) { + while (sdno-- > 0) { /* release the ones we got */ + bpp[sdno]->b_flags |= B_INVAL; + brelse(bpp[sdno]); /* give back our resources */ + } + splx(s); + printf("vinum: can't allocate buffer space for parity op.\n"); + return NULL; /* no bpps */ } splx(s); - printf("vinum: can't allocate buffer space\n"); - return NULL; /* no bpps */ + if (sdno == psd) + parity_buf = (int *) bpp[sdno]->b_data; + if (sdno == newpsd) /* the new one? */ + bpp[sdno]->b_dev = VINUM_SD(psd); /* write back to the parity SD */ + else + bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */ + bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */ + bpp[sdno]->b_flags = 0; + bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize; + bpp[sdno]->b_resid = bpp[sdno]->b_bcount; + bpp[sdno]->b_blkno = pstripe; /* transfer from here */ } - splx(s); - if (sdno == psd) { - pbp = bpp[sdno]; - parity_buf = (int *) bpp[sdno]->b_data; - if (!check) - bzero(parity_buf, mysize); - } - bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */ - bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */ - bpp[sdno]->b_flags = 0; - bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize; - bpp[sdno]->b_resid = bpp[sdno]->b_bcount; - bpp[sdno]->b_blkno = pstripe; /* read from here */ } + /* Initialize result buffer */ + pbp = bpp[newpsd]; + newparity_buf = (int *) bpp[newpsd]->b_data; + bzero(newparity_buf, mysize); + /* * Now lock the stripe with the first non-parity * bp as locking bp. @@ -423,11 +440,11 @@ parityrebuild(struct plex *plex, /* * Then issue requests for all subdisks in * parallel. Don't transfer the parity stripe - * if we're rebuilding parity. We have already - * initialized it to 0. + * if we're rebuilding parity, unless we also + * want to check it. */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - if ((sdno != psd) || check) { + for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */ + if ((sdno != psd) || (op != rebuildparity)) { BUF_LOCKINIT(bpp[sdno]); /* get a lock for the buffer */ BUF_LOCK(bpp[sdno], LK_EXCLUSIVE); /* and lock it */ sdio(bpp[sdno]); @@ -443,35 +460,41 @@ parityrebuild(struct plex *plex, * delay is minimal. */ for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - if ((sdno != psd) || check) { + if ((sdno != psd) || (op != rebuildparity)) { bufwait(bpp[sdno]); if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */ error = bpp[sdno]->b_error; + else if (sdno != psd) { /* update parity */ + sbuf = (int *) bpp[sdno]->b_data; + for (i = 0; i < isize; i++) + ((int *) newparity_buf)[i] ^= sbuf[i]; /* xor in the buffer */ + } + } + if (sdno != psd) { /* release all bps except parity */ + bpp[sdno]->b_flags |= B_INVAL; + brelse(bpp[sdno]); /* give back our resources */ } } /* - * Finally, do the xors. To save time, we do - * the XOR wordwise. This requires sectors to - * be a multiple of the length of an int, which - * is currently always the case. We do this in - * a separate loop to avoid a race condition - * with the parity stripe. + * If we're checking, compare the calculated + * and the read parity block. If they're + * different, return the plex-relative offset; + * otherwise return -1. */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - if ((sdno != psd) || check) { - if (error == 0) { /* still OK, */ - sbuf = (int *) bpp[sdno]->b_data; - for (i = 0; i < isize; i++) - ((int *) parity_buf)[i] ^= sbuf[i]; /* xor in the buffer */ - } - if (sdno != psd) { /* release all bps except parity */ - bpp[sdno]->b_flags |= B_INVAL; - brelse(bpp[sdno]); /* give back our resources */ + if ((op == checkparity) + || (op == rebuildandcheckparity)) { + *errorloc = -1; /* no error yet */ + for (i = 0; i < isize; i++) { + if (parity_buf[i] != newparity_buf[i]) { + *errorloc = (u_long) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1) + + i * sizeof(int); + break; } } + bpp[psd]->b_flags |= B_INVAL; + brelse(bpp[psd]); /* give back our resources */ } - /* release our resources */ Free(bpp); if (error) { @@ -483,7 +506,7 @@ parityrebuild(struct plex *plex, /* * Initialize a subdisk by writing zeroes to the - * complete address space. If check is set, + * complete address space. If verify is set, * check each transfer for correctness. * * Each call to this function writes (and maybe |
