summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/dev/vinum/vinumrevive.c221
1 files changed, 122 insertions, 99 deletions
diff --git a/sys/dev/vinum/vinumrevive.c b/sys/dev/vinum/vinumrevive.c
index 315432c9d471..530136234839 100644
--- a/sys/dev/vinum/vinumrevive.c
+++ b/sys/dev/vinum/vinumrevive.c
@@ -37,7 +37,7 @@
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
- * $Id: vinumrevive.c,v 1.10 2000/01/03 03:40:54 grog Exp grog $
+ * $Id: vinumrevive.c,v 1.13 2000/05/10 22:43:01 grog Exp grog $
* $FreeBSD$
*/
@@ -52,6 +52,7 @@
* plex (which means a programming error if we get
* here at all; FIXME).
*/
+
int
revive_block(int sdno)
{
@@ -156,7 +157,7 @@ revive_block(int sdno)
/* Don't need that bp after all, we'll get a new one. */
bp->b_flags |= B_INVAL;
brelse(bp); /* is this kosher? */
- bp = parityrebuild(plex, plexblkno, size, 0, &lock); /* do the grunt work */
+ bp = parityrebuild(plex, plexblkno, size, rebuildparity, &lock, NULL); /* do the grunt work */
if (bp == NULL) /* no buffer space */
return ENOMEM; /* chicken out */
} else { /* data block */
@@ -182,6 +183,7 @@ revive_block(int sdno)
/* Now write to the subdisk */
{
bp->b_dev = VINUM_SD(sdno); /* create the device number */
+ bp->b_flags &= ~B_DONE; /* no longer done */
bp->b_ioflags = BIO_ORDERED; /* and make this an ordered write */
bp->b_iocmd = BIO_WRITE;
BUF_LOCKINIT(bp); /* get a lock for the buffer */
@@ -253,120 +255,128 @@ revive_block(int sdno)
* called repeatedly from userland.
*/
void
-parityops(struct vinum_ioctl_msg *data, enum parityop op)
+parityops(struct vinum_ioctl_msg *data)
{
int plexno;
struct plex *plex;
int size; /* I/O transfer size, bytes */
- int i;
int stripe; /* stripe number in plex */
int psd; /* parity subdisk number */
struct rangelock *lock; /* lock on stripe */
struct _ioctl_reply *reply;
- u_int64_t *pstripep; /* pointer to our stripe counter */
+ off_t pstripe; /* pointer to our stripe counter */
struct buf *pbp;
+ off_t errorloc; /* offset of parity error */
+ enum parityop op; /* operation to perform */
- pbp = NULL;
plexno = data->index;
+ op = data->op;
+ pbp = NULL;
reply = (struct _ioctl_reply *) data;
reply->error = EAGAIN; /* expect to repeat this call */
- reply->msg[0] = '\0';
plex = &PLEX[plexno];
if (!isparity(plex)) { /* not RAID-4 or RAID-5 */
reply->error = EINVAL;
return;
}
- if (op == rebuildparity) /* point to our counter */
- pstripep = &plex->rebuildblock;
- else
- pstripep = &plex->checkblock;
- stripe = *pstripep / plex->stripesize; /* stripe number */
+ pstripe = data->offset;
+ stripe = pstripe / plex->stripesize; /* stripe number */
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
size = min(DEFAULT_REVIVE_BLOCKSIZE, /* one block at a time */
plex->stripesize << DEV_BSHIFT);
- pbp = parityrebuild(plex, *pstripep, size, op == checkparity, &lock); /* do the grunt work */
- if (pbp == NULL) /* no buffer space */
+ pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */
+ if (pbp == NULL) { /* no buffer space */
+ reply->error = ENOMEM;
return; /* chicken out */
-
+ }
/*
* Now we have a result in the data buffer of
* the parity buffer header, which we have kept.
* Decide what to do with it.
*/
+ reply->msg[0] = '\0'; /* until shown otherwise */
if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */
- if (op == checkparity) {
- int *parity_buf;
- int isize;
-
- parity_buf = (int *) pbp->b_data;
- isize = pbp->b_bcount / sizeof(int);
- for (i = 0; i < isize; i++) {
- if (parity_buf[i] != 0) {
- reply->error = EIO;
- sprintf(reply->msg,
- "Parity incorrect at offset 0x%lx\n",
- (u_long) (*pstripep << DEV_BSHIFT) * (plex->subdisks - 1)
- + i * sizeof(int));
- break;
- }
- }
- } else { /* rebuildparity */
+ if ((op == rebuildparity)
+ || (op == rebuildandcheckparity)) {
pbp->b_iocmd = BIO_WRITE;
pbp->b_resid = pbp->b_bcount;
BUF_LOCKINIT(pbp); /* get a lock for the buffer */
BUF_LOCK(pbp, LK_EXCLUSIVE); /* and lock it */
- sdio(pbp); /* perform the I/O */
+ sdio(pbp); /* write the parity block */
bufwait(pbp);
}
+ if (((op == checkparity)
+ || (op == rebuildandcheckparity))
+ && (errorloc != -1)) {
+ if (op == checkparity)
+ reply->error = EIO;
+ sprintf(reply->msg,
+ "Parity incorrect at offset 0x%llx\n",
+ errorloc);
+ }
if (reply->error == EAGAIN) { /* still OK, */
- *pstripep += (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
- if (*pstripep >= SD[plex->sdnos[0]].sectors) { /* finished */
- *pstripep = 0;
+ plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
+ if (pstripe >= SD[plex->sdnos[0]].sectors) { /* finished */
+ plex->checkblock = 0;
reply->error = 0;
}
}
- if (pbp->b_ioflags & BIO_ERROR)
- reply->error = pbp->b_error;
- pbp->b_flags |= B_INVAL;
- pbp->b_ioflags &= ~BIO_ERROR;
- brelse(pbp);
-
}
+ if (pbp->b_ioflags & BIO_ERROR)
+ reply->error = pbp->b_error;
+ pbp->b_flags |= B_INVAL;
+ pbp->b_ioflags &= ~BIO_ERROR;
+ brelse(pbp);
unlockrange(plexno, lock);
}
/*
* Rebuild a parity stripe. Return pointer to
- * parity bp. On return, the band is locked. The
- * caller must unlock the band and release the
- * buffer header.
+ * parity bp. On return,
+ *
+ * 1. The band is locked. The caller must unlock
+ * the band and release the buffer header.
+ *
+ * 2. All buffer headers except php have been
+ * released. The caller must release pbp.
+ *
+ * 3. For checkparity and rebuildandcheckparity,
+ * the parity is compared with the current
+ * parity block. If it's different, the
+ * offset of the error is returned to
+ * errorloc. The caller can set the value of
+ * the pointer to NULL if this is called for
+ * rebuilding parity.
*/
struct buf *
parityrebuild(struct plex *plex,
u_int64_t pstripe,
int size,
- int check, /* 1 if only checking */
- struct rangelock **lockp)
+ enum parityop op,
+ struct rangelock **lockp,
+ off_t * errorloc)
{
int error;
int s;
int sdno;
int stripe; /* stripe number */
- int *parity_buf; /* the address supplied by geteblk */
+ int *parity_buf; /* buffer address for current parity block */
+ int *newparity_buf; /* and for new parity block */
int mysize; /* I/O transfer size for this transfer */
int isize; /* mysize in ints */
int i;
int psd; /* parity subdisk number */
+ int newpsd; /* and "subdisk number" of new parity */
struct buf **bpp; /* pointers to our bps */
struct buf *pbp; /* buffer header for parity stripe */
int *sbuf;
+ int bufcount; /* number of buffers we need */
stripe = pstripe / plex->stripesize; /* stripe number */
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
parity_buf = NULL; /* to keep the compiler happy */
error = 0;
- pbp = NULL;
/*
* It's possible that the default transfer size
@@ -380,38 +390,45 @@ parityrebuild(struct plex *plex,
*/
mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT);
isize = mysize / (sizeof(int)); /* number of ints in the buffer */
-
- bpp = (struct buf **) Malloc(plex->subdisks * sizeof(struct buf *)); /* array of pointers to bps */
-
- /* First, issue requests for all subdisks in parallel */
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
- /* Get a buffer header and initialize it. */
- s = splbio();
- bpp[sdno] = geteblk(mysize); /* Get a buffer */
- if (bpp[sdno] == NULL) {
- while (sdno-- > 0) { /* release the ones we got */
- bpp[sdno]->b_flags |= B_INVAL;
- brelse(bpp[sdno]); /* give back our resources */
+ bufcount = plex->subdisks + 1; /* sd buffers plus result buffer */
+ newpsd = plex->subdisks;
+ bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */
+
+ /* First, build requests for all subdisks */
+ for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */
+ if ((sdno != psd) || (op != rebuildparity)) {
+ /* Get a buffer header and initialize it. */
+ s = splbio();
+ bpp[sdno] = geteblk(mysize); /* Get a buffer */
+ if (bpp[sdno] == NULL) {
+ while (sdno-- > 0) { /* release the ones we got */
+ bpp[sdno]->b_flags |= B_INVAL;
+ brelse(bpp[sdno]); /* give back our resources */
+ }
+ splx(s);
+ printf("vinum: can't allocate buffer space for parity op.\n");
+ return NULL; /* no bpps */
}
splx(s);
- printf("vinum: can't allocate buffer space\n");
- return NULL; /* no bpps */
+ if (sdno == psd)
+ parity_buf = (int *) bpp[sdno]->b_data;
+ if (sdno == newpsd) /* the new one? */
+ bpp[sdno]->b_dev = VINUM_SD(psd); /* write back to the parity SD */
+ else
+ bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */
+ bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */
+ bpp[sdno]->b_flags = 0;
+ bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize;
+ bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
+ bpp[sdno]->b_blkno = pstripe; /* transfer from here */
}
- splx(s);
- if (sdno == psd) {
- pbp = bpp[sdno];
- parity_buf = (int *) bpp[sdno]->b_data;
- if (!check)
- bzero(parity_buf, mysize);
- }
- bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */
- bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */
- bpp[sdno]->b_flags = 0;
- bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize;
- bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
- bpp[sdno]->b_blkno = pstripe; /* read from here */
}
+ /* Initialize result buffer */
+ pbp = bpp[newpsd];
+ newparity_buf = (int *) bpp[newpsd]->b_data;
+ bzero(newparity_buf, mysize);
+
/*
* Now lock the stripe with the first non-parity
* bp as locking bp.
@@ -423,11 +440,11 @@ parityrebuild(struct plex *plex,
/*
* Then issue requests for all subdisks in
* parallel. Don't transfer the parity stripe
- * if we're rebuilding parity. We have already
- * initialized it to 0.
+ * if we're rebuilding parity, unless we also
+ * want to check it.
*/
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
- if ((sdno != psd) || check) {
+ for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */
+ if ((sdno != psd) || (op != rebuildparity)) {
BUF_LOCKINIT(bpp[sdno]); /* get a lock for the buffer */
BUF_LOCK(bpp[sdno], LK_EXCLUSIVE); /* and lock it */
sdio(bpp[sdno]);
@@ -443,35 +460,41 @@ parityrebuild(struct plex *plex,
* delay is minimal.
*/
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
- if ((sdno != psd) || check) {
+ if ((sdno != psd) || (op != rebuildparity)) {
bufwait(bpp[sdno]);
if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */
error = bpp[sdno]->b_error;
+ else if (sdno != psd) { /* update parity */
+ sbuf = (int *) bpp[sdno]->b_data;
+ for (i = 0; i < isize; i++)
+ ((int *) newparity_buf)[i] ^= sbuf[i]; /* xor in the buffer */
+ }
+ }
+ if (sdno != psd) { /* release all bps except parity */
+ bpp[sdno]->b_flags |= B_INVAL;
+ brelse(bpp[sdno]); /* give back our resources */
}
}
/*
- * Finally, do the xors. To save time, we do
- * the XOR wordwise. This requires sectors to
- * be a multiple of the length of an int, which
- * is currently always the case. We do this in
- * a separate loop to avoid a race condition
- * with the parity stripe.
+ * If we're checking, compare the calculated
+ * and the read parity block. If they're
+ * different, return the plex-relative offset;
+ * otherwise return -1.
*/
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
- if ((sdno != psd) || check) {
- if (error == 0) { /* still OK, */
- sbuf = (int *) bpp[sdno]->b_data;
- for (i = 0; i < isize; i++)
- ((int *) parity_buf)[i] ^= sbuf[i]; /* xor in the buffer */
- }
- if (sdno != psd) { /* release all bps except parity */
- bpp[sdno]->b_flags |= B_INVAL;
- brelse(bpp[sdno]); /* give back our resources */
+ if ((op == checkparity)
+ || (op == rebuildandcheckparity)) {
+ *errorloc = -1; /* no error yet */
+ for (i = 0; i < isize; i++) {
+ if (parity_buf[i] != newparity_buf[i]) {
+ *errorloc = (u_long) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1)
+ + i * sizeof(int);
+ break;
}
}
+ bpp[psd]->b_flags |= B_INVAL;
+ brelse(bpp[psd]); /* give back our resources */
}
-
/* release our resources */
Free(bpp);
if (error) {
@@ -483,7 +506,7 @@ parityrebuild(struct plex *plex,
/*
* Initialize a subdisk by writing zeroes to the
- * complete address space. If check is set,
+ * complete address space. If verify is set,
* check each transfer for correctness.
*
* Each call to this function writes (and maybe