author     John Dyson <dyson@FreeBSD.org>  1995-09-03 19:56:15 +0000
committer  John Dyson <dyson@FreeBSD.org>  1995-09-03 19:56:15 +0000
commit     8c601f7da8d286a2af125f62064d3664deae07a2 (patch)
tree       5127cfe6ad87c2bfe194ed162b90660e2ec56e7d
parent     a50cd483d2278a4a58171f7472ed3b2c3aa90174 (diff)
-rw-r--r--  sys/kern/vfs_bio.c      64
-rw-r--r--  sys/kern/vfs_cluster.c  247
2 files changed, 205 insertions(+), 106 deletions(-)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 6e8d2018eb8b..8e8b9ad96e06 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -18,7 +18,7 @@
* 5. Modifications may be freely made to this file if the above conditions
* are met.
*
- * $Id: vfs_bio.c,v 1.59 1995/08/24 13:59:14 davidg Exp $
+ * $Id: vfs_bio.c,v 1.60 1995/08/28 09:18:53 julian Exp $
*/
/*
@@ -73,6 +73,7 @@ void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);
+static __inline struct buf * gbincore(struct vnode * vp, daddr_t blkno);
int needsbuffer;
@@ -541,6 +542,29 @@ brelse(struct buf * bp)
}
/*
+ * Check to see if a block is currently memory resident.
+ */
+static __inline struct buf *
+gbincore(struct vnode * vp, daddr_t blkno)
+{
+ struct buf *bp;
+ struct bufhashhdr *bh;
+
+ bh = BUFHASH(vp, blkno);
+ bp = bh->lh_first;
+
+ /* Search hash chain */
+ while (bp != NULL) {
+ /* hit */
+ if (bp->b_vp == vp && bp->b_lblkno == blkno) {
+ break;
+ }
+ bp = bp->b_hash.le_next;
+ }
+ return (bp);
+}
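
The inline added above is a bare hash-chain walk: unlike incore() it does not filter on B_INVAL (the callers below test that bit themselves) and it leaves interrupt protection to its callers. A minimal userland model of the same search, with stand-in types in place of the kernel's struct buf and BUFHASH chain:

    #include <stddef.h>

    /* Stand-ins for the kernel's struct buf and its b_hash linkage. */
    struct hbuf {
        struct hbuf *hash_next;         /* models b_hash.le_next */
        void *vp;                       /* models b_vp */
        long lblkno;                    /* models b_lblkno */
    };

    static struct hbuf *
    chain_lookup(struct hbuf *head, void *vp, long blkno)
    {
        struct hbuf *bp;

        /* walk the chain; drop out with NULL on a miss */
        for (bp = head; bp != NULL; bp = bp->hash_next)
            if (bp->vp == vp && bp->lblkno == blkno)
                break;
        return (bp);
    }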
+
+/*
* this routine implements clustered async writes for
* clearing out B_DELWRI buffers... This is much better
* than the old way of writing only one buffer at a time.
@@ -562,7 +586,7 @@ vfs_bio_awrite(struct buf * bp)
int maxcl = MAXPHYS / size;
for (i = 1; i < maxcl; i++) {
- if ((bpa = incore(vp, lblkno + i)) &&
+ if ((bpa = gbincore(vp, lblkno + i)) &&
((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
(B_DELWRI | B_CLUSTEROK)) &&
(bpa->b_bufsize == size)) {
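
With gbincore() in place, the scan above probes the blocks following lblkno and keeps extending the prospective write cluster only while each successor is delayed-write and cluster-eligible, neither busy nor invalid, and of matching size. A userland sketch of that eligibility test and scan (flag values here are illustrative, not the real <sys/buf.h> definitions):

    #include <stddef.h>

    /* Illustrative flag bits; the real ones live in <sys/buf.h>. */
    #define B_BUSY      0x01
    #define B_DELWRI    0x02
    #define B_CLUSTEROK 0x04
    #define B_INVAL     0x08

    struct wbuf {
        int flags;
        long bufsize;
    };

    /* Join the cluster only if delayed-write and cluster-eligible,
     * neither busy nor invalidated, with a matching block size. */
    static int
    may_join(const struct wbuf *b, long size)
    {
        return ((b->flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL))
            == (B_DELWRI | B_CLUSTEROK) && b->bufsize == size);
    }

    /* Count how many contiguous blocks, starting at lblkno, can be
     * coalesced into one write; maxcl caps it at MAXPHYS / size. */
    static int
    scan_cluster(struct wbuf *(*probe)(long), long lblkno, int maxcl,
        long size)
    {
        struct wbuf *b;
        int i;

        for (i = 1; i < maxcl; i++)
            if ((b = probe(lblkno + i)) == NULL || !may_join(b, size))
                break;
        return (i);
    }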
@@ -716,14 +740,12 @@ incore(struct vnode * vp, daddr_t blkno)
/* hit */
if (bp->b_vp == vp && bp->b_lblkno == blkno &&
(bp->b_flags & B_INVAL) == 0) {
- splx(s);
- return (bp);
+ break;
}
bp = bp->b_hash.le_next;
}
splx(s);
-
- return (NULL);
+ return (bp);
}
/*
@@ -838,8 +860,8 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
s = splbio();
loop:
- if (bp = incore(vp, blkno)) {
- if (bp->b_flags & B_BUSY) {
+ if (bp = gbincore(vp, blkno)) {
+ if (bp->b_flags & (B_BUSY|B_INVAL)) {
bp->b_flags |= B_WANTED;
if (!tsleep(bp, PRIBIO | slpflag, "getblk", slptimeo))
goto loop;
@@ -878,7 +900,7 @@ loop:
* Normally the vnode is locked so this isn't a problem.
* VBLK type I/O requests, however, don't lock the vnode.
*/
- if (!VOP_ISLOCKED(vp) && incore(vp, blkno)) {
+ if (!VOP_ISLOCKED(vp) && gbincore(vp, blkno)) {
bp->b_flags |= B_INVAL;
brelse(bp);
goto loop;
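
The check above closes a race: VBLK I/O does not take the vnode lock, so while getblk() slept allocating a fresh buffer another context may have instantiated one for the same block, and the new buffer must then be discarded. A hedged sketch of that retry pattern, reusing the chain_lookup() model from earlier (alloc_buf(), discard(), hash_head() and hash_insert() are hypothetical helpers):

    struct hbuf *alloc_buf(void *vp, long blkno);   /* may sleep */
    void discard(struct hbuf *bp);                  /* models brelse() */
    struct hbuf *hash_head(void *vp, long blkno);   /* models BUFHASH() */
    void hash_insert(struct hbuf *bp);

    static struct hbuf *
    getblk_model(void *vp, long blkno)
    {
        struct hbuf *bp;

        for (;;) {
            /* cache hit: return whatever is installed, ours or not */
            bp = chain_lookup(hash_head(vp, blkno), vp, blkno);
            if (bp != NULL)
                return (bp);
            bp = alloc_buf(vp, blkno);
            /* a racer may have installed the block while alloc_buf()
             * slept (the vnode is unlocked for VBLK I/O): toss ours
             * and loop to pick the winner up from the hash */
            if (chain_lookup(hash_head(vp, blkno), vp, blkno) != NULL) {
                discard(bp);
                continue;
            }
            hash_insert(bp);
            return (bp);
        }
    }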
@@ -940,7 +962,7 @@ allocbuf(struct buf * bp, int size)
{
int s;
- int newbsize;
+ int newbsize, mbsize;
int i;
if (!(bp->b_flags & B_BUSY))
@@ -950,6 +972,7 @@ allocbuf(struct buf * bp, int size)
/*
* Just get anonymous memory from the kernel
*/
+ mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
newbsize = round_page(size);
if (newbsize < bp->b_bufsize) {
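
The new mbsize rounds the requested size up to a DEV_BSIZE multiple, while newbsize still rounds up to a whole page; for a 600-byte request with 512-byte sectors and 4 KB pages that is 1024 versus 4096. A standalone check of the arithmetic (the page size is assumed here; the kernel takes it from the machine parameters):

    #include <assert.h>

    #define DEV_BSIZE 512
    #define PAGE_SIZE 4096  /* assumption: i386-era page size */
    #define round_page(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int
    main(void)
    {
        int size = 600;
        int mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;

        assert(mbsize == 1024);             /* two 512-byte sectors */
        assert(round_page(size) == 4096);   /* one whole page */
        return (0);
    }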
@@ -1218,8 +1241,7 @@ biodone(register struct buf * bp)
* here in the read case.
*/
if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) {
- vm_page_set_valid(m, foff & (PAGE_SIZE-1), resid);
- vm_page_set_clean(m, foff & (PAGE_SIZE-1), resid);
+ vm_page_set_validclean(m, foff & (PAGE_SIZE-1), resid);
}
/*
@@ -1285,10 +1307,10 @@ count_lock_queue()
int vfs_update_interval = 30;
-static void
+void
vfs_update()
{
- (void) spl0(); /* XXX redundant? wrong place?*/
+ (void) spl0();
while (1) {
tsleep(&vfs_update_wakeup, PRIBIO, "update",
hz * vfs_update_interval);
@@ -1365,13 +1387,13 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
if (resid > iocount)
resid = iocount;
- obj->paging_in_progress++;
- m->busy++;
+ if ((bp->b_flags & B_CLUSTER) == 0) {
+ obj->paging_in_progress++;
+ m->busy++;
+ }
if (clear_modify) {
vm_page_protect(m, VM_PROT_READ);
- vm_page_set_valid(m,
- foff & (PAGE_SIZE-1), resid);
- vm_page_set_clean(m,
+ vm_page_set_validclean(m,
foff & (PAGE_SIZE-1), resid);
} else if (bp->b_bcount >= PAGE_SIZE) {
if (m->valid && (bp->b_flags & B_CACHE) == 0) {
@@ -1407,9 +1429,7 @@ vfs_clean_pages(struct buf * bp)
if (resid > iocount)
resid = iocount;
if (resid > 0) {
- vm_page_set_valid(m,
- foff & (PAGE_SIZE-1), resid);
- vm_page_set_clean(m,
+ vm_page_set_validclean(m,
foff & (PAGE_SIZE-1), resid);
}
foff += resid;
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index e6bde8c6e831..3f55ff99f626 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
- * $Id: vfs_cluster.c,v 1.16 1995/05/30 08:06:30 rgrimes Exp $
+ * $Id: vfs_cluster.c,v 1.17 1995/06/28 12:31:47 davidg Exp $
*/
#include <sys/param.h>
@@ -47,6 +47,8 @@
#include <sys/vmmeter.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
#ifdef DEBUG
#include <vm/vm.h>
@@ -62,12 +64,13 @@ struct ctldebug debug13 = {"doreallocblks", &doreallocblks};
/*
* Local declarations
*/
-struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
- daddr_t, daddr_t, long, int, long));
+static struct buf *cluster_rbuild __P((struct vnode *, u_quad_t,
+ daddr_t, daddr_t, long, int));
struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));
int totreads;
int totreadblocks;
+extern vm_page_t bogus_page;
#ifdef DIAGNOSTIC
/*
@@ -93,6 +96,13 @@ int totreadblocks;
#endif
/*
+ * allow for three entire read-aheads... The system will
+ * adjust downwards rapidly if needed...
+ */
+#define RA_MULTIPLE_FAST 2
+#define RA_MULTIPLE_SLOW 3
+#define RA_SHIFTDOWN 1 /* approx lg2(RA_MULTIPLE) */
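
These constants drive the new window policy: v_ralen (the read-ahead window, in blocks) grows by one block per sequential hit, capped at RA_MULTIPLE_SLOW * (MAXPHYS / size) blocks (RA_MULTIPLE_FAST caps the already-cached fast path), and is roughly halved via RA_SHIFTDOWN the moment the access pattern stops being sequential. A userland model of just that adjustment, with MAXPHYS assumed at 64 KB:

    #define RA_MULTIPLE_SLOW 3
    #define RA_SHIFTDOWN     1              /* approx lg2(RA_MULTIPLE) */
    #define MAXPHYS          (64 * 1024)    /* assumption */

    /* one more block of read-ahead per sequential hit, up to the cap */
    static int
    ra_grow(int ralen, int blocksize)
    {
        if (ralen + 1 < RA_MULTIPLE_SLOW * (MAXPHYS / blocksize))
            ralen++;
        return (ralen);
    }

    /* back off quickly (roughly halve) when sequentiality breaks */
    static int
    ra_shrink(int ralen)
    {
        return (ralen >> RA_SHIFTDOWN);
    }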
+/*
* This replaces bread. If this is a bread at the beginning of a file and
* lastr is 0, we assume this is the first read and we'll read up to two
* blocks if they are sequential. After that, we'll do regular read ahead
@@ -114,31 +124,35 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
daddr_t blkno, rablkno, origlblkno;
long flags;
int error, num_ra, alreadyincore;
+ int i;
+ int seq;
- origlblkno = lblkno;
error = 0;
/*
* get the requested block
*/
+ origlblkno = lblkno;
*bpp = bp = getblk(vp, lblkno, size, 0, 0);
+ seq = ISSEQREAD(vp, lblkno);
/*
* if it is in the cache, then check to see if the reads have been
* sequential. If they have, then try some read-ahead, otherwise
* back-off on prospective read-aheads.
*/
if (bp->b_flags & B_CACHE) {
- int i;
-
- if (!ISSEQREAD(vp, origlblkno)) {
+ if (!seq) {
vp->v_maxra = bp->b_lblkno + bp->b_bcount / size;
- vp->v_ralen >>= 1;
+ vp->v_ralen >>= RA_SHIFTDOWN;
return 0;
- } else if( vp->v_maxra >= origlblkno) {
- if ((vp->v_ralen + 1) < (MAXPHYS / size))
- vp->v_ralen++;
- if ( vp->v_maxra >= (origlblkno + vp->v_ralen))
+ } else if( vp->v_maxra > lblkno) {
+ if ( (vp->v_maxra + (vp->v_ralen / RA_MULTIPLE_SLOW)) >= (lblkno + vp->v_ralen)) {
+ if ((vp->v_ralen + 1) < RA_MULTIPLE_FAST*(MAXPHYS / size))
+ ++vp->v_ralen;
return 0;
+ }
lblkno = vp->v_maxra;
+ } else {
+ lblkno += 1;
}
bp = NULL;
} else {
@@ -149,13 +163,9 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
bp->b_flags |= B_READ;
lblkno += 1;
curproc->p_stats->p_ru.ru_inblock++; /* XXX */
+ vp->v_ralen = 0;
}
/*
- * if ralen is "none", then try a little
- */
- if (vp->v_ralen == 0)
- vp->v_ralen = 1;
- /*
* assume no read-ahead
*/
alreadyincore = 1;
@@ -164,9 +174,13 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
/*
* if we have been doing sequential I/O, then do some read-ahead
*/
- if (ISSEQREAD(vp, origlblkno)) {
- int i;
+ if (seq) {
+ /*
+ * bump ralen a bit...
+ */
+ if ((vp->v_ralen + 1) < RA_MULTIPLE_SLOW*(MAXPHYS / size))
+ ++vp->v_ralen;
/*
* this code makes sure that the stuff that we have read-ahead
* is still in the cache. If it isn't, we have been reading
@@ -177,21 +191,19 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
rablkno = lblkno + i;
alreadyincore = (int) incore(vp, rablkno);
if (!alreadyincore) {
+ if (inmem(vp, rablkno)) {
+ struct buf *bpt;
+ if (vp->v_maxra < rablkno)
+ vp->v_maxra = rablkno + 1;
+ continue;
+ }
if (rablkno < vp->v_maxra) {
vp->v_maxra = rablkno;
- vp->v_ralen >>= 1;
+ vp->v_ralen >>= RA_SHIFTDOWN;
alreadyincore = 1;
- } else {
- if (inmem(vp, rablkno)) {
- if( vp->v_maxra < rablkno)
- vp->v_maxra = rablkno + 1;
- continue;
- }
- if ((vp->v_ralen + 1) < MAXPHYS / size)
- vp->v_ralen++;
}
break;
- } else if( vp->v_maxra < rablkno) {
+ } else if (vp->v_maxra < rablkno) {
vp->v_maxra = rablkno + 1;
}
}
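
Spelled out, the revalidation walk distinguishes three cases per block previously requested ahead: the buffer is still incore (note it and continue), the buffer is gone but its pages survive in VM per inmem() (likewise), or the data was reclaimed before ever being used, in which case v_maxra is clamped and the window shrinks. A hedged userland model of that walk, taking incore/inmem as predicate callbacks:

    struct ra_state {
        long maxra;     /* one past the highest block read ahead */
        int ralen;      /* current window, in blocks */
    };

    /* Returns the block where fresh read-ahead should start; sets
     * *lost when read-ahead data was reclaimed before use, meaning
     * the window outran the cache and must not be reissued as-is. */
    static long
    ra_revalidate(struct ra_state *v, long lblkno,
        int (*incore)(long), int (*inmem)(long), int *lost)
    {
        long rablkno = lblkno;
        int i;

        *lost = 0;
        for (i = 1; i <= v->ralen; i++) {
            rablkno = lblkno + i;
            /* buffer still cached, or buffer gone but pages still
             * resident in VM: the read-ahead survived, keep walking */
            if (incore(rablkno) || inmem(rablkno)) {
                if (v->maxra < rablkno)
                    v->maxra = rablkno + 1;
                continue;
            }
            if (rablkno < v->maxra) {
                /* we read it ahead and already lost it: too eager */
                v->maxra = rablkno;
                v->ralen >>= 1;             /* RA_SHIFTDOWN */
                *lost = 1;
            }
            break;
        }
        return (rablkno);
    }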
@@ -202,16 +214,14 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
rbp = NULL;
if (!alreadyincore &&
(rablkno + 1) * size <= filesize &&
- !(error = VOP_BMAP(vp, rablkno, NULL, &blkno, &num_ra)) &&
+ !(error = VOP_BMAP(vp, rablkno, NULL, &blkno, &num_ra, NULL)) &&
blkno != -1) {
- if ((vp->v_ralen + 1) < MAXPHYS / size)
- vp->v_ralen++;
if (num_ra > vp->v_ralen)
num_ra = vp->v_ralen;
if (num_ra) {
- rbp = cluster_rbuild(vp, filesize,
- NULL, rablkno, blkno, size, num_ra, B_READ | B_ASYNC);
+ rbp = cluster_rbuild(vp, filesize, rablkno, blkno, size,
+ num_ra + 1);
} else {
rbp = getblk(vp, rablkno, size, 0, 0);
rbp->b_flags |= B_READ | B_ASYNC;
@@ -220,8 +230,7 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
}
/*
- * if the synchronous read is a cluster, handle it, otherwise do a
- * simple, non-clustered read.
+ * handle the synchronous read
*/
if (bp) {
if (bp->b_flags & (B_DONE | B_DELWRI))
@@ -244,7 +253,8 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
rbp->b_flags &= ~(B_ASYNC | B_READ);
brelse(rbp);
} else {
- vfs_busy_pages(rbp, 0);
+ if ((rbp->b_flags & B_CLUSTER) == 0)
+ vfs_busy_pages(rbp, 0);
(void) VOP_STRATEGY(rbp);
totreads++;
totreadblocks += rbp->b_bcount / size;
@@ -261,19 +271,17 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp)
* read ahead. We will read as many blocks as possible sequentially
* and then parcel them up into logical blocks in the buffer hash table.
*/
-struct buf *
-cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
+static struct buf *
+cluster_rbuild(vp, filesize, lbn, blkno, size, run)
struct vnode *vp;
u_quad_t filesize;
- struct buf *bp;
daddr_t lbn;
daddr_t blkno;
long size;
int run;
- long flags;
{
struct cluster_save *b_save;
- struct buf *tbp;
+ struct buf *bp, *tbp;
daddr_t bn;
int i, inc, j;
@@ -284,31 +292,28 @@ cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
#endif
if (size * (lbn + run + 1) > filesize)
--run;
- if (run == 0) {
- if (!bp) {
- bp = getblk(vp, lbn, size, 0, 0);
- bp->b_blkno = blkno;
- bp->b_flags |= flags;
- }
- return (bp);
- }
- tbp = bp;
- if (!tbp) {
- tbp = getblk(vp, lbn, size, 0, 0);
- }
- if (tbp->b_flags & B_CACHE) {
- return (tbp);
- } else if (bp == NULL) {
- tbp->b_flags |= B_ASYNC;
- }
- bp = getpbuf();
- bp->b_flags = flags | B_CALL | B_BUSY | B_CLUSTER;
+
+ tbp = getblk(vp, lbn, size, 0, 0);
+ if (tbp->b_flags & B_CACHE)
+ return tbp;
+
+ tbp->b_blkno = blkno;
+ tbp->b_flags |= B_ASYNC | B_READ;
+ if( ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
+ return tbp;
+
+ bp = trypbuf();
+ if (bp == 0)
+ return tbp;
+
+ (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
+ bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO;
bp->b_iodone = cluster_callback;
bp->b_blkno = blkno;
bp->b_lblkno = lbn;
pbgetvp(vp, bp);
- b_save = malloc(sizeof(struct buf *) * (run + 1) + sizeof(struct cluster_save),
+ b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
M_SEGMENT, M_WAITOK);
b_save->bs_nchildren = 0;
b_save->bs_children = (struct buf **) (b_save + 1);
@@ -318,33 +323,61 @@ cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
bp->b_bufsize = 0;
bp->b_npages = 0;
- if (tbp->b_flags & B_VMIO)
- bp->b_flags |= B_VMIO;
-
inc = btodb(size);
- for (bn = blkno, i = 0; i <= run; ++i, bn += inc) {
+ for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
if (i != 0) {
+ if ((bp->b_npages * PAGE_SIZE) + size > MAXPHYS)
+ break;
+ if (incore(vp, lbn + i))
+ break;
tbp = getblk(vp, lbn + i, size, 0, 0);
+
if ((tbp->b_flags & B_CACHE) ||
- (tbp->b_flags & B_VMIO) != (bp->b_flags & B_VMIO)) {
+ (tbp->b_flags & B_VMIO) == 0) {
+ brelse(tbp);
+ break;
+ }
+
+ for (j=0;j<tbp->b_npages;j++) {
+ if (tbp->b_pages[j]->valid) {
+ break;
+ }
+ }
+
+ if (j != tbp->b_npages) {
+ brelse(tbp);
+ break;
+ }
+
+ tbp->b_flags |= B_READ | B_ASYNC;
+ if( tbp->b_blkno == tbp->b_lblkno) {
+ tbp->b_blkno = bn;
+ } else if (tbp->b_blkno != bn) {
brelse(tbp);
break;
}
- tbp->b_blkno = bn;
- tbp->b_flags |= flags | B_READ | B_ASYNC;
- } else {
- tbp->b_flags |= flags | B_READ;
}
++b_save->bs_nchildren;
b_save->bs_children[i] = tbp;
for (j = 0; j < tbp->b_npages; j += 1) {
- bp->b_pages[j + bp->b_npages] = tbp->b_pages[j];
+ vm_page_t m;
+ m = tbp->b_pages[j];
+ ++m->busy;
+ ++m->object->paging_in_progress;
+ if (m->valid == VM_PAGE_BITS_ALL) {
+ m = bogus_page;
+ }
+ if ((bp->b_npages == 0) ||
+ (bp->b_pages[bp->b_npages - 1] != m)) {
+ bp->b_pages[bp->b_npages] = m;
+ bp->b_npages++;
+ }
}
- bp->b_npages += tbp->b_npages;
- bp->b_bcount += size;
- bp->b_bufsize += size;
+ bp->b_bcount += tbp->b_bcount;
+ bp->b_bufsize += tbp->b_bufsize;
}
- pmap_qenter((vm_offset_t) bp->b_data, (vm_page_t *)bp->b_pages, bp->b_npages);
+ pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
+ (vm_page_t *)bp->b_pages, bp->b_npages);
return (bp);
}
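
The page-gathering loop is the core of the rewritten cluster_rbuild(): each child buffer's pages are appended to the pbuf's page array; a fully valid page is swapped for bogus_page so the incoming transfer cannot clobber data a mapping may already rely on; and a page shared across the boundary of two adjacent buffers is appended only once. A userland model with a stand-in for vm_page_t:

    typedef struct {
        int valid;                      /* stand-in for vm_page_t */
    } page_t;
    #define VALID_ALL 0xff              /* models VM_PAGE_BITS_ALL */

    static page_t bogus;                /* models bogus_page */

    /* Append child[0..n) to cluster[]; returns the new page count. */
    static int
    gather_pages(page_t **cluster, int npages, page_t **child, int n)
    {
        page_t *m;
        int j;

        for (j = 0; j < n; j++) {
            m = child[j];
            /* a fully valid page must not be overwritten by the
             * device: aim the transfer at the throwaway page */
            if (m->valid == VALID_ALL)
                m = &bogus;
            /* adjacent buffers may share a boundary page: add once */
            if (npages == 0 || cluster[npages - 1] != m)
                cluster[npages++] = m;
        }
        return (npages);
    }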
@@ -370,7 +403,7 @@ cluster_callback(bp)
error = bp->b_error;
b_save = (struct cluster_save *) (bp->b_saveaddr);
- pmap_qremove((vm_offset_t) bp->b_data, bp->b_npages);
+ pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
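
One subtlety carried through both paths: the pbuf's b_data now has the first child buffer's in-page offset OR-ed into it (the PAGE_MASK trick in cluster_rbuild() and in the cluster_wbuild() hunk further down), so pmap_qenter() and this pmap_qremove() must truncate it back to a page boundary first. The arithmetic, checked standalone (addresses and the 4 KB page size are illustrative):

    #include <assert.h>

    #define PAGE_SIZE 4096UL            /* assumption: i386 pages */
    #define PAGE_MASK (PAGE_SIZE - 1)
    #define trunc_page(x) ((x) & ~PAGE_MASK)

    int
    main(void)
    {
        unsigned long kva   = 0xf0100000UL; /* pbuf's aligned b_data */
        unsigned long child = 0xf0234200UL; /* child buffer's b_data */

        /* b_data inherits the child's offset within its first page... */
        kva |= child & PAGE_MASK;
        assert(kva == 0xf0100200UL);

        /* ...so the KVA map/unmap calls must strip it off again */
        assert(trunc_page(kva) == 0xf0100000UL);
        return (0);
    }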
@@ -429,8 +462,41 @@ cluster_write(bp, filesize)
* reallocating to make it sequential.
*/
cursize = vp->v_lastw - vp->v_cstart + 1;
- cluster_wbuild(vp, NULL, lblocksize,
- vp->v_cstart, cursize, lbn);
+ if (!doreallocblks ||
+ (lbn + 1) * lblocksize != filesize ||
+ lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
+ cluster_wbuild(vp, NULL, lblocksize,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ struct buf **bpp, **endbp;
+ struct cluster_save *buflist;
+
+ buflist = cluster_collectbufs(vp, bp);
+ endbp = &buflist->bs_children
+ [buflist->bs_nchildren - 1];
+ if (VOP_REALLOCBLKS(vp, buflist)) {
+ /*
+ * Failed, push the previous cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp < endbp; bpp++)
+ brelse(*bpp);
+ free(buflist, M_SEGMENT);
+ cluster_wbuild(vp, NULL, lblocksize,
+ vp->v_cstart, cursize, lbn);
+ } else {
+ /*
+ * Succeeded, keep building cluster.
+ */
+ for (bpp = buflist->bs_children;
+ bpp <= endbp; bpp++)
+ bdwrite(*bpp);
+ free(buflist, M_SEGMENT);
+ vp->v_lastw = lbn;
+ vp->v_lasta = bp->b_blkno;
+ return;
+ }
+ }
}
/*
* Consider beginning a cluster. If at end of file, make
@@ -439,8 +505,8 @@ cluster_write(bp, filesize)
*/
if ((lbn + 1) * lblocksize != filesize &&
(bp->b_blkno == bp->b_lblkno) &&
- (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
- bp->b_blkno == -1)) {
+ (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) ||
+ bp->b_blkno == -1)) {
bawrite(bp);
vp->v_clen = 0;
vp->v_lasta = bp->b_blkno;
@@ -571,6 +637,7 @@ redo:
bp->b_blkno = tbp->b_blkno;
bp->b_lblkno = tbp->b_lblkno;
+ (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER;
bp->b_iodone = cluster_callback;
pbgetvp(vp, bp);
@@ -592,6 +659,10 @@ redo:
if ((tbp->b_npages + bp->b_npages) > (MAXPHYS / PAGE_SIZE))
break;
+ if ( (tbp->b_blkno != tbp->b_lblkno) &&
+ ((bp->b_blkno + btodb(size) * i) != tbp->b_blkno))
+ break;
+
/*
* Get the desired block buffer (unless it is the
* final sequential block whose buffer was passed in
@@ -610,9 +681,16 @@ redo:
tbp = last_bp;
}
for (j = 0; j < tbp->b_npages; j += 1) {
- bp->b_pages[j + bp->b_npages] = tbp->b_pages[j];
+ vm_page_t m;
+ m = tbp->b_pages[j];
+ ++m->busy;
+ ++m->object->paging_in_progress;
+ if ((bp->b_npages == 0) ||
+ (bp->b_pages[bp->b_npages - 1] != m)) {
+ bp->b_pages[bp->b_npages] = m;
+ bp->b_npages++;
+ }
}
- bp->b_npages += tbp->b_npages;
bp->b_bcount += size;
bp->b_bufsize += size;
@@ -625,7 +703,8 @@ redo:
b_save->bs_children[i] = tbp;
}
b_save->bs_nchildren = i;
- pmap_qenter((vm_offset_t) bp->b_data, (vm_page_t *) bp->b_pages, bp->b_npages);
+ pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
+ (vm_page_t *) bp->b_pages, bp->b_npages);
bawrite(bp);
if (i < len) {