Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--  sys/kern/vfs_bio.c | 1036
1 file changed, 660 insertions(+), 376 deletions(-)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index a01230bfd1cd..76f14b2e61ed 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -11,7 +11,7 @@
* 2. Absolutely no warranty of function or purpose is made by the author
* John S. Dyson.
*
- * $Id: vfs_bio.c,v 1.200 1999/03/02 20:26:39 julian Exp $
+ * $Id: vfs_bio.c,v 1.201 1999/03/02 21:23:38 julian Exp $
*/
/*
@@ -83,8 +83,7 @@ static void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
static void flushdirtybuffers(int slpflag, int slptimeo);
-
-int needsbuffer;
+static int flushbufqueues(void);
/*
* Internal update daemon, process 3
@@ -92,11 +91,6 @@ int needsbuffer;
*/
int vfs_update_wakeup;
-
-/*
- * buffers base kva
- */
-
/*
* bogus page -- for I/O to/from partially complete buffers
* this is a temporary solution to the problem, but it is not
@@ -105,12 +99,13 @@ int vfs_update_wakeup;
* but the code is intricate enough already.
*/
vm_page_t bogus_page;
+int runningbufspace;
static vm_offset_t bogus_offset;
static int bufspace, maxbufspace, vmiospace, maxvmiobufspace,
- bufmallocspace, maxbufmallocspace;
-int numdirtybuffers;
-static int lodirtybuffers, hidirtybuffers;
+ bufmallocspace, maxbufmallocspace, hibufspace;
+static int needsbuffer;
+static int numdirtybuffers, lodirtybuffers, hidirtybuffers;
static int numfreebuffers, lofreebuffers, hifreebuffers;
static int kvafreespace;
@@ -126,8 +121,12 @@ SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW,
&lofreebuffers, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW,
&hifreebuffers, 0, "");
+SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD,
+ &runningbufspace, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW,
&maxbufspace, 0, "");
+SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD,
+ &hibufspace, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD,
&bufspace, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW,
@@ -146,11 +145,81 @@ struct bqueues bufqueues[BUFFER_QUEUES] = { { 0 } };
extern int vm_swap_size;
-#define BUF_MAXUSE 24
+#define BUF_MAXUSE 24
+
+#define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */
+#define VFS_BIO_NEED_RESERVED02 0x02 /* unused */
+#define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */
+#define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */
+#define VFS_BIO_NEED_KVASPACE 0x10 /* wait for buffer_map space, emerg */
-#define VFS_BIO_NEED_ANY 1
-#define VFS_BIO_NEED_LOWLIMIT 2
-#define VFS_BIO_NEED_FREE 4
+/*
+ * kvaspacewakeup:
+ *
+ * Called when kva space is potentially available for recovery or when
+ * kva space is recovered in the buffer_map. This function wakes up
+ * anyone waiting for buffer_map kva space. Even though the buffer_map
+ * is larger than maxbufspace, this situation will typically occur
+ * when the buffer_map gets fragmented.
+ */
+
+static __inline void
+kvaspacewakeup(void)
+{
+ /*
+ * If someone is waiting for KVA space, wake them up. Even
+ * though we haven't freed the kva space yet, the waiting
+ * process will be able to now.
+ */
+ if (needsbuffer & VFS_BIO_NEED_KVASPACE) {
+ needsbuffer &= ~VFS_BIO_NEED_KVASPACE;
+ wakeup(&needsbuffer);
+ }
+}
+
+/*
+ * bufspacewakeup:
+ *
+ * Called when buffer space is potentially available for recovery or when
+ * buffer space is recovered. getnewbuf() will block on this flag when
+ * it is unable to free sufficient buffer space. Buffer space becomes
+ * recoverable when bp's get placed back in the queues.
+ */
+
+static __inline void
+bufspacewakeup(void)
+{
+ /*
+ * If someone is waiting for BUF space, wake them up. Even
+ * though we haven't freed the buffer space yet, the waiting
+ * process will be able to now.
+ */
+ if (needsbuffer & VFS_BIO_NEED_BUFSPACE) {
+ needsbuffer &= ~VFS_BIO_NEED_BUFSPACE;
+ wakeup(&needsbuffer);
+ }
+}
+
+/*
+ * bufcountwakeup:
+ *
+ * Called when a buffer has been added to one of the free queues to
+ * account for the buffer and to wakeup anyone waiting for free buffers.
+ * This typically occurs when large amounts of metadata are being handled
+ * by the buffer cache ( else buffer space runs out first, usually ).
+ */
+
+static __inline void
+bufcountwakeup(void)
+{
+ ++numfreebuffers;
+ if (needsbuffer) {
+ needsbuffer &= ~VFS_BIO_NEED_ANY;
+ if (numfreebuffers >= hifreebuffers)
+ needsbuffer &= ~VFS_BIO_NEED_FREE;
+ wakeup(&needsbuffer);
+ }
+}
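For reference, the consuming side of these wakeups is the sleep loop in getnewbuf() further down in this diff. A rough sketch of the handshake, shown here only to illustrate the protocol (the flag chosen depends on what the sleeper is short of):

    s = splbio();
    needsbuffer |= VFS_BIO_NEED_BUFSPACE;   /* or _ANY, _FREE, _KVASPACE */
    while (needsbuffer & VFS_BIO_NEED_BUFSPACE)
            tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf", slptimeo);
    splx(s);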
/*
* Initialize buffer headers and related structures.
@@ -186,17 +255,25 @@ bufinit()
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
}
-/*
- * maxbufspace is currently calculated to support all filesystem blocks
- * to be 8K. If you happen to use a 16K filesystem, the size of the buffer
- * cache is still the same as it would be for 8K filesystems. This
- * keeps the size of the buffer cache "in check" for big block filesystems.
- */
+
+ /*
+ * maxbufspace is currently calculated to support all filesystem
+ * blocks to be 8K. If you happen to use a 16K filesystem, the size
+ * of the buffer cache is still the same as it would be for 8K
+ * filesystems. This keeps the size of the buffer cache "in check"
+ * for big block filesystems.
+ *
+ * maxbufspace is calculated as around 50% of the KVA available in
+ * the buffer_map ( DFLTBSIZE vs BKVASIZE ), I presume to reduce the
+ * effect of fragmentation.
+ */
maxbufspace = (nbuf + 8) * DFLTBSIZE;
+ if ((hibufspace = maxbufspace - MAXBSIZE * 5) <= MAXBSIZE)
+ hibufspace = 3 * maxbufspace / 4;
/*
* reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed
*/
- maxvmiobufspace = 2 * maxbufspace / 3;
+ maxvmiobufspace = 2 * hibufspace / 3;
/*
* Limit the amount of malloc memory since it is wired permanently into
* the kernel space. Even though this is accounted for in the buffer
@@ -204,18 +281,24 @@ bufinit()
* The malloc scheme improves memory utilization significantly on average
* (small) directories.
*/
- maxbufmallocspace = maxbufspace / 20;
+ maxbufmallocspace = hibufspace / 20;
/*
- * Remove the probability of deadlock conditions by limiting the
- * number of dirty buffers.
+ * Reduce the chance of a deadlock occurring by limiting the number
+ * of delayed-write dirty buffers we allow to stack up.
*/
- hidirtybuffers = nbuf / 8 + 20;
lodirtybuffers = nbuf / 16 + 10;
+ hidirtybuffers = nbuf / 8 + 20;
numdirtybuffers = 0;
+
+/*
+ * Try to keep the number of free buffers in the specified range,
+ * and give the syncer access to an emergency reserve.
+ */
lofreebuffers = nbuf / 18 + 5;
hifreebuffers = 2 * lofreebuffers;
numfreebuffers = nbuf;
+
kvafreespace = 0;
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
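To make these limits concrete, here is the arithmetic worked through for assumed values of nbuf = 1024, DFLTBSIZE = 8K and MAXBSIZE = 64K (illustrative only; the real numbers depend on the platform and on how nbuf is autosized):

    maxbufspace       = (1024 + 8) * 8192;          /* ~8.1 MB             */
    hibufspace        = maxbufspace - 5 * 65536;    /* ~7.7 MB             */
    maxvmiobufspace   = 2 * hibufspace / 3;         /* ~5.2 MB for VMIO    */
    maxbufmallocspace = hibufspace / 20;            /* ~397 KB of malloc   */
    lodirtybuffers    = 1024 / 16 + 10;             /* 74                  */
    hidirtybuffers    = 1024 / 8 + 20;              /* 148                 */
    lofreebuffers     = 1024 / 18 + 5;              /* 61                  */
    hifreebuffers     = 2 * lofreebuffers;          /* 122                 */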
@@ -233,24 +316,26 @@ bufinit()
static void
bfreekva(struct buf * bp)
{
- if (bp->b_kvasize == 0)
- return;
-
- vm_map_delete(buffer_map,
- (vm_offset_t) bp->b_kvabase,
- (vm_offset_t) bp->b_kvabase + bp->b_kvasize);
-
- bp->b_kvasize = 0;
-
+ if (bp->b_kvasize) {
+ vm_map_delete(buffer_map,
+ (vm_offset_t) bp->b_kvabase,
+ (vm_offset_t) bp->b_kvabase + bp->b_kvasize
+ );
+ bp->b_kvasize = 0;
+ kvaspacewakeup();
+ }
}
/*
- * remove the buffer from the appropriate free list
+ * bremfree:
+ *
+ * Remove the buffer from the appropriate free list.
*/
void
bremfree(struct buf * bp)
{
int s = splbio();
+ int old_qindex = bp->b_qindex;
if (bp->b_qindex != QUEUE_NONE) {
if (bp->b_qindex == QUEUE_EMPTY) {
@@ -258,14 +343,29 @@ bremfree(struct buf * bp)
}
TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
bp->b_qindex = QUEUE_NONE;
+ runningbufspace += bp->b_bufsize;
} else {
#if !defined(MAX_PERF)
panic("bremfree: removing a buffer when not on a queue");
#endif
}
- if ((bp->b_flags & B_INVAL) ||
- (bp->b_flags & (B_DELWRI|B_LOCKED)) == 0)
- --numfreebuffers;
+
+ /*
+ * Fixup numfreebuffers count. If the buffer is invalid or not
+ * delayed-write, and it was on the EMPTY, LRU, or AGE queues,
+ * the buffer was free and we must decrement numfreebuffers.
+ */
+ if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) {
+ switch(old_qindex) {
+ case QUEUE_EMPTY:
+ case QUEUE_LRU:
+ case QUEUE_AGE:
+ --numfreebuffers;
+ break;
+ default:
+ break;
+ }
+ }
splx(s);
}
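The net effect of the new accounting, from the point of view of a hypothetical caller (not part of the patch): removing a buffer from a free queue moves its space into runningbufspace, and releasing it moves the space back out.

    s = splbio();
    bremfree(bp);                   /* runningbufspace += bp->b_bufsize */
    bp->b_flags |= B_BUSY;
    splx(s);
    /* ... do I/O on or modify the buffer ... */
    brelse(bp);                     /* runningbufspace -= bp->b_bufsize */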
@@ -286,6 +386,7 @@ bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
if ((bp->b_flags & B_CACHE) == 0) {
if (curproc != NULL)
curproc->p_stats->p_ru.ru_inblock++;
+ KASSERT(!(bp->b_flags & B_ASYNC), ("bread: illegal async bp %p", bp));
bp->b_flags |= B_READ;
bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
if (bp->b_rcred == NOCRED) {
@@ -330,6 +431,7 @@ breadn(struct vnode * vp, daddr_t blkno, int size,
VOP_STRATEGY(vp, bp);
++readwait;
}
+
for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
if (inmem(vp, *rablkno))
continue;
@@ -369,7 +471,6 @@ bwrite(struct buf * bp)
struct vnode *vp;
struct mount *mp;
-
if (bp->b_flags & B_INVAL) {
brelse(bp);
return (0);
@@ -381,16 +482,12 @@ bwrite(struct buf * bp)
if ((bp->b_flags & B_BUSY) == 0)
panic("bwrite: buffer is not busy???");
#endif
+ s = splbio();
+ bundirty(bp);
- bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
+ bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
bp->b_flags |= B_WRITEINPROG;
- s = splbio();
- if ((oldflags & B_DELWRI) == B_DELWRI) {
- --numdirtybuffers;
- reassignbuf(bp, bp->b_vp);
- }
-
bp->b_vp->v_numoutput++;
vfs_busy_pages(bp, 1);
if (curproc != NULL)
@@ -420,23 +517,8 @@ bwrite(struct buf * bp)
brelse(bp);
return (rtval);
}
- return (0);
-}
-void
-vfs_bio_need_satisfy(void) {
- ++numfreebuffers;
- if (!needsbuffer)
- return;
- if (numdirtybuffers < lodirtybuffers) {
- needsbuffer &= ~(VFS_BIO_NEED_ANY | VFS_BIO_NEED_LOWLIMIT);
- } else {
- needsbuffer &= ~VFS_BIO_NEED_ANY;
- }
- if (numfreebuffers >= hifreebuffers) {
- needsbuffer &= ~VFS_BIO_NEED_FREE;
- }
- wakeup(&needsbuffer);
+ return (0);
}
/*
@@ -457,12 +539,7 @@ bdwrite(struct buf * bp)
brelse(bp);
return;
}
- bp->b_flags &= ~(B_READ|B_RELBUF);
- if ((bp->b_flags & B_DELWRI) == 0) {
- bp->b_flags |= B_DONE | B_DELWRI;
- reassignbuf(bp, bp->b_vp);
- ++numdirtybuffers;
- }
+ bdirty(bp);
/*
* This bmap keeps the system from needing to do the bmap later,
@@ -506,32 +583,68 @@ bdwrite(struct buf * bp)
if (numdirtybuffers >= hidirtybuffers)
flushdirtybuffers(0, 0);
-
- return;
}
-
/*
- * Same as first half of bdwrite, mark buffer dirty, but do not release it.
- * Check how this compares with vfs_setdirty(); XXX [JRE]
+ * bdirty:
+ *
+ * Turn buffer into delayed write request. We must clear B_READ and
+ * B_RELBUF, and we must set B_DELWRI. We reassign the buffer to
+ * itself to properly update it in the dirty/clean lists. We mark it
+ * B_DONE to ensure that any asynchronization of the buffer properly
+ * clears B_DONE ( else a panic will occur later ). Note that B_INVAL
+ * buffers are not considered dirty even if B_DELWRI is set.
+ *
+ * Since the buffer is not on a queue, we do not update the numfreebuffers
+ * count.
+ *
+ * Must be called at splbio().
+ * The buffer must be on QUEUE_NONE.
*/
void
bdirty(bp)
- struct buf *bp;
+ struct buf *bp;
{
-
- bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */
+ KASSERT(bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
+ bp->b_flags &= ~(B_READ|B_RELBUF);
+
if ((bp->b_flags & B_DELWRI) == 0) {
- bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */
+ bp->b_flags |= B_DONE | B_DELWRI;
reassignbuf(bp, bp->b_vp);
++numdirtybuffers;
}
}
/*
- * Asynchronous write.
- * Start output on a buffer, but do not wait for it to complete.
- * The buffer is released when the output completes.
+ * bundirty:
+ *
+ * Clear B_DELWRI for buffer.
+ *
+ * Since the buffer is not on a queue, we do not update the numfreebuffers
+ * count.
+ *
+ * Must be called at splbio().
+ * The buffer must be on QUEUE_NONE.
+ */
+
+void
+bundirty(bp)
+ struct buf *bp;
+{
+ KASSERT(bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
+
+ if (bp->b_flags & B_DELWRI) {
+ bp->b_flags &= ~B_DELWRI;
+ reassignbuf(bp, bp->b_vp);
+ --numdirtybuffers;
+ }
+}
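Condensed from the bwrite()/bdwrite() changes earlier in this diff, the expected calling pattern is roughly the following, where defer is a hypothetical flag used only for illustration; both helpers require splbio() and a B_BUSY buffer on QUEUE_NONE:

    s = splbio();
    if (defer)
            bdirty(bp);             /* bdwrite(): set B_DELWRI, write later */
    else
            bundirty(bp);           /* bwrite(): clear B_DELWRI, write now  */
    splx(s);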
+
+/*
+ * bawrite:
+ *
+ * Asynchronous write. Start output on a buffer, but do not wait for
+ * it to complete. The buffer is released when the output completes.
*/
void
bawrite(struct buf * bp)
@@ -541,39 +654,42 @@ bawrite(struct buf * bp)
}
/*
- * Ordered write.
- * Start output on a buffer, and flag it so that the device will write
- * it in the order it was queued. The buffer is released when the output
- * completes.
+ * bowrite:
+ *
+ * Ordered write. Start output on a buffer, and flag it so that the
+ * device will write it in the order it was queued. The buffer is
+ * released when the output completes.
*/
int
bowrite(struct buf * bp)
{
- bp->b_flags |= B_ORDERED|B_ASYNC;
+ bp->b_flags |= B_ORDERED | B_ASYNC;
return (VOP_BWRITE(bp));
}
/*
- * Release a buffer.
+ * brelse:
+ *
+ * Release a busy buffer and, if requested, free its resources. The
+ * buffer will be stashed in the appropriate bufqueue[] allowing it
+ * to be accessed later as a cache entity or reused for other purposes.
*/
void
brelse(struct buf * bp)
{
int s;
+ KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+
+#if 0
if (bp->b_flags & B_CLUSTER) {
relpbuf(bp, NULL);
return;
}
+#endif
s = splbio();
- /* anyone need this block? */
- if (bp->b_flags & B_WANTED) {
- bp->b_flags &= ~(B_WANTED | B_AGE);
- wakeup(bp);
- }
-
if (bp->b_flags & B_LOCKED)
bp->b_flags &= ~B_ERROR;
@@ -717,8 +833,8 @@ brelse(struct buf * bp)
if (bp->b_qindex != QUEUE_NONE)
panic("brelse: free buffer onto another queue???");
#endif
-
/* enqueue */
+
/* buffers with no memory */
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
@@ -728,7 +844,8 @@ brelse(struct buf * bp)
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
bp->b_dev = NODEV;
kvafreespace += bp->b_kvasize;
-
+ if (bp->b_kvasize)
+ kvaspacewakeup();
/* buffers with junk contents */
} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
bp->b_flags |= B_INVAL;
@@ -754,15 +871,38 @@ brelse(struct buf * bp)
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
}
- if ((bp->b_flags & B_INVAL) ||
- (bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) {
- if (bp->b_flags & B_DELWRI) {
- --numdirtybuffers;
- bp->b_flags &= ~B_DELWRI;
- }
- vfs_bio_need_satisfy();
+ /*
+ * If B_INVAL, clear B_DELWRI.
+ */
+ if ((bp->b_flags & (B_INVAL|B_DELWRI)) == (B_INVAL|B_DELWRI)) {
+ bp->b_flags &= ~B_DELWRI;
+ --numdirtybuffers;
}
+ runningbufspace -= bp->b_bufsize;
+
+ /*
+ * Fixup numfreebuffers count. The bp is on an appropriate queue
+ * unless locked. We then bump numfreebuffers if it is not B_DELWRI.
+ * We've already handled the B_INVAL case ( B_DELWRI will be clear
+ * if B_INVAL is set ).
+ */
+
+ if ((bp->b_flags & B_LOCKED) == 0 && !(bp->b_flags & B_DELWRI))
+ bufcountwakeup();
+
+ /*
+ * Something we can maybe free.
+ */
+
+ if (bp->b_bufsize)
+ bufspacewakeup();
+
+ if (bp->b_flags & B_WANTED) {
+ bp->b_flags &= ~(B_WANTED | B_AGE);
+ wakeup(bp);
+ }
+
/* unlock */
bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY |
B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
@@ -770,7 +910,8 @@ brelse(struct buf * bp)
}
/*
- * Release a buffer.
+ * Release a buffer back to the appropriate queue but do not try to free
+ * it.
*/
void
bqrelse(struct buf * bp)
@@ -779,17 +920,12 @@ bqrelse(struct buf * bp)
s = splbio();
- /* anyone need this block? */
- if (bp->b_flags & B_WANTED) {
- bp->b_flags &= ~(B_WANTED | B_AGE);
- wakeup(bp);
- }
+ KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
#if !defined(MAX_PERF)
if (bp->b_qindex != QUEUE_NONE)
panic("bqrelse: free buffer onto another queue???");
#endif
-
if (bp->b_flags & B_LOCKED) {
bp->b_flags &= ~B_ERROR;
bp->b_qindex = QUEUE_LOCKED;
@@ -800,10 +936,26 @@ bqrelse(struct buf * bp)
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
}
- if ((bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) {
- vfs_bio_need_satisfy();
+ runningbufspace -= bp->b_bufsize;
+
+ if ((bp->b_flags & B_LOCKED) == 0 &&
+ ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))
+ ) {
+ bufcountwakeup();
}
+ /*
+ * Something we can maybe wakeup
+ */
+ if (bp->b_bufsize)
+ bufspacewakeup();
+
+ /* anyone need this block? */
+ if (bp->b_flags & B_WANTED) {
+ bp->b_flags &= ~(B_WANTED | B_AGE);
+ wakeup(bp);
+ }
+
/* unlock */
bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY |
B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
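The split between the two release paths, as a hypothetical consumer might use them (not taken from this patch): brelse() when the buffer's contents and backing store may be thrown away, bqrelse() when the data is still valid and worth keeping queued for reuse.

    if (error) {
            bp->b_flags |= B_INVAL | B_NOCACHE;
            brelse(bp);             /* discard contents, free resources   */
    } else {
            bqrelse(bp);            /* just requeue, keep the cached data */
    }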
@@ -847,10 +999,13 @@ vfs_vmio_release(bp)
}
}
}
- splx(s);
bufspace -= bp->b_bufsize;
vmiospace -= bp->b_bufsize;
+ runningbufspace -= bp->b_bufsize;
+ splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+ if (bp->b_bufsize)
+ bufspacewakeup();
bp->b_npages = 0;
bp->b_bufsize = 0;
bp->b_flags &= ~B_VMIO;
@@ -902,7 +1057,8 @@ vfs_bio_awrite(struct buf * bp)
s = splbio();
/*
- * right now we support clustered writing only to regular files
+ * right now we support clustered writing only to regular files, and
+ * then only if our I/O system is not saturated.
*/
if ((vp->v_type == VREG) &&
(vp->v_mount != 0) && /* Only on nodes that have the size info */
@@ -943,279 +1099,358 @@ vfs_bio_awrite(struct buf * bp)
*/
nwritten = bp->b_bufsize;
(void) VOP_BWRITE(bp);
+
return nwritten;
}
-
/*
- * Find a buffer header which is available for use.
+ * getnewbuf:
+ *
+ * Find and initialize a new buffer header, freeing up existing buffers
+ * in the bufqueues as necessary.
+ *
+ * We block if:
+ * We have insufficient buffer headers
+ * We have insufficient buffer space
+ * buffer_map is too fragmented ( space reservation fails )
+ *
+ * We do *not* attempt to flush dirty buffers more than one level deep.
+ * I.e., if P_FLSINPROG is set we do not flush dirty buffers at all.
+ *
+ * If P_FLSINPROG is set, we are allowed to dip into our emergency
+ * reserve.
*/
static struct buf *
getnewbuf(struct vnode *vp, daddr_t blkno,
int slpflag, int slptimeo, int size, int maxsize)
{
- struct buf *bp, *bp1;
- int nbyteswritten = 0;
- vm_offset_t addr;
- static int writerecursion = 0;
-
-start:
- if (bufspace >= maxbufspace)
- goto trytofreespace;
+ struct buf *bp;
+ struct buf *nbp;
+ int outofspace;
+ int nqindex;
+ int defrag = 0;
+ int countawrites = 0;
+
+restart:
- /* can we constitute a new buffer? */
- if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) {
-#if !defined(MAX_PERF)
- if (bp->b_qindex != QUEUE_EMPTY)
- panic("getnewbuf: inconsistent EMPTY queue, qindex=%d",
- bp->b_qindex);
-#endif
- bp->b_flags |= B_BUSY;
- bremfree(bp);
- goto fillbuf;
- }
-trytofreespace:
/*
- * We keep the file I/O from hogging metadata I/O
- * This is desirable because file data is cached in the
- * VM/Buffer cache even if a buffer is freed.
+ * Setup for scan. If we do not have enough free buffers,
+ * we setup a degenerate case that falls through the while.
+ *
+ * If we are in the middle of a flush, we can dip into the
+ * emergency reserve.
*/
- if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) {
-#if !defined(MAX_PERF)
- if (bp->b_qindex != QUEUE_AGE)
- panic("getnewbuf: inconsistent AGE queue, qindex=%d",
- bp->b_qindex);
-#endif
- } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) {
-#if !defined(MAX_PERF)
- if (bp->b_qindex != QUEUE_LRU)
- panic("getnewbuf: inconsistent LRU queue, qindex=%d",
- bp->b_qindex);
-#endif
- }
- if (!bp) {
- /* wait for a free buffer of any kind */
- needsbuffer |= VFS_BIO_NEED_ANY;
- do
- tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf",
- slptimeo);
- while (needsbuffer & VFS_BIO_NEED_ANY);
- return (0);
+
+ if ((curproc->p_flag & P_FLSINPROG) == 0 &&
+ numfreebuffers < lofreebuffers
+ ) {
+ nqindex = QUEUE_LRU;
+ nbp = NULL;
+ } else {
+ nqindex = QUEUE_EMPTY;
+ if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY])) == NULL) {
+ nqindex = QUEUE_AGE;
+ nbp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
+ if (nbp == NULL) {
+ nqindex = QUEUE_LRU;
+ nbp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]);
+ }
+ }
}
- KASSERT(!(bp->b_flags & B_BUSY),
- ("getnewbuf: busy buffer on free list\n"));
+
/*
- * We are fairly aggressive about freeing VMIO buffers, but since
- * the buffering is intact without buffer headers, there is not
- * much loss. We gain by maintaining non-VMIOed metadata in buffers.
+ * Calculate whether we are out of buffer space. This state is
+ * recalculated on every restart. If we are out of space, we
+ * have to turn off defragmentation. The outofspace code will
+ * defragment too, but the looping conditionals will be messed up
+ * if both outofspace and defrag are on.
*/
- if ((bp->b_qindex == QUEUE_LRU) && (bp->b_usecount > 0)) {
- if ((bp->b_flags & B_VMIO) == 0 ||
- (vmiospace < maxvmiobufspace)) {
- --bp->b_usecount;
- TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
- if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) {
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
- goto start;
- }
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
+
+ outofspace = 0;
+ if (bufspace >= hibufspace) {
+ if ((curproc->p_flag & P_FLSINPROG) == 0 ||
+ bufspace >= maxbufspace
+ ) {
+ outofspace = 1;
+ defrag = 0;
}
}
+ /*
+ * defrag state is semi-persistent. 1 means we are flagged for
+ * defragging. -1 means we actually defragged something.
+ */
+ /* nop */
- /* if we are a delayed write, convert to an async write */
- if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
+ /*
+ * Run scan, possibly freeing data and/or kva mappings on the fly
+ * depending.
+ */
+ while ((bp = nbp) != NULL) {
+ int qindex = nqindex;
/*
- * If our delayed write is likely to be used soon, then
- * recycle back onto the LRU queue.
+ * Calculate next bp ( we can only use it if we do not block
+ * or do other fancy things ).
*/
- if (vp && (bp->b_vp == vp) && (bp->b_qindex == QUEUE_LRU) &&
- (bp->b_lblkno >= blkno) && (maxsize > 0)) {
+ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) {
+ switch(qindex) {
+ case QUEUE_EMPTY:
+ nqindex = QUEUE_AGE;
+ if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_AGE])))
+ break;
+ /* fall through */
+ case QUEUE_AGE:
+ nqindex = QUEUE_LRU;
+ if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_LRU])))
+ break;
+ /* fall through */
+ case QUEUE_LRU:
+ /*
+ * nbp is NULL.
+ */
+ break;
+ }
+ }
- if (bp->b_usecount > 0) {
- if (bp->b_lblkno < blkno + (MAXPHYS / maxsize)) {
+ /*
+ * Sanity Checks
+ */
+ KASSERT(!(bp->b_flags & B_BUSY), ("getnewbuf: busy buffer %p on free list", bp));
+ KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistent queue %d bp %p", qindex, bp));
- TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
+ /*
+ * Here we try to move NON VMIO buffers to the end of the
+ * LRU queue in order to make VMIO buffers more readily
+ * freeable. We also try to move buffers with a positive
+ * usecount to the end.
+ *
+ * Note that by moving the bp to the end, we setup a following
+ * loop. Since we continue to decrement b_usecount this
+ * is ok and, in fact, desirable.
+ *
+ * If we are at the end of the list, we move ourself to the
+ * same place and need to fixup nbp and nqindex to handle
+ * the following case.
+ */
- if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) {
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
- bp->b_usecount--;
- goto start;
- }
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
+ if ((qindex == QUEUE_LRU) && bp->b_usecount > 0) {
+ if ((bp->b_flags & B_VMIO) == 0 ||
+ (vmiospace < maxvmiobufspace)
+ ) {
+ --bp->b_usecount;
+ TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
+ TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
+ if (nbp == NULL) {
+ nqindex = qindex;
+ nbp = bp;
}
+ continue;
}
}
/*
- * Certain layered filesystems can recursively re-enter the vfs_bio
- * code, due to delayed writes. This helps keep the system from
- * deadlocking.
+ * If we come across a delayed write and numdirtybuffers indicates
+ * that dirty buffers should be flushed, try to write it out, but
+ * only if P_FLSINPROG is not set. We can't afford to recursively
+ * stack more than one level deep due to the possibility of having
+ * deep VFS call stacks.
+ *
+ * Limit the number of dirty buffers we are willing to try
+ * to recover since it really isn't our job here.
*/
- if (writerecursion > 0) {
- if (writerecursion > 5) {
- bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
- while (bp) {
- if ((bp->b_flags & B_DELWRI) == 0)
- break;
- bp = TAILQ_NEXT(bp, b_freelist);
- }
- if (bp == NULL) {
- bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]);
- while (bp) {
- if ((bp->b_flags & B_DELWRI) == 0)
- break;
- bp = TAILQ_NEXT(bp, b_freelist);
- }
- }
- if (bp == NULL)
- panic("getnewbuf: cannot get buffer, infinite recursion failure");
- } else {
- bremfree(bp);
- bp->b_flags |= B_BUSY | B_AGE | B_ASYNC;
- nbyteswritten += bp->b_bufsize;
- ++writerecursion;
- VOP_BWRITE(bp);
- --writerecursion;
- if (!slpflag && !slptimeo) {
- return (0);
- }
- goto start;
+ if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
+ if ((curproc->p_flag & P_FLSINPROG) ||
+ numdirtybuffers < hidirtybuffers ||
+ countawrites > 16
+ ) {
+ continue;
}
- } else {
- ++writerecursion;
- nbyteswritten += vfs_bio_awrite(bp);
- --writerecursion;
- if (!slpflag && !slptimeo) {
- return (0);
+ curproc->p_flag |= P_FLSINPROG;
+ vfs_bio_awrite(bp);
+ curproc->p_flag &= ~P_FLSINPROG;
+ ++countawrites;
+ goto restart;
+ }
+
+ if (defrag > 0 && bp->b_kvasize == 0)
+ continue;
+ if (outofspace > 0 && bp->b_bufsize == 0)
+ continue;
+
+ /*
+ * Start freeing the bp. This is somewhat involved. nbp
+ * remains valid only for QUEUE_EMPTY bp's.
+ */
+
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+
+ if (qindex == QUEUE_LRU || qindex == QUEUE_AGE) {
+ if (bp->b_flags & B_VMIO) {
+ bp->b_flags &= ~B_ASYNC;
+ vfs_vmio_release(bp);
}
- goto start;
+ if (bp->b_vp)
+ brelvp(bp);
}
- }
- if (bp->b_flags & B_WANTED) {
- bp->b_flags &= ~B_WANTED;
- wakeup(bp);
- }
- bremfree(bp);
- bp->b_flags |= B_BUSY;
+ if (bp->b_flags & B_WANTED) {
+ bp->b_flags &= ~B_WANTED;
+ wakeup(bp);
+ }
- if (bp->b_flags & B_VMIO) {
- bp->b_flags &= ~B_ASYNC;
- vfs_vmio_release(bp);
- }
+ /*
+ * NOTE: nbp is now entirely invalid. We can only restart
+ * the scan from this point on.
+ *
+ * Get the rest of the buffer freed up. b_kva* is still
+ * valid after this operation.
+ */
- if (bp->b_vp)
- brelvp(bp);
+ if (bp->b_rcred != NOCRED) {
+ crfree(bp->b_rcred);
+ bp->b_rcred = NOCRED;
+ }
+ if (bp->b_wcred != NOCRED) {
+ crfree(bp->b_wcred);
+ bp->b_wcred = NOCRED;
+ }
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
-fillbuf:
+ LIST_REMOVE(bp, b_hash);
+ LIST_INSERT_HEAD(&invalhash, bp, b_hash);
- /* we are not free, nor do we contain interesting data */
- if (bp->b_rcred != NOCRED) {
- crfree(bp->b_rcred);
- bp->b_rcred = NOCRED;
- }
- if (bp->b_wcred != NOCRED) {
- crfree(bp->b_wcred);
- bp->b_wcred = NOCRED;
- }
- if (LIST_FIRST(&bp->b_dep) != NULL &&
- bioops.io_deallocate)
- (*bioops.io_deallocate)(bp);
-
- LIST_REMOVE(bp, b_hash);
- LIST_INSERT_HEAD(&invalhash, bp, b_hash);
- if (bp->b_bufsize) {
- allocbuf(bp, 0);
- }
- bp->b_flags = B_BUSY;
- bp->b_dev = NODEV;
- bp->b_vp = NULL;
- bp->b_blkno = bp->b_lblkno = 0;
- bp->b_offset = NOOFFSET;
- bp->b_iodone = 0;
- bp->b_error = 0;
- bp->b_resid = 0;
- bp->b_bcount = 0;
- bp->b_npages = 0;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_validoff = bp->b_validend = 0;
- bp->b_usecount = 5;
- /* Here, not kern_physio.c, is where this should be done*/
- LIST_INIT(&bp->b_dep);
+ if (bp->b_bufsize)
+ allocbuf(bp, 0);
- maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
+ bp->b_flags = B_BUSY;
+ bp->b_dev = NODEV;
+ bp->b_vp = NULL;
+ bp->b_blkno = bp->b_lblkno = 0;
+ bp->b_offset = NOOFFSET;
+ bp->b_iodone = 0;
+ bp->b_error = 0;
+ bp->b_resid = 0;
+ bp->b_bcount = 0;
+ bp->b_npages = 0;
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ bp->b_validoff = bp->b_validend = 0;
+ bp->b_usecount = 5;
+
+ LIST_INIT(&bp->b_dep);
- /*
- * we assume that buffer_map is not at address 0
- */
- addr = 0;
- if (maxsize != bp->b_kvasize) {
- bfreekva(bp);
-
-findkvaspace:
/*
- * See if we have buffer kva space
+ * Ok, now that we have a free buffer, if we are defragging
+ * we have to recover the kvaspace.
*/
- if (vm_map_findspace(buffer_map,
- vm_map_min(buffer_map), maxsize, &addr)) {
- if (kvafreespace > 0) {
- int totfree = 0, freed;
- do {
- freed = 0;
- for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
- bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist)) {
- if (bp1->b_kvasize != 0) {
- totfree += bp1->b_kvasize;
- freed = bp1->b_kvasize;
- bremfree(bp1);
- bfreekva(bp1);
- brelse(bp1);
- break;
- }
- }
- } while (freed);
- /*
- * if we found free space, then retry with the same buffer.
- */
- if (totfree)
- goto findkvaspace;
- }
+
+ if (defrag > 0) {
+ defrag = -1;
bp->b_flags |= B_INVAL;
+ bfreekva(bp);
brelse(bp);
- goto trytofreespace;
+ goto restart;
}
- }
- /*
- * See if we are below are allocated minimum
- */
- if (bufspace >= (maxbufspace + nbyteswritten)) {
- bp->b_flags |= B_INVAL;
- brelse(bp);
- goto trytofreespace;
+ if (outofspace > 0) {
+ outofspace = -1;
+ bp->b_flags |= B_INVAL;
+ bfreekva(bp);
+ brelse(bp);
+ goto restart;
+ }
+
+ /*
+ * We are done
+ */
+ break;
}
/*
- * create a map entry for the buffer -- in essence
- * reserving the kva space.
+ * If we exhausted our list, sleep as appropriate.
*/
- if (addr) {
- vm_map_insert(buffer_map, NULL, 0,
- addr, addr + maxsize,
- VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
- bp->b_kvabase = (caddr_t) addr;
- bp->b_kvasize = maxsize;
+ if (bp == NULL) {
+ int flags;
+
+dosleep:
+ if (defrag > 0)
+ flags = VFS_BIO_NEED_KVASPACE;
+ else if (outofspace > 0)
+ flags = VFS_BIO_NEED_BUFSPACE;
+ else
+ flags = VFS_BIO_NEED_ANY;
+
+ if (rushjob < syncdelay / 2)
+ ++rushjob;
+ needsbuffer |= flags;
+ while (needsbuffer & flags) {
+ tsleep(
+ &needsbuffer,
+ (PRIBIO + 4) | slpflag,
+ "newbuf",
+ slptimeo
+ );
+ }
+ } else {
+ /*
+ * We finally have a valid bp. We aren't quite out of the
+ * woods, we still have to reserve kva space.
+ */
+ vm_offset_t addr = 0;
+
+ maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
+
+ if (maxsize != bp->b_kvasize) {
+ bfreekva(bp);
+
+ if (vm_map_findspace(buffer_map,
+ vm_map_min(buffer_map), maxsize, &addr)
+ ) {
+ /*
+ * Uh oh. Buffer map is too fragmented. Try
+ * to defragment.
+ */
+ if (defrag <= 0) {
+ defrag = 1;
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ goto restart;
+ }
+ /*
+ * Uh oh. We couldn't seem to defragment.
+ */
+ bp = NULL;
+ goto dosleep;
+ }
+ }
+ if (addr) {
+ vm_map_insert(buffer_map, NULL, 0,
+ addr, addr + maxsize,
+ VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+
+ bp->b_kvabase = (caddr_t) addr;
+ bp->b_kvasize = maxsize;
+ bp->b_data = bp->b_kvabase;
+ }
}
- bp->b_data = bp->b_kvabase;
return (bp);
}
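Callers are expected to treat a NULL return as "we slept, retry from scratch". Paraphrased (not a verbatim excerpt) from how getblk() drives it:

    if ((bp = getnewbuf(vp, blkno, slpflag, slptimeo, size, maxsize)) == NULL) {
            if (slpflag || slptimeo)
                    return (NULL);  /* bounded wait requested by caller    */
            goto loop;              /* another process may have created it */
    }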
+/*
+ * waitfreebuffers:
+ *
+ * Wait for sufficient free buffers. This routine is not called if
+ * curproc is the update process so we do not have to do anything
+ * fancy.
+ */
+
static void
-waitfreebuffers(int slpflag, int slptimeo) {
+waitfreebuffers(int slpflag, int slptimeo)
+{
while (numfreebuffers < hifreebuffers) {
flushdirtybuffers(slpflag, slptimeo);
if (numfreebuffers < hifreebuffers)
@@ -1226,48 +1461,80 @@ waitfreebuffers(int slpflag, int slptimeo) {
}
}
+/*
+ * flushdirtybuffers:
+ *
+ * This routine is called when we get too many dirty buffers.
+ *
+ * We have to protect ourselves from recursion, but we also do not want
+ * another process's flushdirtybuffers() to interfere with the syncer if
+ * it decides to call flushdirtybuffers().
+ *
+ * In order to maximize operations, we allow any process to flush
+ * dirty buffers and use P_FLSINPROG to prevent recursion.
+ */
+
static void
-flushdirtybuffers(int slpflag, int slptimeo) {
+flushdirtybuffers(int slpflag, int slptimeo)
+{
int s;
- static pid_t flushing = 0;
s = splbio();
- if (flushing) {
- if (flushing == curproc->p_pid) {
- splx(s);
- return;
- }
- while (flushing) {
- if (tsleep(&flushing, (PRIBIO + 4)|slpflag, "biofls", slptimeo)) {
- splx(s);
- return;
- }
- }
+ if (curproc->p_flag & P_FLSINPROG) {
+ splx(s);
+ return;
}
- flushing = curproc->p_pid;
+ curproc->p_flag |= P_FLSINPROG;
while (numdirtybuffers > lodirtybuffers) {
- struct buf *bp;
- needsbuffer |= VFS_BIO_NEED_LOWLIMIT;
- bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
- if (bp == NULL)
- bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]);
-
- while (bp && ((bp->b_flags & B_DELWRI) == 0)) {
- bp = TAILQ_NEXT(bp, b_freelist);
+ if (flushbufqueues() == 0)
+ break;
+ }
+
+ curproc->p_flag &= ~P_FLSINPROG;
+
+ splx(s);
+}
+
+static int
+flushbufqueues(void)
+{
+ struct buf *bp;
+ int qindex;
+ int r = 0;
+
+ qindex = QUEUE_AGE;
+ bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]);
+
+ for (;;) {
+ if (bp == NULL) {
+ if (qindex == QUEUE_LRU)
+ break;
+ qindex = QUEUE_LRU;
+ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU])) == NULL)
+ break;
}
- if (bp) {
- vfs_bio_awrite(bp);
- continue;
+ /*
+ * XXX NFS does weird things with B_INVAL bps if we bwrite
+ * them ( vfs_bio_awrite/bawrite/bdwrite/etc ) Why?
+ *
+ */
+ if ((bp->b_flags & B_DELWRI) != 0) {
+ if (bp->b_flags & B_INVAL) {
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ brelse(bp);
+ } else {
+ vfs_bio_awrite(bp);
+ }
+ ++r;
+ break;
}
- break;
+ bp = TAILQ_NEXT(bp, b_freelist);
}
-
- flushing = 0;
- wakeup(&flushing);
- splx(s);
+ return(r);
}
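For context, the producer side of this hysteresis is in the bdwrite() hunk earlier in the diff: writers kick off a flush once the dirty count reaches hidirtybuffers, and flushdirtybuffers() then drains it back down to lodirtybuffers, one buffer per flushbufqueues() call.

    if (numdirtybuffers >= hidirtybuffers)
            flushdirtybuffers(0, 0);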
/*
@@ -1335,21 +1602,29 @@ inmem(struct vnode * vp, daddr_t blkno)
* code, and used by the nfs read code.
*/
static void
-vfs_setdirty(struct buf *bp) {
+vfs_setdirty(struct buf *bp)
+{
int i;
vm_object_t object;
vm_offset_t boffset;
-#if 0
- vm_offset_t offset;
-#endif
/*
* We qualify the scan for modified pages on whether the
* object has been flushed yet. The OBJ_WRITEABLE flag
* is not cleared simply by protecting pages off.
*/
- if ((bp->b_flags & B_VMIO) &&
- ((object = bp->b_pages[0]->object)->flags & (OBJ_WRITEABLE|OBJ_CLEANING))) {
+
+ if ((bp->b_flags & B_VMIO) == 0)
+ return;
+
+ object = bp->b_pages[0]->object;
+
+ if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
+ printf("Warning: object %p writeable but not mightbedirty\n", object);
+ if (!(object->flags & OBJ_WRITEABLE) && (object->flags & OBJ_MIGHTBEDIRTY))
+ printf("Warning: object %p mightbedirty but not writeable\n", object);
+
+ if (object->flags & (OBJ_MIGHTBEDIRTY|OBJ_CLEANING)) {
/*
* test the pages to see if they have been modified directly
* by users through the VM system.
@@ -1410,7 +1685,15 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
s = splbio();
loop:
- if (numfreebuffers < lofreebuffers) {
+ /*
+ * Block if we are low on buffers. The syncer is allowed more
+ * buffers in order to avoid a deadlock.
+ */
+ if (curproc == updateproc && numfreebuffers == 0) {
+ needsbuffer |= VFS_BIO_NEED_ANY;
+ tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf",
+ slptimeo);
+ } else if (curproc != updateproc && numfreebuffers < lofreebuffers) {
waitfreebuffers(slpflag, slptimeo);
}
@@ -1655,6 +1938,9 @@ allocbuf(struct buf *bp, int size)
free(bp->b_data, M_BIOBUF);
bufspace -= bp->b_bufsize;
bufmallocspace -= bp->b_bufsize;
+ runningbufspace -= bp->b_bufsize;
+ if (bp->b_bufsize)
+ bufspacewakeup();
bp->b_data = bp->b_kvabase;
bp->b_bufsize = 0;
bp->b_bcount = 0;
@@ -1683,6 +1969,7 @@ allocbuf(struct buf *bp, int size)
bp->b_flags |= B_MALLOC;
bufspace += mbsize;
bufmallocspace += mbsize;
+ runningbufspace += bp->b_bufsize;
return 1;
}
#endif
@@ -1699,6 +1986,9 @@ allocbuf(struct buf *bp, int size)
bp->b_data = bp->b_kvabase;
bufspace -= bp->b_bufsize;
bufmallocspace -= bp->b_bufsize;
+ runningbufspace -= bp->b_bufsize;
+ if (bp->b_bufsize)
+ bufspacewakeup();
bp->b_bufsize = 0;
bp->b_flags &= ~B_MALLOC;
newbsize = round_page(newbsize);
@@ -1862,6 +2152,9 @@ allocbuf(struct buf *bp, int size)
if (bp->b_flags & B_VMIO)
vmiospace += (newbsize - bp->b_bufsize);
bufspace += (newbsize - bp->b_bufsize);
+ runningbufspace += (newbsize - bp->b_bufsize);
+ if (newbsize < bp->b_bufsize)
+ bufspacewakeup();
bp->b_bufsize = newbsize;
bp->b_bcount = size;
return 1;
@@ -1909,18 +2202,9 @@ biodone(register struct buf * bp)
s = splbio();
-#if !defined(MAX_PERF)
- if (!(bp->b_flags & B_BUSY))
- panic("biodone: buffer not busy");
-#endif
+ KASSERT((bp->b_flags & B_BUSY), ("biodone: bp %p not busy", bp));
+ KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp));
- if (bp->b_flags & B_DONE) {
- splx(s);
-#if !defined(MAX_PERF)
- printf("biodone: buffer already done\n");
-#endif
- return;
- }
bp->b_flags |= B_DONE;
if (bp->b_flags & B_FREEBUF) {