Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--  sys/kern/vfs_bio.c  381
1 file changed, 214 insertions, 167 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 5c478c6ccb4a..47e82761db8c 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -11,7 +11,7 @@
* 2. Absolutely no warranty of function or purpose is made by the author
* John S. Dyson.
*
- * $Id: vfs_bio.c,v 1.219 1999/06/29 05:59:41 peter Exp $
+ * $Id: vfs_bio.c,v 1.220 1999/07/04 00:25:27 mckusick Exp $
*/
/*
@@ -90,14 +90,11 @@ static int bufspace, maxbufspace, vmiospace,
#if 0
static int maxvmiobufspace;
#endif
+static int maxbdrun;
static int needsbuffer;
static int numdirtybuffers, lodirtybuffers, hidirtybuffers;
static int numfreebuffers, lofreebuffers, hifreebuffers;
static int getnewbufcalls;
-static int getnewbufloops;
-static int getnewbufloops1;
-static int getnewbufloops2;
-static int getnewbufloops3;
static int getnewbufrestarts;
static int kvafreespace;
@@ -121,6 +118,8 @@ SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD,
&hibufspace, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD,
&bufspace, 0, "");
+SYSCTL_INT(_vfs, OID_AUTO, maxbdrun, CTLFLAG_RW,
+ &maxbdrun, 0, "");
#if 0
SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW,
&maxvmiobufspace, 0, "");
@@ -135,18 +134,12 @@ SYSCTL_INT(_vfs, OID_AUTO, kvafreespace, CTLFLAG_RD,
&kvafreespace, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW,
&getnewbufcalls, 0, "");
-SYSCTL_INT(_vfs, OID_AUTO, getnewbufloops, CTLFLAG_RW,
- &getnewbufloops, 0, "");
-SYSCTL_INT(_vfs, OID_AUTO, getnewbufloops1, CTLFLAG_RW,
- &getnewbufloops1, 0, "");
-SYSCTL_INT(_vfs, OID_AUTO, getnewbufloops2, CTLFLAG_RW,
- &getnewbufloops2, 0, "");
-SYSCTL_INT(_vfs, OID_AUTO, getnewbufloops3, CTLFLAG_RW,
- &getnewbufloops3, 0, "");
SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW,
&getnewbufrestarts, 0, "");
-static LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash;
+
+static int bufhashmask;
+static LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
struct bqueues bufqueues[BUFFER_QUEUES] = { { 0 } };
char *buf_wmesg = BUF_WMESG;
@@ -155,12 +148,24 @@ extern int vm_swap_size;
#define BUF_MAXUSE 24
#define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */
-#define VFS_BIO_NEED_RESERVED02 0x02 /* unused */
+#define VFS_BIO_NEED_DIRTYFLUSH 0x02 /* waiting for dirty buffer flush */
#define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */
#define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */
#define VFS_BIO_NEED_KVASPACE 0x10 /* wait for buffer_map space, emerg */
/*
+ * Buffer hash table code. Note that logical block numbers scan the table
+ * linearly, which gives us some L1 cache locality.
+ */
+
+static __inline
+struct bufhashhdr *
+bufhash(struct vnode *vnp, daddr_t bn)
+{
+ return(&bufhashtbl[(((uintptr_t)(vnp) >> 7) + (int)bn) & bufhashmask]);
+}
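To see what the new inline buys, here is a minimal userland sketch of the same mixing step; the stub type and the 256-bucket table are hypothetical stand-ins, and only the shift/add/mask expression is taken from bufhash() above. Consecutive logical blocks of one vnode land in consecutive buckets, which is where the L1 cache locality mentioned in the comment comes from.

#include <stdint.h>
#include <stdio.h>

struct vnode_stub { int pad; };              /* hypothetical stand-in for struct vnode */
static unsigned bufhashmask_demo = 256 - 1;  /* hypothetical 256-bucket table */

/* Same mixing as bufhash(): drop pointer alignment bits, add the block number, mask. */
static unsigned
bufhash_demo(struct vnode_stub *vp, long long bn)
{
        return ((unsigned)((((uintptr_t)vp >> 7) + (int)bn) & bufhashmask_demo));
}

int
main(void)
{
        struct vnode_stub vn;
        long long bn;

        for (bn = 0; bn < 4; bn++)
                printf("lblkno %lld -> bucket %u\n", bn, bufhash_demo(&vn, bn));
        return (0);
}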
+
+/*
* kvaspacewakeup:
*
* Called when kva space is potentially available for recovery or when
@@ -185,6 +190,24 @@ kvaspacewakeup(void)
}
/*
+ * numdirtywakeup:
+ *
+ * If someone is blocked due to there being too many dirty buffers,
+ * and numdirtybuffers is now reasonable, wake them up.
+ */
+
+static __inline void
+numdirtywakeup(void)
+{
+ if (numdirtybuffers < hidirtybuffers) {
+ if (needsbuffer & VFS_BIO_NEED_DIRTYFLUSH) {
+ needsbuffer &= ~VFS_BIO_NEED_DIRTYFLUSH;
+ wakeup(&needsbuffer);
+ }
+ }
+}
+
+/*
* bufspacewakeup:
*
* Called when buffer space is potentially available for recovery or when
@@ -260,10 +283,23 @@ bd_wakeup(int dirtybuflevel)
/*
- * Initialize buffer headers and related structures.
+ * Initialize buffer headers and related structures.
*/
+
+vm_offset_t
+bufhashinit(vm_offset_t vaddr)
+{
+ /* first, make a null hash table */
+ for (bufhashmask = 8; bufhashmask < nbuf / 4; bufhashmask <<= 1)
+ ;
+ bufhashtbl = (void *)vaddr;
+ vaddr = vaddr + sizeof(*bufhashtbl) * bufhashmask;
+ --bufhashmask;
+ return(vaddr);
+}
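The sizing rule above picks the smallest power of two that is at least 8 and not less than nbuf / 4, carves that many bucket heads out of the KVA passed in, and keeps size - 1 as the mask. As a worked example with a hypothetical nbuf of 1024, the loop stops at 256, so 256 buckets are reserved and bufhashmask ends up as 255. The same arithmetic as a standalone sketch:

#include <stdio.h>

/* The bufhashinit() sizing loop, run against a hypothetical nbuf. */
int
main(void)
{
        int nbuf = 1024;        /* hypothetical buffer count */
        int size;

        for (size = 8; size < nbuf / 4; size <<= 1)
                ;
        printf("buckets = %d, bufhashmask = %d\n", size, size - 1);
        return (0);
}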
+
void
-bufinit()
+bufinit(void)
{
struct buf *bp;
int i;
@@ -272,8 +308,7 @@ bufinit()
LIST_INIT(&invalhash);
simple_lock_init(&buftimelock);
- /* first, make a null hash table */
- for (i = 0; i < BUFHSZ; i++)
+ for (i = 0; i <= bufhashmask; i++)
LIST_INIT(&bufhashtbl[i]);
/* next, make a null set of free lists */
@@ -329,8 +364,8 @@ bufinit()
* Reduce the chance of a deadlock occurring by limiting the number
* of delayed-write dirty buffers we allow to stack up.
*/
- lodirtybuffers = nbuf / 6 + 10;
- hidirtybuffers = nbuf / 3 + 20;
+ lodirtybuffers = nbuf / 7 + 10;
+ hidirtybuffers = nbuf / 4 + 20;
numdirtybuffers = 0;
/*
@@ -341,6 +376,15 @@ bufinit()
hifreebuffers = 2 * lofreebuffers;
numfreebuffers = nbuf;
+/*
+ * Maximum number of async ops initiated per buf_daemon loop. This is
+ * somewhat of a hack at the moment; we really need to limit ourselves
+ * based on the number of bytes of I/O in-transit that were initiated
+ * from buf_daemon.
+ */
+ if ((maxbdrun = nswbuf / 4) < 4)
+ maxbdrun = 4;
+
kvafreespace = 0;
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
@@ -383,19 +427,14 @@ bremfree(struct buf * bp)
if (bp->b_qindex == QUEUE_EMPTYKVA) {
kvafreespace -= bp->b_kvasize;
}
- if (BUF_REFCNT(bp) == 1)
- TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
- else if (BUF_REFCNT(bp) == 0)
- panic("bremfree: not locked");
- else
- /* Temporary panic to verify exclusive locking */
- /* This panic goes away when we allow shared refs */
- panic("bremfree: multiple refs");
+ KASSERT(BUF_REFCNT(bp) == 0, ("bremfree: bp %p not locked",bp));
+ TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
bp->b_qindex = QUEUE_NONE;
runningbufspace += bp->b_bufsize;
} else {
#if !defined(MAX_PERF)
- panic("bremfree: removing a buffer when not on a queue");
+ if (BUF_REFCNT(bp) <= 1)
+ panic("bremfree: removing a buffer not on a queue");
#endif
}
@@ -599,7 +638,9 @@ bwrite(struct buf * bp)
void
bdwrite(struct buf * bp)
{
+#if 0
struct vnode *vp;
+#endif
#if !defined(MAX_PERF)
if (BUF_REFCNT(bp) == 0)
@@ -654,6 +695,11 @@ bdwrite(struct buf * bp)
bd_wakeup(hidirtybuffers);
/*
+ * note: we cannot initiate I/O from a bdwrite even if we wanted to,
+ * due to the softdep code.
+ */
+#if 0
+ /*
* XXX The soft dependency code is not prepared to
* have I/O done when a bdwrite is requested. For
* now we just let the write be delayed if it is
@@ -664,6 +710,7 @@ bdwrite(struct buf * bp)
(vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) ||
(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))))
return;
+#endif
}
/*
@@ -722,6 +769,7 @@ bundirty(bp)
bp->b_flags &= ~B_DELWRI;
reassignbuf(bp, bp->b_vp);
--numdirtybuffers;
+ numdirtywakeup();
}
}
@@ -757,6 +805,34 @@ bowrite(struct buf * bp)
}
/*
+ * bwillwrite:
+ *
+ * Called prior to the locking of any vnodes when we are expecting to
+ * write. We do not want to starve the buffer cache with too many
+ * dirty buffers so we block here. By blocking prior to the locking
+ * of any vnodes we attempt to avoid the situation where a locked vnode
+ * prevents the various system daemons from flushing related buffers.
+ */
+
+void
+bwillwrite(void)
+{
+ int twenty = (hidirtybuffers - lodirtybuffers) / 5;
+
+ if (numdirtybuffers > hidirtybuffers + twenty) {
+ int s;
+
+ s = splbio();
+ while (numdirtybuffers > hidirtybuffers) {
+ bd_wakeup(hidirtybuffers);
+ needsbuffer |= VFS_BIO_NEED_DIRTYFLUSH;
+ tsleep(&needsbuffer, (PRIBIO + 4), "flswai", 0);
+ }
+ splx(s);
+ }
+}
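The throttle only engages once numdirtybuffers exceeds hidirtybuffers by a further 20% of the lo/hi range, and the sleeper is released by numdirtywakeup() as soon as the count drops back below hidirtybuffers. A small sketch of just that threshold arithmetic, using hypothetical water marks derived from an nbuf of 2000 via the bufinit() formulas above:

#include <stdio.h>

/* bwillwrite()/numdirtywakeup() thresholds for hypothetical water marks. */
int
main(void)
{
        int lodirtybuffers = 2000 / 7 + 10;     /* 295, as bufinit() would set for nbuf = 2000 */
        int hidirtybuffers = 2000 / 4 + 20;     /* 520 */
        int twenty = (hidirtybuffers - lodirtybuffers) / 5;

        printf("bwillwrite() blocks when numdirtybuffers > %d\n",
            hidirtybuffers + twenty);
        printf("numdirtywakeup() releases it when numdirtybuffers < %d\n",
            hidirtybuffers);
        return (0);
}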
+
+/*
* brelse:
*
* Release a busy buffer and, if requested, free its resources. The
@@ -799,8 +875,10 @@ brelse(struct buf * bp)
bp->b_flags |= B_INVAL;
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
- if (bp->b_flags & B_DELWRI)
+ if (bp->b_flags & B_DELWRI) {
--numdirtybuffers;
+ numdirtywakeup();
+ }
bp->b_flags &= ~(B_DELWRI | B_CACHE | B_FREEBUF);
if ((bp->b_flags & B_VMIO) == 0) {
if (bp->b_bufsize)
@@ -991,6 +1069,7 @@ brelse(struct buf * bp)
if ((bp->b_flags & (B_INVAL|B_DELWRI)) == (B_INVAL|B_DELWRI)) {
bp->b_flags &= ~B_DELWRI;
--numdirtybuffers;
+ numdirtywakeup();
}
runningbufspace -= bp->b_bufsize;
@@ -1070,7 +1149,7 @@ bqrelse(struct buf * bp)
/*
* Something we can maybe wakeup
*/
- if (bp->b_bufsize)
+ if (bp->b_bufsize && !(bp->b_flags & B_DELWRI))
bufspacewakeup();
/* unlock */
@@ -1139,7 +1218,7 @@ gbincore(struct vnode * vp, daddr_t blkno)
struct buf *bp;
struct bufhashhdr *bh;
- bh = BUFHASH(vp, blkno);
+ bh = bufhash(vp, blkno);
bp = bh->lh_first;
/* Search hash chain */
@@ -1155,14 +1234,18 @@ gbincore(struct vnode * vp, daddr_t blkno)
}
/*
- * this routine implements clustered async writes for
- * clearing out B_DELWRI buffers... This is much better
- * than the old way of writing only one buffer at a time.
+ * vfs_bio_awrite:
+ *
+ * Implement clustered async writes for clearing out B_DELWRI buffers.
+ * This is much better than the old way of writing only one buffer at
+ * a time. Note that we may not be presented with the buffers in the
+ * correct order, so we search for the cluster in both directions.
*/
int
vfs_bio_awrite(struct buf * bp)
{
int i;
+ int j;
daddr_t lblkno = bp->b_lblkno;
struct vnode *vp = bp->b_vp;
int s;
@@ -1174,8 +1257,9 @@ vfs_bio_awrite(struct buf * bp)
s = splbio();
/*
- * right now we support clustered writing only to regular files, and
- * then only if our I/O system is not saturated.
+ * right now we support clustered writing only to regular files. If
+ * we find a clusterable block, we could be in the middle of a cluster
+ * rather than at the beginning.
*/
if ((vp->v_type == VREG) &&
(vp->v_mount != 0) && /* Only on nodes that have the size info */
@@ -1191,18 +1275,34 @@ vfs_bio_awrite(struct buf * bp)
(B_DELWRI | B_CLUSTEROK)) &&
(bpa->b_bufsize == size)) {
if ((bpa->b_blkno == bpa->b_lblkno) ||
- (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT)))
+ (bpa->b_blkno !=
+ bp->b_blkno + ((i * size) >> DEV_BSHIFT)))
break;
} else {
break;
}
}
- ncl = i;
+ for (j = 1; i + j <= maxcl && j <= lblkno; j++) {
+ if ((bpa = gbincore(vp, lblkno - j)) &&
+ BUF_REFCNT(bpa) == 0 &&
+ ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
+ (B_DELWRI | B_CLUSTEROK)) &&
+ (bpa->b_bufsize == size)) {
+ if ((bpa->b_blkno == bpa->b_lblkno) ||
+ (bpa->b_blkno !=
+ bp->b_blkno - ((j * size) >> DEV_BSHIFT)))
+ break;
+ } else {
+ break;
+ }
+ }
+ --j;
+ ncl = i + j;
/*
* this is a possible cluster write
*/
if (ncl != 1) {
- nwritten = cluster_wbuild(vp, size, lblkno, ncl);
+ nwritten = cluster_wbuild(vp, size, lblkno - j, ncl);
splx(s);
return nwritten;
}
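In the two scans above, i ends up counting bp itself plus the clusterable buffers found after it, while j counts those found before it, so the cluster handed to cluster_wbuild() starts at lblkno - j and spans ncl = i + j buffers. A hypothetical worked example, with made-up run lengths:

#include <stdio.h>

/* Cluster bounds from the forward/backward scans, with hypothetical run lengths. */
int
main(void)
{
        long lblkno = 10;       /* logical block of the buffer being flushed */
        int i = 3;              /* bp plus two clusterable buffers found ahead of it */
        int j = 2;              /* two clusterable buffers found behind it */

        /* Mirrors the call: cluster_wbuild(vp, size, lblkno - j, ncl) */
        printf("start block = %ld, ncl = %d\n", lblkno - j, i + j);
        return (0);
}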
@@ -1240,21 +1340,12 @@ vfs_bio_awrite(struct buf * bp)
* If we have to flush dirty buffers ( but we try to avoid this )
*
* To avoid VFS layer recursion we do not flush dirty buffers ourselves.
- * Instead we ask the pageout daemon to do it for us. We attempt to
+ * Instead we ask the buf daemon to do it for us. We attempt to
* avoid piecemeal wakeups of the pageout daemon.
*/
- /*
- * We fully expect to be able to handle any fragmentation and buffer
- * space issues by freeing QUEUE_CLEAN buffers. If this fails, we
- * have to wakeup the pageout daemon and ask it to flush some of our
- * QUEUE_DIRTY buffers. We have to be careful to prevent a deadlock.
- * XXX
- */
-
static struct buf *
-getnewbuf(struct vnode *vp, daddr_t blkno,
- int slpflag, int slptimeo, int size, int maxsize)
+getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
{
struct buf *bp;
struct buf *nbp;
@@ -1262,8 +1353,6 @@ getnewbuf(struct vnode *vp, daddr_t blkno,
int outofspace;
int nqindex;
int defrag = 0;
- static int newbufcnt = 0;
- int lastnewbuf = newbufcnt;
++getnewbufcalls;
--getnewbufrestarts;
@@ -1338,13 +1427,9 @@ restart:
* depending.
*/
- if (nbp)
- --getnewbufloops;
-
while ((bp = nbp) != NULL) {
int qindex = nqindex;
- ++getnewbufloops;
/*
* Calculate next bp ( we can only use it if we do not block
* or do other fancy things ).
@@ -1372,7 +1457,6 @@ restart:
/*
* Sanity Checks
*/
- KASSERT(BUF_REFCNT(bp) == 0, ("getnewbuf: busy buffer %p on free list", bp));
KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
/*
@@ -1388,14 +1472,10 @@ restart:
* buffer isn't useful for fixing that problem we continue.
*/
- if (defrag > 0 && bp->b_kvasize == 0) {
- ++getnewbufloops1;
+ if (defrag > 0 && bp->b_kvasize == 0)
continue;
- }
- if (outofspace > 0 && bp->b_bufsize == 0) {
- ++getnewbufloops2;
+ if (outofspace > 0 && bp->b_bufsize == 0)
continue;
- }
/*
* Start freeing the bp. This is somewhat involved. nbp
@@ -1433,7 +1513,6 @@ restart:
}
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
-
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
@@ -1451,7 +1530,6 @@ restart:
bp->b_bcount = 0;
bp->b_npages = 0;
bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_usecount = 5;
LIST_INIT(&bp->b_dep);
@@ -1489,19 +1567,26 @@ restart:
/*
* If we exhausted our list, sleep as appropriate. We may have to
- * wakeup the pageout daemon to write out some dirty buffers.
+ * wakeup various daemons and write out some dirty buffers.
+ *
+ * Generally we are sleeping due to insufficient buffer space.
*/
if (bp == NULL) {
int flags;
+ char *waitmsg;
dosleep:
- if (defrag > 0)
+ if (defrag > 0) {
flags = VFS_BIO_NEED_KVASPACE;
- else if (outofspace > 0)
+ waitmsg = "nbufkv";
+ } else if (outofspace > 0) {
+ waitmsg = "nbufbs";
flags = VFS_BIO_NEED_BUFSPACE;
- else
+ } else {
+ waitmsg = "newbuf";
flags = VFS_BIO_NEED_ANY;
+ }
/* XXX */
@@ -1509,7 +1594,7 @@ dosleep:
needsbuffer |= flags;
while (needsbuffer & flags) {
if (tsleep(&needsbuffer, (PRIBIO + 4) | slpflag,
- "newbuf", slptimeo))
+ waitmsg, slptimeo))
return (NULL);
}
} else {
@@ -1553,42 +1638,7 @@ dosleep:
}
bp->b_data = bp->b_kvabase;
}
-
- /*
- * If we have slept at some point in this process and another
- * process has managed to allocate a new buffer while we slept,
- * we have to return NULL so that our caller can recheck to
- * ensure that the other process did not create an identically
- * identified buffer to the one we were requesting. We make this
- * check by incrementing the static int newbufcnt each time we
- * successfully allocate a new buffer. By saving the value of
- * newbufcnt in our local lastnewbuf, we can compare newbufcnt
- * with lastnewbuf to see if any other process managed to
- * allocate a buffer while we were doing so ourselves.
- *
- * Note that bp, if valid, is locked.
- */
- if (lastnewbuf == newbufcnt) {
- /*
- * No buffers allocated, so we can return one if we were
- * successful, or continue trying if we were not successful.
- */
- if (bp != NULL) {
- newbufcnt += 1;
- return (bp);
- }
- goto restart;
- }
- /*
- * Another process allocated a buffer since we were called, so
- * we have to free the one we allocated and return NULL to let
- * our caller recheck to see if a new buffer is still needed.
- */
- if (bp != NULL) {
- bp->b_flags |= B_INVAL;
- brelse(bp);
- }
- return (NULL);
+ return(bp);
}
/*
@@ -1601,7 +1651,6 @@ static void
waitfreebuffers(int slpflag, int slptimeo)
{
while (numfreebuffers < hifreebuffers) {
- bd_wakeup(0);
if (numfreebuffers >= hifreebuffers)
break;
needsbuffer |= VFS_BIO_NEED_FREE;
@@ -1646,60 +1695,72 @@ buf_daemon()
bd_request = 0;
/*
- * Do the flush.
+ * Do the flush. Limit the number of buffers we flush in one
+ * go. The failure condition occurs when processes are writing
+ * buffers faster than we can dispose of them. In this case
+ * we may be flushing so often that the previous set of flushes
+ * have not had time to complete, causing us to run out of
+ * physical buffers and block.
*/
{
- while (numdirtybuffers > bd_flushto) {
+ int runcount = maxbdrun;
+
+ while (numdirtybuffers > bd_flushto && runcount) {
+ --runcount;
if (flushbufqueues() == 0)
break;
}
}
/*
- * Whew. If nobody is requesting anything we sleep until the
- * next event. If we sleep and the sleep times out and
- * nobody is waiting for interesting things we back-off.
- * Otherwise we get more aggressive.
+ * If nobody is requesting anything we sleep
*/
+ if (bd_request == 0)
+ tsleep(&bd_request, PVM, "psleep", bd_interval);
- if (bd_request == 0 &&
- tsleep(&bd_request, PVM, "psleep", bd_interval) &&
- needsbuffer == 0) {
- /*
- * timed out and nothing serious going on,
- * increase the flushto high water mark to reduce
- * the flush rate.
- */
- bd_flushto += 10;
- } else {
- /*
- * We were woken up or hit a serious wall that needs
- * to be addressed.
- */
- bd_flushto -= 10;
- if (needsbuffer) {
- int middb = (lodirtybuffers+hidirtybuffers)/2;
- bd_interval >>= 1;
- if (bd_flushto > middb)
- bd_flushto = middb;
- }
+ /*
+ * We calculate how much to add or subtract from bd_flushto
+ * and bd_interval based on how far off we are from the
+ * optimal number of dirty buffers, which is 20% below the
+ * hidirtybuffers mark. We cannot use hidirtybuffers straight
+ * because being right on the mark will cause getnewbuf()
+ * to oscillate our wakeup.
+ *
+ * The larger the error in either direction, the more we adjust
+ * bd_flushto and bd_interval. The time interval is adjusted
+ * by 2 seconds per whole-buffer-range of error. This is an
+ * exponential convergence algorithm, with large errors
+ * producing large changes and small errors producing small
+ * changes.
+ */
+
+ {
+ int brange = hidirtybuffers - lodirtybuffers;
+ int middb = hidirtybuffers - brange / 5;
+ int deltabuf = middb - numdirtybuffers;
+
+ bd_flushto += deltabuf / 20;
+ bd_interval += deltabuf * (2 * hz) / (brange * 1);
}
- if (bd_flushto < lodirtybuffers) {
+ if (bd_flushto < lodirtybuffers)
bd_flushto = lodirtybuffers;
- bd_interval -= hz / 10;
- }
- if (bd_flushto > hidirtybuffers) {
+ if (bd_flushto > hidirtybuffers)
bd_flushto = hidirtybuffers;
- bd_interval += hz / 10;
- }
if (bd_interval < hz / 10)
bd_interval = hz / 10;
-
if (bd_interval > 5 * hz)
bd_interval = 5 * hz;
}
}
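The feedback step described in the comment above can be checked with concrete numbers. Assuming hypothetical water marks from an nbuf of 2000 (lodirtybuffers = 295, hidirtybuffers = 520) and hz = 100, the target middb is 475 dirty buffers; a backlog of 575 gives deltabuf = -100, so bd_flushto drops by 5 and bd_interval shrinks by roughly 88 ticks, i.e. the daemon wakes sooner and flushes down to a lower threshold, with the clamps below bounding both values. The same step as a standalone sketch:

#include <stdio.h>

/* One buf_daemon feedback step with hypothetical water marks, hz and dirty count. */
int
main(void)
{
        int hz = 100;
        int lodirtybuffers = 295, hidirtybuffers = 520;
        int numdirtybuffers = 575;

        int brange = hidirtybuffers - lodirtybuffers;
        int middb = hidirtybuffers - brange / 5;
        int deltabuf = middb - numdirtybuffers;

        printf("bd_flushto  += %d\n", deltabuf / 20);
        printf("bd_interval += %d ticks\n", deltabuf * (2 * hz) / brange);
        return (0);
}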
+/*
+ * flushbufqueues:
+ *
+ * Try to flush a buffer in the dirty queue. We must be careful to
+ * free up B_INVAL buffers instead of writing them, which NFS is
+ * particularly sensitive to.
+ */
+
static int
flushbufqueues(void)
{
@@ -1709,15 +1770,6 @@ flushbufqueues(void)
bp = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]);
while (bp) {
- /*
- * Try to free up B_INVAL delayed-write buffers rather then
- * writing them out. Note also that NFS is somewhat sensitive
- * to B_INVAL buffers so it is doubly important that we do
- * this.
- *
- * We do not try to sync buffers whos vnodes are locked, we
- * cannot afford to block in this process.
- */
KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp));
if ((bp->b_flags & B_DELWRI) != 0) {
if (bp->b_flags & B_INVAL) {
@@ -1728,11 +1780,9 @@ flushbufqueues(void)
++r;
break;
}
- if (!VOP_ISLOCKED(bp->b_vp)) {
- vfs_bio_awrite(bp);
- ++r;
- break;
- }
+ vfs_bio_awrite(bp);
+ ++r;
+ break;
}
bp = TAILQ_NEXT(bp, b_freelist);
}
@@ -1957,8 +2007,6 @@ loop:
*/
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
- if (bp->b_usecount < BUF_MAXUSE)
- ++bp->b_usecount;
if (BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
"getblk", slpflag, slptimeo) == ENOLCK)
goto loop;
@@ -2036,8 +2084,6 @@ loop:
goto loop;
}
- if (bp->b_usecount < BUF_MAXUSE)
- ++bp->b_usecount;
splx(s);
bp->b_flags &= ~B_DONE;
} else {
@@ -2063,8 +2109,7 @@ loop:
maxsize = vmio ? size + (offset & PAGE_MASK) : size;
maxsize = imax(maxsize, bsize);
- if ((bp = getnewbuf(vp, blkno,
- slpflag, slptimeo, size, maxsize)) == NULL) {
+ if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == NULL) {
if (slpflag || slptimeo) {
splx(s);
return NULL;
@@ -2079,7 +2124,8 @@ loop:
* If the buffer is created out from under us, we have to
* throw away the one we just created. There is no window
* race because we are safely running at splbio() from the
- * point of the duplicate buffer creation through to here.
+ * point of the duplicate buffer creation through to here,
+ * and we've locked the buffer.
*/
if (gbincore(vp, blkno)) {
bp->b_flags |= B_INVAL;
@@ -2096,7 +2142,7 @@ loop:
bgetvp(vp, bp);
LIST_REMOVE(bp, b_hash);
- bh = BUFHASH(vp, blkno);
+ bh = bufhash(vp, blkno);
LIST_INSERT_HEAD(bh, bp, b_hash);
/*
@@ -2135,7 +2181,7 @@ geteblk(int size)
int s;
s = splbio();
- while ((bp = getnewbuf(0, (daddr_t) 0, 0, 0, size, MAXBSIZE)) == 0);
+ while ((bp = getnewbuf(0, 0, size, MAXBSIZE)) == 0);
splx(s);
allocbuf(bp, size);
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
@@ -2218,7 +2264,8 @@ allocbuf(struct buf *bp, int size)
#if !defined(NO_B_MALLOC)
/*
* We only use malloced memory on the first allocation,
- * and revert to page-allocated memory when the buffer grows.
+ * and revert to page-allocated memory when the buffer
+ * grows.
*/
if ( (bufmallocspace < maxbufmallocspace) &&
(bp->b_bufsize == 0) &&