1 files changed, 1307 insertions, 1246 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 16911684c998..b06352014b32 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 1998 Matthew Dillon,
  * Copyright (c) 1994 John S. Dyson
  * Copyright (c) 1990 University of Utah.
  * Copyright (c) 1991, 1993
@@ -36,17 +37,34 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
+ *				New Swap System
+ *				Matthew Dillon
+ *
+ * Radix Bitmap 'blists'.
+ *
+ *	- The new swapper uses the new radix bitmap code.  This should scale
+ *	  to arbitrarily small or arbitrarily large swap spaces and an almost
+ *	  arbitrary degree of fragmentation.
+ *
+ * Features:
+ *
+ *	- on the fly reallocation of swap during putpages.  The new system
+ *	  does not try to keep previously allocated swap blocks for dirty
+ *	  pages.  
+ *
+ *	- on the fly deallocation of swap
+ *
+ *	- No more garbage collection required.  Unnecessarily allocated swap
+ *	  blocks only exist for dirty vm_page_t's now and these are already
+ *	  cycled (in a high-load system) by the pager.  We also do on-the-fly
+ *	  removal of invalidated swap blocks when a page is destroyed
+ *	  or renamed.
+ *
  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
  *
  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
- * $Id: swap_pager.c,v 1.106 1999/01/08 17:31:23 eivind Exp $
- */
-
-/*
- * Quick hack to page to dedicated partition(s).
- * TODO:
- *	Add multiprocessor locks
- *	Deal with async writes in a better fashion
+ *
+ * $Id: swap_pager.c,v 1.107 1999/01/10 01:58:28 eivind Exp $
  */
 
 #include <sys/param.h>
@@ -57,18 +75,16 @@
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/vmmeter.h>
-#include <sys/rlist.h>
+#include <sys/blist.h>
+#include <sys/lock.h>
 
 #ifndef MAX_PAGEOUT_CLUSTER
 #define MAX_PAGEOUT_CLUSTER 16
 #endif
 
-#ifndef NPENDINGIO
-#define NPENDINGIO	16
-#endif
-
-#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
+#define SWB_NPAGES	MAX_PAGEOUT_CLUSTER
 
+#include "opt_swap.h"
 #include <vm/vm.h>
 #include <vm/vm_prot.h>
 #include <vm/vm_object.h>
@@ -77,848 +93,651 @@
 #include <vm/vm_pageout.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
 
-static int nswiodone;
-int swap_pager_full;
-extern int vm_swap_size;
-static int no_swap_space = 1;
-static int max_pageout_cluster;
-struct rlisthdr swaplist;
-
-TAILQ_HEAD(swpclean, swpagerclean);
-
-typedef struct swpagerclean *swp_clean_t;
+#define SWM_FREE	0x02	/* free, period			*/
+#define SWM_POP		0x04	/* pop out			*/
 
-static struct swpagerclean {
-	TAILQ_ENTRY(swpagerclean) spc_list;
-	int spc_flags;
-	struct buf *spc_bp;
-	vm_object_t spc_object;
-	vm_offset_t spc_kva;
-	int spc_first;
-	int spc_count;
-	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
-} swcleanlist[NPENDINGIO];
-
-
-/* spc_flags values */
-#define SPC_ERROR	0x01
+/*
+ * vm_swap_size is in page-sized chunks now.  It was DEV_BSIZE'd chunks
+ * in the old system.
+ */
 
-#define SWB_EMPTY (-1)
+extern int vm_swap_size;	/* number of free swap blocks, in pages */
 
-/* list of completed page cleans */
-static struct swpclean swap_pager_done;
+int swap_pager_full;		/* swap space exhaustion (w/ hysteresis)*/
+static int nsw_rcount;		/* free read buffers			*/
+static int nsw_wcount;		/* free write buffers			*/
+static int nsw_hysteresis;	/* hysteresis				*/
+static int max_pageout_cluster;	/* maximum VOP I/O allowed		*/
+static int sw_alloc_interlock;	/* swap pager allocation interlock	*/
 
-/* list of pending page cleans */
-static struct swpclean swap_pager_inuse;
+struct blist *swapblist;
+static struct swblock **swhash;
+static int swhash_mask;
 
-/* list of free pager clean structs */
-static struct swpclean swap_pager_free;
-static int swap_pager_free_count;
-static int swap_pager_free_pending;
 
-/* list of "named" anon region objects */
-static struct pagerlst swap_pager_object_list;
+/*
+ * "named" and "unnamed" anon region objects.  Try to reduce the overhead
+ * of searching a named list by hashing it just a little.
+ */
 
-/* list of "unnamed" anon region objects */
-struct pagerlst swap_pager_un_object_list;
+#define NOBJLISTS		8
 
-#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
-#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
-static int swap_pager_needflags;
+#define NOBJLIST(handle)	\
+	(&swap_pager_object_list[((int)(long)handle >> 4) & (NOBJLISTS-1)])
 
-static struct pagerlst *swp_qs[] = {
-	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
-};
+static struct pagerlst	swap_pager_object_list[NOBJLISTS];
+struct pagerlst		swap_pager_un_object_list;
+vm_zone_t		swap_zone;
 
 /*
- * pagerops for OBJT_SWAP - "swap pager".
+ * pagerops for OBJT_SWAP - "swap pager".  Some ops are also global procedure
+ * calls hooked from other parts of the VM system and do not appear here.
+ * (see vm/swap_pager.h).
  */
+
 static vm_object_t
 		swap_pager_alloc __P((void *handle, vm_ooffset_t size,
 				      vm_prot_t prot, vm_ooffset_t offset));
 static void	swap_pager_dealloc __P((vm_object_t object));
-static boolean_t
-		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
-					int *before, int *after));
 static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
 static void	swap_pager_init __P((void));
-static void spc_free __P((swp_clean_t));
+static void	swap_pager_unswapped __P((vm_page_t));
 
 struct pagerops swappagerops = {
-	swap_pager_init,
-	swap_pager_alloc,
-	swap_pager_dealloc,
-	swap_pager_getpages,
-	swap_pager_putpages,
-	swap_pager_haspage,
-	swap_pager_sync
+	swap_pager_init,	/* early system initialization of pager	*/
+	swap_pager_alloc,	/* allocate an OBJT_SWAP object		*/
+	swap_pager_dealloc,	/* deallocate an OBJT_SWAP object	*/
+	swap_pager_getpages,	/* pagein				*/
+	swap_pager_putpages,	/* pageout				*/
+	swap_pager_haspage,	/* get backing store status for page	*/
+	swap_pager_unswapped	/* remove swap related to page		*/
 };
 
-static int npendingio;
-static int dmmin;
+/*
+ * dmmax is in page-sized chunks with the new swap system.  It was
+ * dev-bsized chunks in the old.
+ *
+ * swap_*() routines are externally accessible.  swp_*() routines are
+ * internal.
+ */
+
 int dmmax;
+static int dmmax_mask;
+int nswap_lowat = 128;		/* in pages, swap_pager_full warning	*/
+int nswap_hiwat = 256;		/* in pages, swap_pager_full warning	*/
+
+static __inline void	swp_sizecheck __P((void));
+static void	swp_pager_sync_iodone __P((struct buf *bp));
+static void	swp_pager_async_iodone __P((struct buf *bp));
+
+/*
+ * Swap bitmap functions
+ */
+
+static __inline void	swp_pager_freeswapspace __P((daddr_t blk, int npages));
+static __inline daddr_t	swp_pager_getswapspace __P((int npages));
+
+/*
+ * Metadata functions
+ */
+
+static void swp_pager_meta_build __P((vm_object_t, daddr_t, daddr_t, int));
+static void swp_pager_meta_free __P((vm_object_t, daddr_t, daddr_t));
+static void swp_pager_meta_free_all __P((vm_object_t));
+static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
 
-static int	swap_pager_block_index __P((vm_pindex_t pindex));
-static int	swap_pager_block_offset __P((vm_pindex_t pindex));
-static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
-					  vm_pindex_t pindex, int *valid));
-static void	swap_pager_finish __P((swp_clean_t spc));
-static void	swap_pager_free_swap __P((vm_object_t object));
-static void	swap_pager_freeswapspace __P((vm_object_t object,
-					      unsigned int from,
-					      unsigned int to));
-static int	swap_pager_getswapspace __P((vm_object_t object,
-					     unsigned int amount,
-					     daddr_t *rtval));
-static void	swap_pager_iodone __P((struct buf *));
-static void	swap_pager_iodone1 __P((struct buf *bp));
-static void	swap_pager_reclaim __P((void));
-static void	swap_pager_ridpages __P((vm_page_t *m, int count,
-					 int reqpage));
-static void	swap_pager_setvalid __P((vm_object_t object,
-					 vm_offset_t offset, int valid));
-static __inline void	swapsizecheck __P((void));
-
-#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
+/*
+ * SWP_SIZECHECK() -	update swap_pager_full indication
+ *	
+ *	update the swap_pager_full indication and warn when we are
+ *	about to run out of swap space.
+ *
+ *	No restrictions on call
+ *	This routine may not block.
+ *	This routine must be called at splvm()
+ */
 
 static __inline void
-swapsizecheck()
+swp_sizecheck()
 {
-	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
+	if (vm_swap_size < nswap_lowat) {
 		if (swap_pager_full == 0)
 			printf("swap_pager: out of swap space\n");
 		swap_pager_full = 1;
-	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
+	} else if (vm_swap_size > nswap_hiwat) {
 		swap_pager_full = 0;
+	}
 }
 
+/*
+ * SWAP_PAGER_INIT() -	initialize the swap pager!
+ *
+ *	Expected to be started from system init.  NOTE:  This code is run 
+ *	before much else so be careful what you depend on.  Most of the VM
+ *	system has yet to be initialized at this point.
+ */
+
 static void
 swap_pager_init()
 {
-	int maxsafepending;
-	TAILQ_INIT(&swap_pager_object_list);
-	TAILQ_INIT(&swap_pager_un_object_list);
-
 	/*
-	 * Initialize clean lists
+	 * Initialize object lists
 	 */
-	TAILQ_INIT(&swap_pager_inuse);
-	TAILQ_INIT(&swap_pager_done);
-	TAILQ_INIT(&swap_pager_free);
-	swap_pager_free_count = 0;
+	int i;
+
+	for (i = 0; i < NOBJLISTS; ++i)
+		TAILQ_INIT(&swap_pager_object_list[i]);
+	TAILQ_INIT(&swap_pager_un_object_list);
 
 	/*
-	 * Calculate the swap allocation constants.
+	 * Device Stripe, in PAGE_SIZE'd blocks
 	 */
-	dmmin = PAGE_SIZE / DEV_BSIZE;
-	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
-
-	maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
-	npendingio = NPENDINGIO;
-	max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
-
-	if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
-		max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
-		npendingio = maxsafepending / (2 * max_pageout_cluster);
-		if (npendingio < 2)
-			npendingio = 2;
-	}
+
+	dmmax = SWB_NPAGES * 2;
+	dmmax_mask = ~(dmmax - 1);
 }
 
+/*
+ * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process
+ *
+ *	Expected to be started from pageout process once, prior to entering
+ *	its main loop.
+ */
+
 void
 swap_pager_swap_init()
 {
-	swp_clean_t spc;
-	struct buf *bp;
-	int i;
+	int n;
 
 	/*
-	 * kva's are allocated here so that we dont need to keep doing
-	 * kmem_alloc pageables at runtime
+	 * Number of in-transit swap bp operations.  Don't
+	 * exhaust the pbufs completely.  Make sure we
+	 * initialize workable values (0 will work for hysteresis
+	 * but it isn't very efficient).
+	 *
+	 * The max_pageout_cluster is constrained by the bp->b_pages[]
+	 * array (MAXPHYS/PAGE_SIZE) and our locally defined
+	 * MAX_PAGEOUT_CLUSTER.   Also be aware that swap ops are
+	 * constrained by the swap device interleave stripe size.
 	 */
-	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
-		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
-		if (!spc->spc_kva) {
-			break;
-		}
-		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
-		if (!spc->spc_bp) {
-			kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
-			break;
-		}
-		spc->spc_flags = 0;
-		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
-		swap_pager_free_count++;
-	}
-}
 
-int
-swap_pager_swp_alloc(object, wait)
-	vm_object_t object;
-	int wait;
-{
-	sw_blk_t swb;
-	int nblocks;
-	int i, j;
-
-	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
-	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
-	if (swb == NULL)
-		return 1;
-
-	for (i = 0; i < nblocks; i++) {
-		swb[i].swb_valid = 0;
-		swb[i].swb_locked = 0;
-		for (j = 0; j < SWB_NPAGES; j++)
-			swb[i].swb_block[j] = SWB_EMPTY;
-	}
+	nsw_rcount = (nswbuf + 1) / 2;
+	nsw_wcount = (nswbuf + 3) / 4;
+	nsw_hysteresis = nsw_wcount / 2;
+	max_pageout_cluster = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
 
-	object->un_pager.swp.swp_nblocks = nblocks;
-	object->un_pager.swp.swp_allocsize = 0;
-	object->un_pager.swp.swp_blocks = swb;
-	object->un_pager.swp.swp_poip = 0;
+	/*
+	 * Initialize our zone.  Right now I'm just guessing on the number
+	 * we need based on the number of pages in the system.  Each swblock
+	 * can hold 16 pages, so this is probably overkill.
+	 */
 
-	if (object->handle != NULL) {
-		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
-	} else {
-		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
-	}
+	n = cnt.v_page_count * 2;
 
-	return 0;
+	swap_zone = zinit(
+	    "SWAPMETA", 
+	    sizeof(struct swblock), 
+	    n,
+	    ZONE_INTERRUPT, 
+	    1
+	);
+
+	/*
+	 * Initialize our meta-data hash table.  The swapper does not need to
+	 * be quite as efficient as the VM system, so we do not use an 
+	 * oversized hash table.
+	 *
+	 * 	n: 		size of hash table, must be power of 2
+	 *	swhash_mask:	hash table index mask
+	 */
+
+	for (n = 1; n < cnt.v_page_count / 4; n <<= 1)
+		;
+
+	swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK);
+	bzero(swhash, sizeof(struct swblock *) * n);
+
+	swhash_mask = n - 1;
 }
 
 /*
- * Allocate an object and associated resources.
- * Note that if we are called from the pageout daemon (handle == NULL)
- * we should not wait for memory as it could resulting in deadlock.
+ * SWAP_PAGER_ALLOC() -	allocate a new OBJT_SWAP VM object and instantiate
+ *			its metadata structures.
+ *
+ *	This routine is called from the mmap and fork code to create a new
+ *	OBJT_SWAP object.  We do this by creating an OBJT_DEFAULT object
+ *	and then converting it with swp_pager_meta_build().
+ *
+ *	This routine may block in vm_object_allocate() and create a named
+ *	object lookup race, so we must interlock.   We must also run at
+ *	splvm() for the object lookup to handle races with interrupts, but
+ *	we do not have to maintain splvm() in between the lookup and the
+ *	add because (I believe) it is not possible to attempt to create
+ *	a new swap object w/handle when a default object with that handle
+ *	already exists.
  */
+
 static vm_object_t
 swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 		 vm_ooffset_t offset)
 {
 	vm_object_t object;
 
-	/*
-	 * If this is a "named" anonymous region, look it up and use the
-	 * object if it exists, otherwise allocate a new one.
-	 */
 	if (handle) {
-		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
+		/*
+		 * Reference existing named region or allocate new one.  There
+		 * should not be a race here against swp_pager_meta_build()
+		 * as called from vm_page_remove() in regards to the lookup
+		 * of the handle.
+		 */
+
+		while (sw_alloc_interlock) {
+			sw_alloc_interlock = -1;
+			tsleep(&sw_alloc_interlock, PVM, "swpalc", 0);
+		}
+		sw_alloc_interlock = 1;
+
+		object = vm_pager_object_lookup(NOBJLIST(handle), handle);
+
 		if (object != NULL) {
 			vm_object_reference(object);
 		} else {
-			/*
-			 * XXX - there is a race condition here. Two processes
-			 * can request the same named object simultaneuously,
-			 * and if one blocks for memory, the result is a disaster.
-			 * Probably quite rare, but is yet another reason to just
-			 * rip support of "named anonymous regions" out altogether.
-			 */
-			object = vm_object_allocate(OBJT_SWAP,
+			object = vm_object_allocate(OBJT_DEFAULT,
 				OFF_TO_IDX(offset + PAGE_MASK + size));
 			object->handle = handle;
-			(void) swap_pager_swp_alloc(object, M_WAITOK);
+
+			swp_pager_meta_build(
+			    object,
+			    0,
+			    SWAPBLK_NONE,
+			    0
+			);
 		}
+
+		if (sw_alloc_interlock < 0)
+			wakeup(&sw_alloc_interlock);
+
+		sw_alloc_interlock = 0;
 	} else {
-		object = vm_object_allocate(OBJT_SWAP,
+		object = vm_object_allocate(OBJT_DEFAULT,
 			OFF_TO_IDX(offset + PAGE_MASK + size));
-		(void) swap_pager_swp_alloc(object, M_WAITOK);
+
+		swp_pager_meta_build(
+		    object,
+		    0,
+		    SWAPBLK_NONE,
+		    0
+		);
 	}
 
 	return (object);
 }
 
 /*
- * returns disk block associated with pager and offset
- * additionally, as a side effect returns a flag indicating
- * if the block has been written
+ * SWAP_PAGER_DEALLOC() -	remove swap metadata from object
+ *
+ *	The swap backing for the object is destroyed.  The code is 
+ *	designed such that we can reinstantiate it later, but this
+ *	routine is typically called only when the entire object is
+ *	about to be destroyed.
+ *
+ *	This routine may block, but no longer does. 
+ *
+ *	The object must be locked or unreferenceable.
  */
 
-static __inline daddr_t *
-swap_pager_diskaddr(object, pindex, valid)
+static void
+swap_pager_dealloc(object)
 	vm_object_t object;
-	vm_pindex_t pindex;
-	int *valid;
 {
-	register sw_blk_t swb;
-	int ix;
-
-	if (valid)
-		*valid = 0;
-	ix = pindex / SWB_NPAGES;
-	if ((ix >= object->un_pager.swp.swp_nblocks) ||
-	    (pindex >= object->size)) {
-		return (FALSE);
+	/*
+	 * Remove from list right away so lookups will fail if we block for
+	 * pageout completion.
+	 */
+
+	if (object->handle == NULL) {
+		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
+	} else {
+		TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list);
 	}
-	swb = &object->un_pager.swp.swp_blocks[ix];
-	ix = pindex % SWB_NPAGES;
-	if (valid)
-		*valid = swb->swb_valid & (1 << ix);
-	return &swb->swb_block[ix];
-}
 
-/*
- * Utility routine to set the valid (written) bit for
- * a block associated with a pager and offset
- */
-static void
-swap_pager_setvalid(object, offset, valid)
-	vm_object_t object;
-	vm_offset_t offset;
-	int valid;
-{
-	register sw_blk_t swb;
-	int ix;
+	vm_object_pip_wait(object, "swpdea");
 
-	ix = offset / SWB_NPAGES;
-	if (ix >= object->un_pager.swp.swp_nblocks)
-		return;
+	/*
+	 * Free all remaining metadata.  We only bother to free it from 
+	 * the swap meta data.  We do not attempt to free swapblk's still
+	 * associated with vm_page_t's for this object.  We do not care
+	 * if paging is still in progress on some objects.
+	 */
 
-	swb = &object->un_pager.swp.swp_blocks[ix];
-	ix = offset % SWB_NPAGES;
-	if (valid)
-		swb->swb_valid |= (1 << ix);
-	else
-		swb->swb_valid &= ~(1 << ix);
-	return;
+	swp_pager_meta_free_all(object);
 }
 
+/************************************************************************
+ *			SWAP PAGER BITMAP ROUTINES			*
+ ************************************************************************/
+
 /*
- * this routine allocates swap space with a fragmentation
- * minimization policy.
+ * SWP_PAGER_GETSWAPSPACE() -	allocate raw swap space
+ *
+ *	Allocate swap for the requested number of pages.  The starting
+ *	swap block number (a page index) is returned or SWAPBLK_NONE
+ *	if the allocation failed.
+ *
+ *	Also has the side effect of advising that somebody made a mistake
+ *	when they configured swap and didn't configure enough.
+ *
+ *	Must be called at splvm() to avoid races with bitmap frees from
+ *	vm_page_remove() aka swap_pager_page_removed().
+ *
+ *	This routine may not block
+ *	This routine must be called at splvm().
  */
-static int
-swap_pager_getswapspace(object, amount, rtval)
-	vm_object_t object;
-	unsigned int amount;
-	daddr_t *rtval;
+
+static __inline daddr_t
+swp_pager_getswapspace(npages)
+	int npages;
 {
-	unsigned location;
+	daddr_t blk;
 
-	vm_swap_size -= amount;
-		
-	if (!rlist_alloc(&swaplist, amount, &location)) {
-		vm_swap_size += amount;
-		return 0;
+	if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
+		printf("swap_pager_getswapspace: failed\n");
 	} else {
-		swapsizecheck();
-		object->un_pager.swp.swp_allocsize += amount;
-		*rtval = location;
-		return 1;
+		vm_swap_size -= npages;
+		swp_sizecheck();
 	}
+	return(blk);
 }
 
 /*
- * this routine frees swap space with a fragmentation
- * minimization policy.
+ * SWP_PAGER_FREESWAPSPACE() -	free raw swap space 
+ *
+ *	This routine returns the specified swap blocks back to the bitmap.
+ *
+ *	Note:  This routine may not block (it could in the old swap code),
+ *	and through the use of the new blist routines it does not block.
+ *
+ *	We must be called at splvm() to avoid races with bitmap frees from
+ *	vm_page_remove() aka swap_pager_page_removed().
+ *
+ *	This routine may not block
+ *	This routine must be called at splvm().
  */
-static void
-swap_pager_freeswapspace(object, from, to)
-	vm_object_t object;
-	unsigned int from;
-	unsigned int to;
+
+static __inline void
+swp_pager_freeswapspace(blk, npages)
+	daddr_t blk;
+	int npages;
 {
-	rlist_free(&swaplist, from, to);
-	vm_swap_size += (to - from) + 1;
-	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
-	swapsizecheck();
+	blist_free(swapblist, blk, npages);
+	vm_swap_size += npages;
+	swp_sizecheck();
 }
+
 /*
- * this routine frees swap blocks from a specified pager
+ * SWAP_PAGER_FREESPACE() -	frees swap blocks associated with a page
+ *				range within an object.
+ *
+ *	This is a globally accessible routine.
+ *
+ *	This routine removes swapblk assignments from swap metadata.
+ *
+ *	The external callers of this routine typically have already destroyed 
+ *	or renamed vm_page_t's associated with this range in the object so 
+ *	we should be ok.
  */
+
 void
 swap_pager_freespace(object, start, size)
 	vm_object_t object;
 	vm_pindex_t start;
 	vm_size_t size;
 {
-	vm_pindex_t i;
-	int s;
-
-	s = splvm();
-	for (i = start; i < start + size; i += 1) {
-		int valid;
-		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
-
-		if (addr && *addr != SWB_EMPTY) {
-			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
-			if (valid) {
-				swap_pager_setvalid(object, i, 0);
-			}
-			*addr = SWB_EMPTY;
-		}
-	}
-	splx(s);
+	swp_pager_meta_free(object, start, size);
 }
 
 /*
- * same as freespace, but don't free, just force a DMZ next time
- */
-void
-swap_pager_dmzspace(object, start, size)
-	vm_object_t object;
-	vm_pindex_t start;
-	vm_size_t size;
-{
-	vm_pindex_t i;
-	int s;
-
-	s = splvm();
-	for (i = start; i < start + size; i += 1) {
-		int valid;
-		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
-
-		if (addr && *addr != SWB_EMPTY) {
-			if (valid) {
-				swap_pager_setvalid(object, i, 0);
-			}
-		}
-	}
-	splx(s);
-}
-
-static void
-swap_pager_free_swap(object)
-	vm_object_t object;
-{
-	register int i, j;
-	register sw_blk_t swb;
-	int first_block=0, block_count=0;
-	int s;
-	/*
-	 * Free left over swap blocks
-	 */
-	swb = object->un_pager.swp.swp_blocks;
-	if (swb == NULL) {
-		return;
-	}
-
-	s = splvm();
-	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
-		for (j = 0; j < SWB_NPAGES; j++) {
-			if (swb->swb_block[j] != SWB_EMPTY) {
-				/*
-   				 * initially the length of the run is zero
-   				 */
-				if (block_count == 0) {
-					first_block = swb->swb_block[j];
-					block_count = btodb(PAGE_SIZE);
-					swb->swb_block[j] = SWB_EMPTY;
-				/*
-   				 * if the new block can be included into the current run
-   				 */
-				} else if (swb->swb_block[j] == first_block + block_count) {
-					block_count += btodb(PAGE_SIZE);
-					swb->swb_block[j] = SWB_EMPTY;
-				/*
-   				 * terminate the previous run, and start a new one
-   				 */
-				} else {
-					swap_pager_freeswapspace(object, first_block,
-   					(unsigned) first_block + block_count - 1);
-					first_block = swb->swb_block[j];
-					block_count = btodb(PAGE_SIZE);
-					swb->swb_block[j] = SWB_EMPTY;
-				}
-			}
-		}
-	}
-
-	if (block_count) {
-		swap_pager_freeswapspace(object, first_block,
-		   	 (unsigned) first_block + block_count - 1);
-	}
-	splx(s);
-}
-
-
-/*
- * swap_pager_reclaim frees up over-allocated space from all pagers
- * this eliminates internal fragmentation due to allocation of space
- * for segments that are never swapped to. It has been written so that
- * it does not block until the rlist_free operation occurs; it keeps
- * the queues consistant.
- */
-
-/*
- * Maximum number of blocks (pages) to reclaim per pass
- */
-#define MAXRECLAIM 128
-
-static void
-swap_pager_reclaim()
-{
-	vm_object_t object;
-	int i, j, k;
-	int s;
-	int reclaimcount;
-	static struct {
-		int address;
-		vm_object_t object;
-	} reclaims[MAXRECLAIM];
-	static int in_reclaim;
-
-	/*
-	 * allow only one process to be in the swap_pager_reclaim subroutine
-	 */
-	s = splvm();
-	if (in_reclaim) {
-		tsleep(&in_reclaim, PSWP, "swrclm", 0);
-		splx(s);
-		return;
-	}
-	in_reclaim = 1;
-	reclaimcount = 0;
-
-	/* for each pager queue */
-	for (k = 0; swp_qs[k]; k++) {
-
-		object = TAILQ_FIRST(swp_qs[k]);
-		while (object && (reclaimcount < MAXRECLAIM)) {
-
-			/*
-			 * see if any blocks associated with a pager has been
-			 * allocated but not used (written)
-			 */
-			if ((object->flags & OBJ_DEAD) == 0 &&
-				(object->paging_in_progress == 0)) {
-				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
-					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
-
-					if (swb->swb_locked)
-						continue;
-					for (j = 0; j < SWB_NPAGES; j++) {
-						if (swb->swb_block[j] != SWB_EMPTY &&
-						    (swb->swb_valid & (1 << j)) == 0) {
-							reclaims[reclaimcount].address = swb->swb_block[j];
-							reclaims[reclaimcount++].object = object;
-							swb->swb_block[j] = SWB_EMPTY;
-							if (reclaimcount >= MAXRECLAIM)
-								goto rfinished;
-						}
-					}
-				}
-			}
-			object = TAILQ_NEXT(object, pager_object_list);
-		}
-	}
-
-rfinished:
-
-	/*
-	 * free the blocks that have been added to the reclaim list
-	 */
-	for (i = 0; i < reclaimcount; i++) {
-		swap_pager_freeswapspace(reclaims[i].object,
-		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
-	}
-	splx(s);
-	in_reclaim = 0;
-	wakeup(&in_reclaim);
-}
-
-
-/*
- * swap_pager_copy copies blocks from one pager to another and
- * destroys the source pager
+ * SWAP_PAGER_COPY() -  copy blocks from source pager to destination pager
+ *			and destroy the source.
+ *
+ *	Copy any valid swapblks from the source to the destination.  In
+ *	cases where both the source and destination have a valid swapblk,
+ *	we keep the destination's.
+ *
+ *	This routine is allowed to block.  It may block allocating metadata
+ *	indirectly through swp_pager_meta_build() or if paging is still in
+ *	progress on the source. 
+ *
+ *	XXX vm_page_collapse() kinda expects us not to block because we 
+ *	supposedly do not need to allocate memory, but for the moment we
+ *	*may* have to get a little memory from the zone allocator, but
+ *	it is taken from the interrupt memory.  We should be ok. 
+ *
+ *	The source object contains no vm_page_t's (which is just as well)
+ *
+ *	The source object is of type OBJT_SWAP.
+ *
+ *	The source and destination objects must be 
+ *	locked or inaccessible (XXX are they ???)
  */
 
 void
-swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
-	offset, destroysource)
+swap_pager_copy(srcobject, dstobject, offset, destroysource)
 	vm_object_t srcobject;
-	vm_pindex_t srcoffset;
 	vm_object_t dstobject;
-	vm_pindex_t dstoffset;
 	vm_pindex_t offset;
 	int destroysource;
 {
 	vm_pindex_t i;
-	int origsize;
-	int s;
-
-	if (vm_swap_size)
-		no_swap_space = 0;
-
-	origsize = srcobject->un_pager.swp.swp_allocsize;
 
 	/*
-	 * remove the source object from the swap_pager internal queue
+	 * If destroysource is set, we remove the source object from the 
+	 * swap_pager internal queue now. 
 	 */
+
 	if (destroysource) {
 		if (srcobject->handle == NULL) {
-			TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
+			TAILQ_REMOVE(
+			    &swap_pager_un_object_list, 
+			    srcobject, 
+			    pager_object_list
+			);
 		} else {
-			TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
+			TAILQ_REMOVE(
+			    NOBJLIST(srcobject->handle),
+			    srcobject,
+			    pager_object_list
+			);
 		}
 	}
 
-	s = splvm();
-	while (srcobject->un_pager.swp.swp_poip) {
-		tsleep(srcobject, PVM, "spgout", 0);
-	}
-
 	/*
-	 * clean all of the pages that are currently active and finished
+	 * transfer source to destination.
 	 */
-	if (swap_pager_free_pending)
-		swap_pager_sync();
 
-	/*
-	 * transfer source to destination
-	 */
-	for (i = 0; i < dstobject->size; i += 1) {
-		int srcvalid, dstvalid;
-		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
-				i + offset + srcoffset, &srcvalid);
-		daddr_t *dstaddrp;
+	for (i = 0; i < dstobject->size; ++i) {
+		daddr_t dstaddr;
 
 		/*
-		 * see if the source has space allocated
+		 * Locate (without changing) the swapblk on the destination,
+		 * unless it is invalid in which case free it silently, or
+		 * if the destination is a resident page, in which case the
+		 * source is thrown away.
 		 */
-		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+
+		dstaddr = swp_pager_meta_ctl(dstobject, i, 0);
+
+		if (dstaddr == SWAPBLK_NONE) {
 			/*
-			 * if the source is valid and the dest has no space,
-			 * then copy the allocation from the srouce to the
-			 * dest.
+			 * Destination has no swapblk and is not resident,
+			 * copy source.
 			 */
-			if (srcvalid) {
-				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
-							&dstvalid);
-				/*
-				 * if the dest already has a valid block,
-				 * deallocate the source block without
-				 * copying.
-				 */
-				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
-					swap_pager_freeswapspace(dstobject, *dstaddrp,
-						*dstaddrp + btodb(PAGE_SIZE) - 1);
-					*dstaddrp = SWB_EMPTY;
-				}
-				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
-					*dstaddrp = *srcaddrp;
-					*srcaddrp = SWB_EMPTY;
-					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
-					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
-					swap_pager_setvalid(dstobject, i + dstoffset, 1);
-				}
-			}
+			daddr_t srcaddr;
+
+			srcaddr = swp_pager_meta_ctl(
+			    srcobject, 
+			    i + offset,
+			    SWM_POP
+			);
+
+			if (srcaddr != SWAPBLK_NONE)
+				swp_pager_meta_build(dstobject, i, srcaddr, 1);
+		} else {
 			/*
-			 * if the source is not empty at this point, then
-			 * deallocate the space.
+			 * Destination has valid swapblk or it is represented
+			 * by a resident page.  We destroy the sourceblock.
 			 */
-			if (*srcaddrp != SWB_EMPTY) {
-				swap_pager_freeswapspace(srcobject, *srcaddrp,
-					*srcaddrp + btodb(PAGE_SIZE) - 1);
-				*srcaddrp = SWB_EMPTY;
-			}
+			
+			swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE);
 		}
 	}
-	splx(s);
 
 	/*
-	 * Free left over swap blocks
+	 * Free left over swap blocks in source.
+	 *
+	 * We have to revert the type to OBJT_DEFAULT so we do not accidently
+	 * double-remove the object from the swap queues.
 	 */
-	if (destroysource) {
-		swap_pager_free_swap(srcobject);
 
-		if (srcobject->un_pager.swp.swp_allocsize) {
-			printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
-			    srcobject->un_pager.swp.swp_allocsize, origsize);
-		}
-
-		free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
-		srcobject->un_pager.swp.swp_blocks = NULL;
+	if (destroysource) {
+		swp_pager_meta_free_all(srcobject);
+		/*
+		 * Reverting the type is not necessary, the caller is going
+		 * to destroy srcobject directly, but I'm doing it here
+		 * for consistancy since we've removed the object from its
+		 * queues.
+		 */
+		srcobject->type = OBJT_DEFAULT;
 	}
 	return;
 }
 
-static void
-swap_pager_dealloc(object)
+/*
+ * SWAP_PAGER_HASPAGE() -	determine if we have good backing store for
+ *				the requested page.
+ *
+ *	We determine whether good backing store exists for the requested
+ *	page and return TRUE if it does, FALSE if it doesn't.
+ *
+ *	If TRUE, we also try to determine how much valid, contiguous backing
+ *	store exists before and after the requested page within a reasonable
+ *	distance.  We do not try to restrict it to the swap device stripe
+ *	(that is handled in getpages/putpages).  It probably isn't worth
+ *	doing here.
+ */
+
+boolean_t
+swap_pager_haspage(object, pindex, before, after)
 	vm_object_t object;
+	vm_pindex_t pindex;
+	int *before;
+	int *after;
 {
-	int s;
-	sw_blk_t swb;
+	daddr_t blk0;
 
 	/*
-	 * Remove from list right away so lookups will fail if we block for
-	 * pageout completion.
+	 * do we have good backing store at the requested index ?
 	 */
-	if (object->handle == NULL) {
-		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
-	} else {
-		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
-	}
 
-	/*
-	 * Wait for all pageouts to finish and remove all entries from
-	 * cleaning list.
-	 */
+	blk0 = swp_pager_meta_ctl(object, pindex, 0);
 
-	s = splvm();
-	while (object->un_pager.swp.swp_poip) {
-		tsleep(object, PVM, "swpout", 0);
+	if (blk0 & SWAPBLK_NONE) {
+		if (before)
+			*before = 0;
+		if (after)
+			*after = 0;
+		return (FALSE);
 	}
-	splx(s);
-
-	if (swap_pager_free_pending)
-		swap_pager_sync();
 
 	/*
-	 * Free left over swap blocks
+	 * find backwards-looking contiguous good backing store
 	 */
-	swap_pager_free_swap(object);
 
-	if (object->un_pager.swp.swp_allocsize) {
-		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
-		    object->un_pager.swp.swp_allocsize);
-	}
-	swb = object->un_pager.swp.swp_blocks;
-	if (swb) {
-		/*
-   		* Free swap management resources
-   		*/
-		free(swb, M_VMPGDATA);
-		object->un_pager.swp.swp_blocks = NULL;
-	}
-}
+	if (before != NULL) {
+		int i;
 
-static __inline int
-swap_pager_block_index(pindex)
-	vm_pindex_t pindex;
-{
-	return (pindex / SWB_NPAGES);
-}
-
-static __inline int
-swap_pager_block_offset(pindex)
-	vm_pindex_t pindex;
-{
-	return (pindex % SWB_NPAGES);
-}
+		for (i = 1; i < (SWB_NPAGES/2); ++i) {
+			daddr_t blk;
 
-/*
- * swap_pager_haspage returns TRUE if the pager has data that has
- * been written out.
- */
-static boolean_t
-swap_pager_haspage(object, pindex, before, after)
-	vm_object_t object;
-	vm_pindex_t pindex;
-	int *before;
-	int *after;
-{
-	register sw_blk_t swb;
-	int ix;
-
-	if (before != NULL)
-		*before = 0;
-	if (after != NULL)
-		*after = 0;
-	ix = pindex / SWB_NPAGES;
-	if (ix >= object->un_pager.swp.swp_nblocks) {
-		return (FALSE);
+			if (i > pindex)
+				break;
+			blk = swp_pager_meta_ctl(object, pindex - i, 0);
+			if (blk & SWAPBLK_NONE)
+				break;
+			if (blk != blk0 - i)
+				break;
+		}
+		*before = (i - 1);
 	}
-	swb = &object->un_pager.swp.swp_blocks[ix];
-	ix = pindex % SWB_NPAGES;
-
-	if (swb->swb_block[ix] != SWB_EMPTY) {
-
-		if (swb->swb_valid & (1 << ix)) {
-			int tix;
-			if (before) {
-				for(tix = ix - 1; tix >= 0; --tix) {
-					if ((swb->swb_valid & (1 << tix)) == 0)
-						break;
-					if ((swb->swb_block[tix] +
-						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
-						swb->swb_block[ix])
-						break;
-					(*before)++;
-				}
-			}
 
-			if (after) {
-				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
-					if ((swb->swb_valid & (1 << tix)) == 0)
-						break;
-					if ((swb->swb_block[tix] -
-						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
-						swb->swb_block[ix])
-						break;
-					(*after)++;
-				}
-			}
+	/*
+	 * find forward-looking contiguous good backing store
+	 */
 
-			return TRUE;
+	if (after != NULL) {
+		int i;
+
+		for (i = 1; i < (SWB_NPAGES/2); ++i) {
+			daddr_t blk;
+
+			blk = swp_pager_meta_ctl(object, pindex + i, 0);
+			if (blk & SWAPBLK_NONE)
+				break;
+			if (blk != blk0 + i)
+				break;
 		}
+		*after = (i - 1);
 	}
-	return (FALSE);
-}
 
-/*
- * Wakeup based upon spc state
- */
-static void
-spc_wakeup(void)
-{
-	if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
-		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
-		wakeup(&swap_pager_needflags);
-	} else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
-		swap_pager_free_count >= ((2 * npendingio) / 3)) {
-		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
-		wakeup(&swap_pager_free);
-	}
+	return (TRUE);
 }
 
 /*
- * Free an spc structure
+ * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
+ *
+ *	This removes any associated swap backing store, whether valid or
+ *	not, from the page.  
+ *
+ *	This routine is typically called when a page is made dirty, at
+ *	which point any associated swap can be freed.  MADV_FREE also
+ *	calls us in a special-case situation
+ *
+ *	NOTE!!!  If the page is clean and the swap was valid, the caller
+ *	should make the page dirty before calling this routine.  This routine
+ *	does NOT change the m->dirty status of the page.  Also: MADV_FREE
+ *	depends on it.
+ *
+ *	This routine may not block
  */
-static void
-spc_free(spc)
-	swp_clean_t spc;
-{
-	spc->spc_flags = 0;
-	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
-	swap_pager_free_count++;
-	if (swap_pager_needflags) {
-		spc_wakeup();
-	}
-}
 
-/*
- * swap_pager_ridpages is a convienience routine that deallocates all
- * but the required page.  this is usually used in error returns that
- * need to invalidate the "extra" readahead pages.
- */
 static void
-swap_pager_ridpages(m, count, reqpage)
-	vm_page_t *m;
-	int count;
-	int reqpage;
+swap_pager_unswapped(m)
+	vm_page_t m;
 {
-	int i;
-
-	for (i = 0; i < count; i++) {
-		if (i != reqpage) {
-			vm_page_free(m[i]);
-		}
-	}
+	swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE);
 }
 
 /*
- * swap_pager_iodone1 is the completion routine for both reads and async writes
+ * SWAP_PAGER_GETPAGES() - bring pages in from swap
+ *
+ *	Attempt to retrieve (m, count) pages from backing store, but make
+ *	sure we retrieve at least m[reqpage].  We try to load in as large
+ *	a chunk surrounding m[reqpage] as is contiguous in swap and which
+ *	belongs to the same object.
+ *
+ *	The code is designed for asynchronous operation and 
+ *	immediate-notification of 'reqpage' but tends not to be
+ *	used that way.  Please do not optimize-out this algorithmic
+ *	feature, I intend to improve on it in the future.
+ *
+ *	The parent has a single vm_object_pip_add() reference prior to
+ *	calling us and we should return with the same.
+ *
+ *	The parent has BUSY'd the pages.  We should return with 'm'
+ *	left busy, but the others adjusted.
  */
-static void
-swap_pager_iodone1(bp)
-	struct buf *bp;
-{
-	bp->b_flags |= B_DONE;
-	bp->b_flags &= ~B_ASYNC;
-	wakeup(bp);
-}
 
 static int
 swap_pager_getpages(object, m, count, reqpage)
@@ -926,208 +745,235 @@ swap_pager_getpages(object, m, count, reqpage)
 	vm_page_t *m;
 	int count, reqpage;
 {
-	register struct buf *bp;
-	sw_blk_t swb[count];
-	register int s;
+	struct buf *bp;
+	vm_page_t mreq;
+	int s;
 	int i;
-	boolean_t rv;
-	vm_offset_t kva, off[count];
-	vm_pindex_t paging_offset;
-	int reqaddr[count];
-	int sequential;
-
-	int first, last;
-	int failed;
-	int reqdskregion;
-
-	object = m[reqpage]->object;
-	paging_offset = OFF_TO_IDX(object->paging_offset);
-	sequential = (m[reqpage]->pindex == (object->last_read + 1));
-
-	for (i = 0; i < count; i++) {
-		vm_pindex_t fidx = m[i]->pindex + paging_offset;
-		int ix = swap_pager_block_index(fidx);
-
-		if (ix >= object->un_pager.swp.swp_nblocks) {
-			int j;
-
-			if (i <= reqpage) {
-				swap_pager_ridpages(m, count, reqpage);
-				return (VM_PAGER_FAIL);
-			}
-			for (j = i; j < count; j++) {
-				vm_page_free(m[j]);
-			}
-			count = i;
+	int j;
+	daddr_t blk;
+	vm_offset_t kva;
+	vm_pindex_t lastpindex;
+
+	mreq = m[reqpage];
+
+#if !defined(MAX_PERF)
+	if (mreq->object != object) {
+		panic("swap_pager_getpages: object mismatch %p/%p", 
+		    object, 
+		    mreq->object
+		);
+	}
+#endif
+	/*
+	 * Calculate range to retrieve.  The pages have already been assigned
+	 * their swapblks.  We require a *contiguous* range that falls entirely
+	 * within a single device stripe.   If we do not supply it, bad things
+	 * happen.
+	 */
+
+
+	blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0);
+
+	for (i = reqpage - 1; i >= 0; --i) {
+		daddr_t iblk;
+
+		iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0);
+		if (iblk & SWAPBLK_NONE)
+			break;
+
+		if ((blk ^ iblk) & dmmax_mask)
+			break;
+
+		if (blk != iblk + (reqpage - i))
 			break;
-		}
-		swb[i] = &object->un_pager.swp.swp_blocks[ix];
-		off[i] = swap_pager_block_offset(fidx);
-		reqaddr[i] = swb[i]->swb_block[off[i]];
 	}
+	++i;
 
-	/* make sure that our required input request is existant */
+	for (j = reqpage + 1; j < count; ++j) {
+		daddr_t jblk;
 
-	if (reqaddr[reqpage] == SWB_EMPTY ||
-	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
-		swap_pager_ridpages(m, count, reqpage);
-		return (VM_PAGER_FAIL);
+		jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0);
+		if (jblk & SWAPBLK_NONE)
+			break;
+
+		if ((blk ^ jblk) & dmmax_mask)
+			break;
+
+		if (blk != jblk - (j - reqpage))
+			break;
 	}
-	reqdskregion = reqaddr[reqpage] / dmmax;
 
 	/*
-	 * search backwards for the first contiguous page to transfer
+	 * If blk itself is bad, well, we can't do any I/O.  This should
+	 * already be covered as a side effect, but I'm making sure.
 	 */
-	failed = 0;
-	first = 0;
-	for (i = reqpage - 1; i >= 0; --i) {
-		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
-		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
-		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
-		    ((reqaddr[i] / dmmax) != reqdskregion)) {
-			failed = 1;
-			vm_page_free(m[i]);
-			if (first == 0)
-				first = i + 1;
-		}
+
+	if (blk & SWAPBLK_NONE) {
+		i = reqpage;
+		j = reqpage + 1;
 	}
+
 	/*
-	 * search forwards for the last contiguous page to transfer
+	 * free pages outside our collection range.   Note: we never free
+	 * mreq, it must remain busy throughout.
 	 */
-	failed = 0;
-	last = count;
-	for (i = reqpage + 1; i < count; i++) {
-		if (failed || (reqaddr[i] == SWB_EMPTY) ||
-		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
-		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
-		    ((reqaddr[i] / dmmax) != reqdskregion)) {
-			failed = 1;
-			vm_page_free(m[i]);
-			if (last == count)
-				last = i;
-		}
-	}
 
-	count = last;
-	if (first != 0) {
-		for (i = first; i < count; i++) {
-			m[i - first] = m[i];
-			reqaddr[i - first] = reqaddr[i];
-			off[i - first] = off[i];
+	{
+		int k;
+
+		for (k = 0; k < i; ++k) {
+			vm_page_free(m[k]);
+		}
+		for (k = j; k < count; ++k) {
+			vm_page_free(m[k]);
 		}
-		count -= first;
-		reqpage -= first;
 	}
-	++swb[reqpage]->swb_locked;
 
 	/*
-	 * at this point: "m" is a pointer to the array of vm_page_t for
-	 * paging I/O "count" is the number of vm_page_t entries represented
-	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
-	 * into "m" for the page actually faulted
+	 * Return VM_PAGER_FAIL if we have nothing
+	 * to do.  Return mreq still busy, but the
+	 * others unbusied.
 	 */
 
+	if (blk & SWAPBLK_NONE)
+		return(VM_PAGER_FAIL);
+
+
 	/*
 	 * Get a swap buffer header to perform the IO
 	 */
-	bp = getpbuf();
+
+	bp = getpbuf(&nsw_rcount);
 	kva = (vm_offset_t) bp->b_data;
 
 	/*
 	 * map our page(s) into kva for input
+	 *
+	 * NOTE: B_PAGING is set by pbgetvp()
 	 */
-	pmap_qenter(kva, m, count);
 
-	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
-	bp->b_iodone = swap_pager_iodone1;
+	pmap_qenter(kva, m + i, j - i);
+
+	bp->b_flags = B_BUSY | B_READ | B_CALL;
+	bp->b_iodone = swp_pager_async_iodone;
 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
 	crhold(bp->b_rcred);
 	crhold(bp->b_wcred);
 	bp->b_data = (caddr_t) kva;
-	bp->b_blkno = reqaddr[0];
-	bp->b_bcount = PAGE_SIZE * count;
-	bp->b_bufsize = PAGE_SIZE * count;
+	/*
+	 * b_blkno is in page-sized chunks.  swapblk is valid, too, so
+	 * we don't have to mask it against SWAPBLK_MASK.
+	 */
+	bp->b_blkno = blk - (reqpage - i);
+	bp->b_bcount = PAGE_SIZE * (j - i);
+	bp->b_bufsize = PAGE_SIZE * (j - i);
+	bp->b_pager.pg_reqpage = reqpage - i;
+
+	{
+		int k;
+
+		for (k = i; k < j; ++k) {
+			bp->b_pages[k - i] = m[k];
+			vm_page_flag_set(m[k], PG_SWAPINPROG);
+		}
+	}
+	bp->b_npages = j - i;
 
 	pbgetvp(swapdev_vp, bp);
 
 	cnt.v_swapin++;
-	cnt.v_swappgsin += count;
+	cnt.v_swappgsin += bp->b_npages;
+
+	/*
+	 * We still hold the lock on mreq, and our automatic completion routine
+	 * does not remove it.
+	 */
+
+	vm_object_pip_add(mreq->object, bp->b_npages);
+	lastpindex = m[j-1]->pindex;
+
 	/*
-	 * perform the I/O
+	 * perform the I/O.  NOTE!!!  bp cannot be considered valid after
+	 * this point because we automatically release it on completion.
+	 * Instead, we look at the one page we are interested in which we
+	 * still hold a lock on even through the I/O completion.
+	 *
+	 * The other pages in our m[] array are also released on completion,
+	 * so we cannot assume they are valid anymore either.
+	 *
+	 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
 	 */
+
 	VOP_STRATEGY(bp->b_vp, bp);
 
 	/*
-	 * wait for the sync I/O to complete
+	 * wait for the page we want to complete.  PG_SWAPINPROG is always
+	 * cleared on completion.  If an I/O error occurs, SWAPBLK_NONE
+	 * is set in the meta-data.
 	 */
+
 	s = splvm();
-	while ((bp->b_flags & B_DONE) == 0) {
-		if (tsleep(bp, PVM, "swread", hz*20)) {
+
+	while ((mreq->flags & PG_SWAPINPROG) != 0) {
+		vm_page_flag_set(mreq, PG_WANTED | PG_REFERENCED);
+		cnt.v_intrans++;
+		if (tsleep(mreq, PSWP, "swread", hz*20)) {
 			printf(
-"swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
+			    "swap_pager: indefinite wait buffer: device:"
+				" %#lx, blkno: %ld, size: %ld\n",
 			    (u_long)bp->b_dev, (long)bp->b_blkno,
-			    (long)bp->b_bcount);
+			    (long)bp->b_bcount
+			);
 		}
 	}
 
-	if (bp->b_flags & B_ERROR) {
-		printf(
-"swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
-		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
-		rv = VM_PAGER_ERROR;
-	} else {
-		rv = VM_PAGER_OK;
-	}
-
 	splx(s);
-	swb[reqpage]->swb_locked--;
-
-	/*
-	 * remove the mapping for kernel virtual
-	 */
-	pmap_qremove(kva, count);
 
 	/*
-	 * release the physical I/O buffer
-	 */
-	relpbuf(bp);
-	/*
-	 * finish up input if everything is ok
+	 * mreq is left bussied after completion, but all the other pages
+	 * are freed.  If we had an unrecoverable read error the page will
+	 * not be valid.
 	 */
-	if (rv == VM_PAGER_OK) {
-		for (i = 0; i < count; i++) {
-			m[i]->dirty = 0;
-			vm_page_flag_clear(m[i], PG_ZERO);
-			if (i != reqpage) {
-				/*
-				 * whether or not to leave the page
-				 * activated is up in the air, but we
-				 * should put the page on a page queue
-				 * somewhere. (it already is in the
-				 * object). After some emperical
-				 * results, it is best to deactivate
-				 * the readahead pages.
-				 */
-				vm_page_deactivate(m[i]);
 
-				/*
-				 * just in case someone was asking for
-				 * this page we now tell them that it
-				 * is ok to use
-				 */
-				m[i]->valid = VM_PAGE_BITS_ALL;
-				vm_page_wakeup(m[i]);
-			}
-		}
-
-		m[reqpage]->object->last_read = m[count-1]->pindex;
+	if (mreq->valid != VM_PAGE_BITS_ALL) {
+		return(VM_PAGER_ERROR);
 	} else {
-		swap_pager_ridpages(m, count, reqpage);
+		mreq->object->last_read = lastpindex;
+		return(VM_PAGER_OK);
 	}
-	return (rv);
+
+	/*
+	 * A final note: in a low swap situation, we cannot deallocate swap
+	 * and mark a page dirty here because the caller is likely to mark
+	 * the page clean when we return, causing the page to possibly revert 
+	 * to all-zero's later.
+	 */
 }
 
+/*
+ *	swap_pager_putpages: 
+ *
+ *	Assign swap (if necessary) and initiate I/O on the specified pages.
+ *
+ *	We support both OBJT_DEFAULT and OBJT_SWAP objects.  DEFAULT objects
+ *	are automatically converted to SWAP objects.
+ *
+ *	In a low memory situation we may block in VOP_STRATEGY(), but the new 
+ *	vm_page reservation system coupled with properly written VFS devices 
+ *	should ensure that no low-memory deadlock occurs.  This is an area
+ *	which needs work.
+ *
+ *	The parent has N vm_object_pip_add() references prior to
+ *	calling us and will remove references for rtvals[] that are
+ *	not set to VM_PAGER_PEND.  We need to remove the rest on I/O
+ *	completion.
+ *
+ *	The parent has soft-busy'd the pages it passes us and will unbusy
+ *	those whos rtvals[] entry is not set to VM_PAGER_PEND on return.
+ *	We need to unbusy the rest on I/O completion.
+ */
+
 int
 swap_pager_putpages(object, m, count, sync, rtvals)
 	vm_object_t object;
@@ -1136,534 +982,749 @@ swap_pager_putpages(object, m, count, sync, rtvals)
 	boolean_t sync;
 	int *rtvals;
 {
-	register struct buf *bp;
-	sw_blk_t swb[count];
-	register int s;
-	int i, j, ix, firstidx, lastidx;
-	boolean_t rv;
-	vm_offset_t kva, off, fidx;
-	swp_clean_t spc;
-	vm_pindex_t paging_pindex;
-	int reqaddr[count];
-	int failed;
-
-	if (vm_swap_size)
-		no_swap_space = 0;
-
-	if (no_swap_space) {
-		for (i = 0; i < count; i++)
-			rtvals[i] = VM_PAGER_FAIL;
-		return VM_PAGER_FAIL;
+	int i;
+	int n = 0;
+	int grv = VM_PAGER_OK;
+
+#if !defined(MAX_PERF)
+	if (count && m[0]->object != object) {
+		panic("swap_pager_getpages: object mismatch %p/%p", 
+		    object, 
+		    m[0]->object
+		);
+	}
+#endif
+	/*
+	 * Step 1
+	 *
+	 * Turn object into OBJT_SWAP
+	 * check for bogus sysops
+	 * force sync if not pageout process
+	 */
+
+	if (object->type != OBJT_SWAP) {
+		swp_pager_meta_build(object, 0, SWAPBLK_NONE, 0);
 	}
 
 	if (curproc != pageproc)
 		sync = TRUE;
 
-	object = m[0]->object;
-	paging_pindex = OFF_TO_IDX(object->paging_offset);
-
-	failed = 0;
-	for (j = 0; j < count; j++) {
-		fidx = m[j]->pindex + paging_pindex;
-		ix = swap_pager_block_index(fidx);
-		swb[j] = 0;
-		if (ix >= object->un_pager.swp.swp_nblocks) {
-			rtvals[j] = VM_PAGER_FAIL;
-			failed = 1;
-			continue;
-		} else {
-			rtvals[j] = VM_PAGER_OK;
-		}
-		swb[j] = &object->un_pager.swp.swp_blocks[ix];
-		swb[j]->swb_locked++;
-		if (failed) {
-			rtvals[j] = VM_PAGER_FAIL;
-			continue;
-		}
-		off = swap_pager_block_offset(fidx);
-		reqaddr[j] = swb[j]->swb_block[off];
-		if (reqaddr[j] == SWB_EMPTY) {
-			daddr_t blk;
-			int tries;
-			int ntoget;
+	/*
+	 * Step 2
+	 *
+	 * Assign swap blocks and issue I/O.  We reallocate swap on the fly.
+	 * The page is left dirty until the pageout operation completes
+	 * successfully.
+	 */
 
-			tries = 0;
-			s = splvm();
+	for (i = 0; i < count; i += n) {
+		int s;
+		int j;
+		struct buf *bp;
+		daddr_t blk;
 
-			/*
-			 * if any other pages have been allocated in this
-			 * block, we only try to get one page.
-			 */
-			for (i = 0; i < SWB_NPAGES; i++) {
-				if (swb[j]->swb_block[i] != SWB_EMPTY)
-					break;
-			}
+		/*
+		 * Maximum I/O size is limited by a number of factors.
+		 */
 
-			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
-			/*
-			 * this code is alittle conservative, but works (the
-			 * intent of this code is to allocate small chunks for
-			 * small objects)
-			 */
-			if ((off == 0) && ((fidx + ntoget) > object->size)) {
-				ntoget = object->size - fidx;
-			}
-	retrygetspace:
-			if (!swap_pager_full && ntoget > 1 &&
-			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
-				&blk)) {
-
-				for (i = 0; i < ntoget; i++) {
-					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
-					swb[j]->swb_valid = 0;
-				}
+		n = min(BLIST_MAX_ALLOC, count - i);
+		n = min(n, max_pageout_cluster);
 
-				reqaddr[j] = swb[j]->swb_block[off];
-			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
-				&swb[j]->swb_block[off])) {
-				/*
-				 * if the allocation has failed, we try to
-				 * reclaim space and retry.
-				 */
-				if (++tries == 1) {
-					swap_pager_reclaim();
-					goto retrygetspace;
-				}
-				rtvals[j] = VM_PAGER_AGAIN;
-				failed = 1;
-				swap_pager_full = 1;
-			} else {
-				reqaddr[j] = swb[j]->swb_block[off];
-				swb[j]->swb_valid &= ~(1 << off);
+		/*
+		 * Get biggest block of swap we can.  If we fail, fall
+		 * back and try to allocate a smaller block.  Don't go
+		 * overboard trying to allocate space if it would overly
+		 * fragment swap.
+		 */
+		while (
+		    (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE &&
+		    n > 4
+		) {
+			n >>= 1;
+		}
+		if (blk == SWAPBLK_NONE) {
+			for (j = 0; j < n; ++j) {
+				rtvals[i+j] = VM_PAGER_FAIL;
 			}
-			splx(s);
+			grv = VM_PAGER_FAIL;
+			continue;
 		}
-	}
 
-	/*
-	 * search forwards for the last contiguous page to transfer
-	 */
-	failed = 0;
-	for (i = 0; i < count; i++) {
-		if (failed ||
-			(reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
-		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
-		    (rtvals[i] != VM_PAGER_OK)) {
-			failed = 1;
-			if (rtvals[i] == VM_PAGER_OK)
-				rtvals[i] = VM_PAGER_AGAIN;
+		/*
+		 * Oops, too big if it crosses a stripe
+		 *
+		 * 1111000000
+		 *     111111
+		 *    1000001
+		 */
+		if ((blk ^ (blk + n)) & dmmax_mask) {
+			j = ((blk + dmmax) & dmmax_mask) - blk;
+			swp_pager_freeswapspace(blk + j, n - j);
+			n = j;
 		}
-	}
 
-	ix = 0;
-	firstidx = -1;
-	for (i = 0; i < count; i++) {
-		if (rtvals[i] == VM_PAGER_OK) {
-			ix++;
-			if (firstidx == -1) {
-				firstidx = i;
-			}
-		} else if (firstidx >= 0) {
-			break;
-		}
-	}
+		/*
+		 * All I/O parameters have been satisfied, build the I/O
+		 * request and assign the swap space.
+		 *
+		 * NOTE: B_PAGING is set by pbgetvp()
+		 */
 
-	if (firstidx == -1) {
-		for (i = 0; i < count; i++) {
-			if (rtvals[i] == VM_PAGER_OK)
-				rtvals[i] = VM_PAGER_AGAIN;
-		}
-		return VM_PAGER_AGAIN;
-	}
+		bp = getpbuf(&nsw_wcount);
+		bp->b_spc = NULL;	/* not used, but NULL-out anyway */
 
-	lastidx = firstidx + ix;
+		pmap_qenter((vm_offset_t)bp->b_data, &m[i], n);
 
-	if (ix > max_pageout_cluster) {
-		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
-			if (rtvals[i] == VM_PAGER_OK)
-				rtvals[i] = VM_PAGER_AGAIN;
-		}
-		ix = max_pageout_cluster;
-		lastidx = firstidx + ix;
-	}
+		bp->b_flags = B_BUSY | B_ASYNC;
+		bp->b_proc = &proc0; /* XXX (but without B_PHYS this is ok) */
+		bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
 
-	for (i = 0; i < firstidx; i++) {
-		if (swb[i])
-			swb[i]->swb_locked--;
-	}
+		if (bp->b_rcred != NOCRED)
+			crhold(bp->b_rcred);
+		if (bp->b_wcred != NOCRED)
+			crhold(bp->b_wcred);
+		pbgetvp(swapdev_vp, bp);
 
-	for (i = lastidx; i < count; i++) {
-		if (swb[i])
-			swb[i]->swb_locked--;
-	}
+		bp->b_bcount = PAGE_SIZE * n;
+		bp->b_bufsize = PAGE_SIZE * n;
+		bp->b_blkno = blk;
 
-#ifdef INVARIANTS
-	for (i = firstidx; i < lastidx; i++) {
-		if (reqaddr[i] == SWB_EMPTY) {
-			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
-				m[i]->pindex, i);
-		}
-	}
-#endif
+		s = splvm();
 
-	/*
-	 * Clean up all completed async pageouts.
-	 */
-	if (swap_pager_free_pending)
-		swap_pager_sync();
+		for (j = 0; j < n; ++j) {
+			vm_page_t mreq = m[i+j];
 
-	/*
-	 * get a swap pager clean data structure, block until we get it
-	 */
-	if (curproc == pageproc) {
-		if (swap_pager_free_count == 0) {
-			s = splvm();
-			while (swap_pager_free_count == 0) {
-				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
-			/*
-			 * if it does not get one within a short time, then
-			 * there is a potential deadlock, so we go-on trying
-			 * to free pages.  It is important to block here as opposed
-			 * to returning, thereby allowing the pageout daemon to continue.
-			 * It is likely that pageout daemon will start suboptimally
-			 * reclaiming vnode backed pages if we don't block.  Since the
-			 * I/O subsystem is probably already fully utilized, might as
-			 * well wait.
-			 */
-				if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
-					if (swap_pager_free_pending)
-						swap_pager_sync();
-					if (swap_pager_free_count == 0) {
-						for (i = firstidx; i < lastidx; i++) {
-							rtvals[i] = VM_PAGER_AGAIN;
-						}
-						splx(s);
-						return VM_PAGER_AGAIN;
-					}
-				} else {
-					swap_pager_sync();
-				}
-			}
-			splx(s);
+			swp_pager_meta_build(
+			    mreq->object, 
+			    mreq->pindex,
+			    blk + j,
+			    0
+			);
+			mreq->dirty = VM_PAGE_BITS_ALL;
+			rtvals[i+j] = VM_PAGER_OK;
+
+			vm_page_flag_set(mreq, PG_SWAPINPROG);
+			bp->b_pages[j] = mreq;
 		}
+		bp->b_flags |= B_CALL;
+		bp->b_npages = n;
 
-		spc = TAILQ_FIRST(&swap_pager_free);
-		KASSERT(spc != NULL,
-		    ("swap_pager_putpages: free queue is empty, %d expected\n",
-		    swap_pager_free_count));
-		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
-		swap_pager_free_count--;
-
-		kva = spc->spc_kva;
-		bp = spc->spc_bp;
-		bzero(bp, sizeof *bp);
-		bp->b_spc = spc;
-		bp->b_xflags = 0;
-		bp->b_data = (caddr_t) kva;
-	} else {
-		spc = NULL;
-		bp = getpbuf();
-		kva = (vm_offset_t) bp->b_data;
-		bp->b_spc = NULL;
-	}
+		cnt.v_swapout++;
+		cnt.v_swappgsout += bp->b_npages;
+		swapdev_vp->v_numoutput++;
 
-	/*
-	 * map our page(s) into kva for I/O
-	 */
-	pmap_qenter(kva, &m[firstidx], ix);
+		/*
+		 * asynchronous
+		 *
+		 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
+		 */
+
+		if (sync == FALSE) {
+			bp->b_iodone = swp_pager_async_iodone;
+			bp->b_dirtyoff = 0;
+			bp->b_dirtyend = bp->b_bcount;
+			VOP_STRATEGY(bp->b_vp, bp);
+
+			for (j = 0; j < n; ++j)
+				rtvals[i+j] = VM_PAGER_PEND;
+
+			splx(s);
+			grv = VM_PAGER_PEND;
+			continue;
+		}
 
-	/*
-	 * get the base I/O offset into the swap file
-	 */
-	for (i = firstidx; i < lastidx ; i++) {
-		fidx = m[i]->pindex + paging_pindex;
-		off = swap_pager_block_offset(fidx);
 		/*
-		 * set the valid bit
+		 * synchronous
+		 *
+		 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
 		 */
-		swb[i]->swb_valid |= (1 << off);
+
+		bp->b_iodone = swp_pager_sync_iodone;
+		VOP_STRATEGY(bp->b_vp, bp);
+
 		/*
-		 * and unlock the data structure
+		 * Wait for the sync I/O to complete, then update rtvals.
+		 * We just set the rtvals[] to VM_PAGER_PEND so we can call
+		 * our async completion routine at the end, thus avoiding a
+		 * double-free.
 		 */
-		swb[i]->swb_locked--;
-	}
+		while ((bp->b_flags & B_DONE) == 0) {
+			tsleep(bp, PVM, "swwrt", 0);
+		}
 
-	bp->b_flags = B_BUSY | B_PAGING;
-	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
-	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
-	if (bp->b_rcred != NOCRED)
-		crhold(bp->b_rcred);
-	if (bp->b_wcred != NOCRED)
-		crhold(bp->b_wcred);
-	bp->b_blkno = reqaddr[firstidx];
-	pbgetvp(swapdev_vp, bp);
+		if (bp->b_flags & B_ERROR) {
+			grv = VM_PAGER_ERROR;
+		}
 
-	bp->b_bcount = PAGE_SIZE * ix;
-	bp->b_bufsize = PAGE_SIZE * ix;
+		for (j = 0; j < n; ++j)
+			rtvals[i+j] = VM_PAGER_PEND;
 
-	s = splvm();
-	swapdev_vp->v_numoutput++;
+		if (bp->b_flags & B_ERROR) {
+			grv = VM_PAGER_ERROR;
+		}
 
-	/*
-	 * If this is an async write we set up additional buffer fields and
-  	 * place a "cleaning" entry on the inuse queue.
-  	 */
- 	object->un_pager.swp.swp_poip++;
- 
- 	if (spc) {
-  		spc->spc_flags = 0;
-  		spc->spc_object = object;
- 		bp->b_npages = ix;
- 		for (i = firstidx; i < lastidx; i++) {
-  			spc->spc_m[i] = m[i];
- 			bp->b_pages[i - firstidx] = m[i];
- 			vm_page_protect(m[i], VM_PROT_READ);
- 			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
- 			m[i]->dirty = 0;
- 		}
-  		spc->spc_first = firstidx;
-  		spc->spc_count = ix;
 		/*
-		 * the completion routine for async writes
+		 * Now that we are through with the bp, we can call the
+		 * normal async completion, which frees everything up.
 		 */
-		bp->b_flags |= B_CALL;
-		bp->b_iodone = swap_pager_iodone;
-		bp->b_dirtyoff = 0;
-		bp->b_dirtyend = bp->b_bcount;
-		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
-	} else {
-		bp->b_flags |= B_CALL;
-		bp->b_iodone = swap_pager_iodone1;
-		bp->b_npages = ix;
-		for (i = firstidx; i < lastidx; i++)
-			bp->b_pages[i - firstidx] = m[i];
-	}
 
-	cnt.v_swapout++;
-	cnt.v_swappgsout += ix;
+		swp_pager_async_iodone(bp);
 
-	/*
-	 * perform the I/O
-	 */
-	VOP_STRATEGY(bp->b_vp, bp);
-	if (sync == FALSE) {
-		if (swap_pager_free_pending) {
-			swap_pager_sync();
-		}
-		for (i = firstidx; i < lastidx; i++) {
-			rtvals[i] = VM_PAGER_PEND;
-		}
 		splx(s);
-		return VM_PAGER_PEND;
 	}
 
+	return(grv);
+}
+
+/*
+ *	swap_pager_sync_iodone:
+ *
+ *	Completion routine for synchronous reads and writes from/to swap.
+ *	We just mark the bp is complete and wake up anyone waiting on it.
+ *
+ *	This routine may not block.
+ */
+
+static void
+swp_pager_sync_iodone(bp)
+	struct buf *bp;
+{
+	bp->b_flags |= B_DONE;
+	bp->b_flags &= ~B_ASYNC;
+	wakeup(bp);
+}
+
+/*
+ *	swp_pager_async_iodone:
+ *
+ *	Completion routine for asynchronous reads and writes from/to swap.
+ *	Also called manually by synchronous code to finish up a bp.
+ *
+ *	WARNING!  This routine may be called from an interrupt.  We cannot
+ *	mess with swap metadata unless we want to run all our other routines
+ *	at splbio() too, which I'd rather not do.  We up ourselves
+ * 	to splvm() because we may call vm_page_free(), which can unlink a
+ *	page from an object.
+ *
+ *	XXX currently I do not believe any object routines protect 
+ *	object->memq at splvm().  The code must be gone over to determine
+ *	the actual state of the problem.
+ *
+ *	For READ operations, the pages are PG_BUSY'd.  For WRITE operations, 
+ *	the pages are vm_page_t->busy'd.  For READ operations, we PG_BUSY 
+ *	unbusy all pages except the 'main' request page.  For WRITE 
+ *	operations, we vm_page_t->busy'd unbusy all pages ( we can do this 
+ *	because we marked them all VM_PAGER_PEND on return from putpages ).
+ *
+ *	This routine may not block.
+ *	This routine is called at splbio()
+ */
+
+static void
+swp_pager_async_iodone(bp)
+	register struct buf *bp;
+{
+	int s;
+	int i;
+	vm_object_t object = NULL;
+
+	s = splvm();
+
+	bp->b_flags |= B_DONE;
+
 	/*
-	 * wait for the sync I/O to complete
+	 * report error
 	 */
-	while ((bp->b_flags & B_DONE) == 0) {
-		tsleep(bp, PVM, "swwrt", 0);
-	}
 
 	if (bp->b_flags & B_ERROR) {
 		printf(
-"swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
-		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
-		rv = VM_PAGER_ERROR;
-	} else {
-		rv = VM_PAGER_OK;
+		    "swap_pager: I/O error - %s failed; blkno %ld,"
+			"size %ld, error %d\n",
+		    ((bp->b_flags & B_READ) ? "pagein" : "pageout"),
+		    (long)bp->b_blkno, 
+		    (long)bp->b_bcount,
+		    bp->b_error
+		);
 	}
 
-	object->un_pager.swp.swp_poip--;
-	if (object->un_pager.swp.swp_poip == 0)
-		wakeup(object);
-
-	if (bp->b_vp)
-		pbrelvp(bp);
+	/*
+	 * set object.
+	 */
 
-	splx(s);
+	if (bp->b_npages)
+		object = bp->b_pages[0]->object;
 
 	/*
 	 * remove the mapping for kernel virtual
 	 */
-	pmap_qremove(kva, ix);
+
+	pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
 
 	/*
-	 * if we have written the page, then indicate that the page is clean.
+	 * cleanup pages.  If an error occurs writing to swap, we are in
+	 * very serious trouble.  If it happens to be a disk error, though,
+	 * we may be able to recover by reassigning the swap later on.  So
+	 * in this case we remove the m->swapblk assignment for the page 
+	 * but do not free it in the rlist.  The errornous block(s) are thus
+	 * never reallocated as swap.  Redirty the page and continue.
 	 */
-	if (rv == VM_PAGER_OK) {
-		for (i = firstidx; i < lastidx; i++) {
-			if (rtvals[i] == VM_PAGER_OK) {
-				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
-				m[i]->dirty = 0;
+
+	for (i = 0; i < bp->b_npages; ++i) {
+		vm_page_t m = bp->b_pages[i];
+
+		vm_page_flag_clear(m, PG_SWAPINPROG);
+
+		if (bp->b_flags & B_ERROR) {
+			/*
+			 * If an error occurs I'd love to throw the swapblk
+			 * away without freeing it back to swapspace, so it
+			 * can never be used again.  But I can't from an 
+			 * interrupt.
+			 */
+
+			if (bp->b_flags & B_READ) {
 				/*
-				 * optimization, if a page has been read
-				 * during the pageout process, we activate it.
+				 * When reading, reqpage needs to stay
+				 * locked for the parent, but all other
+				 * pages can be freed.  We still want to
+				 * wakeup the parent waiting on the page,
+				 * though.  ( also: pg_reqpage can be -1 and 
+				 * not match anything ).
+				 *
+				 * We have to wake specifically requested pages
+				 * up too because we cleared PG_SWAPINPROG and
+				 * someone may be waiting for that.
+				 *
+				 * NOTE: for reads, m->dirty will probably
+				 * be overriden by the original caller of
+				 * getpages so don't play cute tricks here.
+				 *
+				 * XXX it may not be legal to free the page
+				 * here as this messes with the object->memq's.
 				 */
-				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
-				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
-					vm_page_activate(m[i]);
-				}
+
+				m->valid = 0;
+				vm_page_flag_clear(m, PG_ZERO);
+
+				if (i != bp->b_pager.pg_reqpage)
+					vm_page_free(m);
+				else
+					vm_page_flash(m);
+				/*
+				 * If i == bp->b_pager.pg_reqpage, do not wake 
+				 * the page up.  The caller needs to.
+				 */
+			} else {
+				/*
+				 * If a write error occurs, reactivate page
+				 * so it doesn't clog the inactive list,
+				 * then finish the I/O.
+				 */
+				m->dirty = VM_PAGE_BITS_ALL;
+				vm_page_activate(m);
+				vm_page_io_finish(m);
 			}
-		}
-	} else {
-		for (i = firstidx; i < lastidx; i++) {
-			rtvals[i] = rv;
+		} else if (bp->b_flags & B_READ) {
+			/*
+			 * For read success, clear dirty bits.  Nobody should
+			 * have this page mapped but don't take any chances,
+			 * make sure the pmap modify bits are also cleared.
+			 *
+			 * NOTE: for reads, m->dirty will probably be 
+			 * overriden by the original caller of getpages so
+			 * we cannot set them in order to free the underlying
+			 * swap in a low-swap situation.  I don't think we'd
+			 * want to do that anyway, but it was an optimization
+			 * that existed in the old swapper for a time before
+			 * it got ripped out due to precisely this problem.
+			 *
+			 * clear PG_ZERO in page.
+			 *
+			 * If not the requested page then deactivate it.
+			 *
+			 * Note that the requested page, reqpage, is left
+			 * busied, but we still have to wake it up.  The
+			 * other pages are released (unbusied) by 
+			 * vm_page_wakeup().  We do not set reqpage's
+			 * valid bits here, it is up to the caller.
+			 */
+
+			pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+			m->valid = VM_PAGE_BITS_ALL;
+			m->dirty = 0;
+			vm_page_flag_clear(m, PG_ZERO);
+
+			/*
+			 * We have to wake specifically requested pages
+			 * up too because we cleared PG_SWAPINPROG and
+			 * could be waiting for it in getpages.  However,
+			 * be sure to not unbusy getpages specifically
+			 * requested page - getpages expects it to be 
+			 * left busy.
+			 */
+			if (i != bp->b_pager.pg_reqpage) {
+				vm_page_deactivate(m);
+				vm_page_wakeup(m);
+			} else {
+				vm_page_flash(m);
+			}
+		} else {
+			/*
+			 * For write success, clear the modify and dirty 
+			 * status, then finish the I/O ( which decrements the 
+			 * busy count and possibly wakes waiter's up ).
+			 */
+			vm_page_protect(m, VM_PROT_READ);
+			pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+			m->dirty = 0;
+			vm_page_io_finish(m);
 		}
 	}
 
-	if (spc != NULL) {
-		if (bp->b_rcred != NOCRED)
-			crfree(bp->b_rcred);
-		if (bp->b_wcred != NOCRED)
-			crfree(bp->b_wcred);
-		spc_free(spc);
-	} else
-		relpbuf(bp);
-	if (swap_pager_free_pending)
-		swap_pager_sync();
-
-	return (rv);
+	/*
+	 * adjust pip.  NOTE: the original parent may still have its own
+	 * pip refs on the object.
+	 */
+
+	if (object)
+		vm_object_pip_wakeupn(object, bp->b_npages);
+
+	/*
+	 * release the physical I/O buffer
+	 */
+
+	relpbuf(bp, ((bp->b_flags & B_READ) ? &nsw_rcount : &nsw_wcount));
+
+	splx(s);
 }
 
-void
-swap_pager_sync()
+/************************************************************************
+ *				SWAP META DATA 				*
+ ************************************************************************
+ *
+ *	These routines manipulate the swap metadata stored in the 
+ *	OBJT_SWAP object.
+ *
+ *	In fact, we just have a few counters in the vm_object_t.  The
+ *	metadata is actually stored in a hash table.
+ */
+
+/*
+ * SWP_PAGER_HASH() -	hash swap meta data
+ *
+ *	This is an inline helper function which hash the swapblk given
+ *	the object and page index.  It returns a pointer to a pointer
+ *	to the object, or a pointer to a NULL pointer if it could not
+ *	find a swapblk.
+ */
+
+static __inline struct swblock **
+swp_pager_hash(vm_object_t object, daddr_t index)
 {
-	swp_clean_t spc;
+	struct swblock **pswap;
+	struct swblock *swap;
+
+	index &= ~SWAP_META_MASK;
+	pswap = &swhash[(index ^ (int)(long)object) & swhash_mask];
 
-	while (spc = TAILQ_FIRST(&swap_pager_done)) {
-		swap_pager_finish(spc);
+	while ((swap = *pswap) != NULL) {
+		if (swap->swb_object == object &&
+		    swap->swb_index == index
+		) {
+			break;
+		}
+		pswap = &swap->swb_hnext;
 	}
-	return;
+	return(pswap);
 }
 
+/*
+ * SWP_PAGER_META_BUILD() -	add swap block to swap meta data for object
+ *
+ *	We first convert the object to a swap object if it is a default
+ *	object.
+ *
+ *	The specified swapblk is added to the object's swap metadata.  If
+ *	the swapblk is not valid, it is freed instead.  Any previously
+ *	assigned swapblk is freed.
+ */
+
 static void
-swap_pager_finish(spc)
-	register swp_clean_t spc;
-{
-	int i, s, lastidx;
-	vm_object_t object;
-	vm_page_t *ma;
+swp_pager_meta_build(
+	vm_object_t object, 
+	daddr_t index, 
+	daddr_t swapblk, 
+	int waitok
+) {
+	struct swblock *swap;
+	struct swblock **pswap;
 
-	ma = spc->spc_m;
-	object = spc->spc_object;
-	lastidx = spc->spc_first + spc->spc_count;
+	/*
+	 * Convert default object to swap object if necessary
+	 */
 
-	s = splvm();
-	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
-	splx(s);
+	if (object->type != OBJT_SWAP) {
+		object->type = OBJT_SWAP;
+		object->un_pager.swp.swp_bcount = 0;
+
+		if (object->handle != NULL) {
+			TAILQ_INSERT_TAIL(
+			    NOBJLIST(object->handle),
+			    object, 
+			    pager_object_list
+			);
+		} else {
+			TAILQ_INSERT_TAIL(
+			    &swap_pager_un_object_list,
+			    object, 
+			    pager_object_list
+			);
+		}
+	}
+	
+	/*
+	 * Wait for free memory when waitok is TRUE prior to calling the
+	 * zone allocator.
+	 */
 
-	pmap_qremove(spc->spc_kva, spc->spc_count);
+	while (waitok && cnt.v_free_count == 0) {
+		VM_WAIT;
+	}
 
 	/*
-	 * If no error, mark as clean and inform the pmap system. If error,
-	 * mark as dirty so we will try again. (XXX could get stuck doing
-	 * this, should give up after awhile)
+	 * If swapblk being added is invalid, just free it.
 	 */
-	if (spc->spc_flags & SPC_ERROR) {
 
-		for (i = spc->spc_first; i < lastidx; i++) {
-			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
-			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
-			ma[i]->dirty = VM_PAGE_BITS_ALL;
-			vm_page_io_finish(ma[i]);
+	if (swapblk & SWAPBLK_NONE) {
+		if (swapblk != SWAPBLK_NONE) {
+			swp_pager_freeswapspace(
+			    index,
+			    1
+			);
+			swapblk = SWAPBLK_NONE;
 		}
+	}
 
-		vm_object_pip_subtract(object, spc->spc_count);
-		if ((object->paging_in_progress == 0) &&
-			(object->flags & OBJ_PIPWNT)) {
-			vm_object_clear_flag(object, OBJ_PIPWNT);
-			wakeup(object);
-		}
+	/*
+	 * Locate hash entry.  If not found create, but if we aren't adding
+	 * anything just return.
+	 */
 
-	} else {
-		for (i = spc->spc_first; i < lastidx; i++) {
-			if ((ma[i]->queue != PQ_ACTIVE) &&
-			   ((ma[i]->flags & PG_WANTED) ||
-				 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
-				vm_page_activate(ma[i]);
-			}
-		}
+	pswap = swp_pager_hash(object, index);
+
+	if ((swap = *pswap) == NULL) {
+		int i;
+
+		if (swapblk == SWAPBLK_NONE)
+			return;
+
+		swap = *pswap = zalloc(swap_zone);
+
+		swap->swb_hnext = NULL;
+		swap->swb_object = object;
+		swap->swb_index = index & ~SWAP_META_MASK;
+		swap->swb_count = 0;
+
+		++object->un_pager.swp.swp_bcount;
+
+		for (i = 0; i < SWAP_META_PAGES; ++i)
+			swap->swb_pages[i] = SWAPBLK_NONE;
 	}
 
-	nswiodone -= spc->spc_count;
-	swap_pager_free_pending--;
-	spc_free(spc);
+	/*
+	 * Delete prior contents of metadata
+	 */
 
-	return;
+	index &= SWAP_META_MASK;
+
+	if (swap->swb_pages[index] != SWAPBLK_NONE) {
+		swp_pager_freeswapspace(
+		    swap->swb_pages[index] & SWAPBLK_MASK,
+		    1
+		);
+		--swap->swb_count;
+	}
+
+	/*
+	 * Enter block into metadata
+	 */
+
+	swap->swb_pages[index] = swapblk;
+	++swap->swb_count;
 }
 
 /*
- * swap_pager_iodone
+ * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata
+ *
+ *	The requested range of blocks is freed, with any associated swap 
+ *	returned to the swap bitmap.
+ *
+ *	This routine will free swap metadata structures as they are cleaned 
+ *	out.  This routine does *NOT* operate on swap metadata associated
+ *	with resident pages.
+ *
+ *	This routine must be called at splvm()
  */
+
 static void
-swap_pager_iodone(bp)
-	register struct buf *bp;
+swp_pager_meta_free(vm_object_t object, daddr_t index, daddr_t count)
 {
-	int i, s, lastidx;
-	register swp_clean_t spc;
-	vm_object_t object;
-	vm_page_t *ma;
+	if (object->type != OBJT_SWAP)
+		return;
 
+	while (count > 0) {
+		struct swblock **pswap;
+		struct swblock *swap;
 
-	s = splvm();
-	spc = (swp_clean_t) bp->b_spc;
-	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
-	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
+		pswap = swp_pager_hash(object, index);
 
-	object = spc->spc_object;
+		if ((swap = *pswap) != NULL) {
+			daddr_t v = swap->swb_pages[index & SWAP_META_MASK];
 
-#if defined(DIAGNOSTIC)
-	if (object->paging_in_progress < spc->spc_count)
-		printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
-			object->paging_in_progress, spc->spc_count);
-#endif
-
-	if (bp->b_flags & B_ERROR) {
-		spc->spc_flags |= SPC_ERROR;
-		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
-		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
-		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
-	} else {
-		vm_object_pip_subtract(object, spc->spc_count);
-		if ((object->paging_in_progress == 0) &&
-			(object->flags & OBJ_PIPWNT)) {
-			vm_object_clear_flag(object, OBJ_PIPWNT);
-			wakeup(object);
-		}
-		ma = spc->spc_m;
-		lastidx = spc->spc_first + spc->spc_count;
-		for (i = spc->spc_first; i < lastidx; i++) {
-			/*
-			 * we wakeup any processes that are waiting on these pages.
-			 */
-			vm_page_io_finish(ma[i]);
+			if (v != SWAPBLK_NONE) {
+				swp_pager_freeswapspace(v, 1);
+				swap->swb_pages[index & SWAP_META_MASK] =
+					SWAPBLK_NONE;
+				if (--swap->swb_count == 0) {
+					*pswap = swap->swb_hnext;
+					zfree(swap_zone, swap);
+					--object->un_pager.swp.swp_bcount;
+				}
+			}
+			--count;
+			++index;
+		} else {
+			daddr_t n = SWAP_META_PAGES - (index & SWAP_META_MASK);
+			count -= n;
+			index += n;
 		}
 	}
+}
+
+/*
+ * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object
+ *
+ *	This routine locates and destroys all swap metadata associated with
+ *	an object.
+ */
+
+static void
+swp_pager_meta_free_all(vm_object_t object)
+{
+	daddr_t index = 0;
 
-	if (bp->b_vp)
-		pbrelvp(bp);
+	if (object->type != OBJT_SWAP)
+		return;
 
-	if (bp->b_rcred != NOCRED)
-		crfree(bp->b_rcred);
-	if (bp->b_wcred != NOCRED)
-		crfree(bp->b_wcred);
+	while (object->un_pager.swp.swp_bcount) {
+		struct swblock **pswap;
+		struct swblock *swap;
 
-	nswiodone += spc->spc_count;
-	swap_pager_free_pending++;
-	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
-		wakeup(spc->spc_object);
-	}
+		pswap = swp_pager_hash(object, index);
+		if ((swap = *pswap) != NULL) {
+			int i;
 
-	if (swap_pager_needflags &&
-	  ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
-		spc_wakeup();
+			for (i = 0; i < SWAP_META_PAGES; ++i) {
+				daddr_t v = swap->swb_pages[i];
+				if (v != SWAPBLK_NONE) {
+#if !defined(MAX_PERF)
+					--swap->swb_count;
+#endif
+					swp_pager_freeswapspace(
+					    v,
+					    1
+					);
+				}
+			}
+#if !defined(MAX_PERF)
+			if (swap->swb_count != 0)
+				panic("swap_pager_meta_free_all: swb_count != 0");
+#endif
+			*pswap = swap->swb_hnext;
+			zfree(swap_zone, swap);
+			--object->un_pager.swp.swp_bcount;
+		}
+		index += SWAP_META_PAGES;
+#if !defined(MAX_PERF)
+		if (index > 0x20000000)
+			panic("swp_pager_meta_free_all: failed to locate all swap meta blocks");
+#endif
 	}
+}
 
-	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
-		vm_pageout_pages_needed) {
-		wakeup(&vm_pageout_pages_needed);
-		vm_pageout_pages_needed = 0;
+/*
+ * SWP_PAGER_METACTL() -  misc control of swap and vm_page_t meta data.
+ *
+ *	This routine is capable of looking up, popping, or freeing
+ *	swapblk assignments in the swap meta data or in the vm_page_t.
+ *	The routine typically returns the swapblk being looked-up, or popped,
+ *	or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block
+ *	was invalid.  This routine will automatically free any invalid 
+ *	meta-data swapblks.
+ *
+ *	It is not possible to store invalid swapblks in the swap meta data
+ *	(other then a literal 'SWAPBLK_NONE'), so we don't bother checking.
+ *
+ *	When acting on a busy resident page and paging is in progress, we 
+ *	have to wait until paging is complete but otherwise can act on the 
+ *	busy page.
+ *
+ *	SWM_FREE	remove and free swap block from metadata
+ *
+ *	SWM_POP		remove from meta data but do not free.. pop it out
+ */
+
+static daddr_t
+swp_pager_meta_ctl(
+	vm_object_t object,
+	vm_pindex_t index,
+	int flags
+) {
+	/*
+	 * The meta data only exists of the object is OBJT_SWAP 
+	 * and even then might not be allocated yet.
+	 */
+
+	if (
+	    object->type != OBJT_SWAP ||
+	    object->un_pager.swp.swp_bcount == 0
+	) {
+		return(SWAPBLK_NONE);
 	}
 
-	splx(s);
+	{
+		struct swblock **pswap;
+		struct swblock *swap;
+		daddr_t r1 = SWAPBLK_NONE;
+
+		pswap = swp_pager_hash(object, index);
+
+		index &= SWAP_META_MASK;
+
+		if ((swap = *pswap) != NULL) {
+			r1 = swap->swb_pages[index];
+
+			if (r1 != SWAPBLK_NONE) {
+				if (flags & SWM_FREE) {
+					swp_pager_freeswapspace(
+					    r1,
+					    1
+					);
+					r1 = SWAPBLK_NONE;
+				}
+				if (flags & (SWM_FREE|SWM_POP)) {
+					swap->swb_pages[index] = SWAPBLK_NONE;
+					if (--swap->swb_count == 0) {
+						*pswap = swap->swb_hnext;
+						zfree(swap_zone, swap);
+						--object->un_pager.swp.swp_bcount;
+					}
+				} 
+	 		}
+		}
+
+		return(r1);
+	}
+	/* not reached */
 }
+