about | summary | refs | log | tree | commit | diff
path: root/sys/vm/swap_pager.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/vm/swap_pager.c')
-rw-r--r-- sys/vm/swap_pager.c 1522
1 files changed, 1022 insertions, 500 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 0f4f088feb74..bf3f38f9ce8d 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -35,24 +35,20 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
- * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91
- * $Id: swap_pager.c,v 1.2 1993/10/16 16:20:19 rgrimes Exp $
+ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
+ * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91
+ *
+ * $Id: swap_pager.c,v 1.17.2.1 1994/03/07 02:07:06 rgrimes Exp $
*/
/*
- * Quick hack to page to dedicated partition(s).
- * TODO:
- * Add multiprocessor locks
- * Deal with async writes in a better fashion
+ * Mostly rewritten by John Dyson with help from David Greenman, 12-Jan-1994
*/
-#include "swappager.h"
-#if NSWAPPAGER > 0
-
#include "param.h"
#include "proc.h"
#include "buf.h"
+#include "kernel.h"
#include "systm.h"
#include "specdev.h"
#include "vnode.h"
@@ -63,30 +59,27 @@
#include "vm_param.h"
#include "queue.h"
#include "lock.h"
+#include "vm.h"
#include "vm_prot.h"
#include "vm_object.h"
#include "vm_page.h"
#include "vm_pageout.h"
#include "swap_pager.h"
+#include "vm_map.h"
-#define NSWSIZES 16 /* size of swtab */
-#define NPENDINGIO 64 /* max # of pending cleans */
-#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
-
-#ifdef DEBUG
-int swpagerdebug = 0 /*0x100*/;
-#define SDB_FOLLOW 0x001
-#define SDB_INIT 0x002
-#define SDB_ALLOC 0x004
-#define SDB_IO 0x008
-#define SDB_WRITE 0x010
-#define SDB_FAIL 0x020
-#define SDB_ALLOCBLK 0x040
-#define SDB_FULL 0x080
-#define SDB_ANOM 0x100
-#define SDB_ANOMPANIC 0x200
+#ifndef NPENDINGIO
+#define NPENDINGIO 96
#endif
+extern int nswbuf;
+int nswiodone;
+extern int vm_pageout_rate_limit;
+static int cleandone;
+extern int hz;
+int swap_pager_full;
+extern vm_map_t pager_map;
+extern int vm_pageout_pages_needed;
+
struct swpagerclean {
queue_head_t spc_list;
int spc_flags;
@@ -94,94 +87,77 @@ struct swpagerclean {
sw_pager_t spc_swp;
vm_offset_t spc_kva;
vm_page_t spc_m;
-} swcleanlist[NPENDINGIO];
+} swcleanlist [NPENDINGIO] ;
+
typedef struct swpagerclean *swp_clean_t;
+extern vm_map_t kernel_map;
+
/* spc_flags values */
-#define SPC_FREE 0x00
-#define SPC_BUSY 0x01
-#define SPC_DONE 0x02
-#define SPC_ERROR 0x04
-#define SPC_DIRTY 0x08
-
-struct swtab {
- vm_size_t st_osize; /* size of object (bytes) */
- int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
-#ifdef DEBUG
- u_long st_inuse; /* number in this range in use */
- u_long st_usecnt; /* total used of this size */
-#endif
-} swtab[NSWSIZES+1];
+#define SPC_ERROR 0x01
-#ifdef DEBUG
-int swap_pager_pendingio; /* max pending async "clean" ops */
-int swap_pager_poip; /* pageouts in progress */
-int swap_pager_piip; /* pageins in progress */
-#endif
+#define SWB_EMPTY (-1)
+queue_head_t swap_pager_done; /* list of completed page cleans */
queue_head_t swap_pager_inuse; /* list of pending page cleans */
queue_head_t swap_pager_free; /* list of free pager clean structs */
queue_head_t swap_pager_list; /* list of "named" anon regions */
+queue_head_t swap_pager_un_list; /* list of "unnamed" anon pagers */
+#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
+int swap_pager_needflags;
+
+static queue_head_t *swp_qs[]={
+ &swap_pager_list, &swap_pager_un_list, (queue_head_t *) 0
+};
+
+struct pagerops swappagerops = {
+ swap_pager_init,
+ swap_pager_alloc,
+ swap_pager_dealloc,
+ swap_pager_getpage,
+ swap_pager_getmulti,
+ swap_pager_putpage,
+ swap_pager_haspage
+};
+
+extern int nswbuf;
+
+int npendingio = NPENDINGIO;
+int pendingiowait;
+int require_swap_init;
+void swap_pager_finish();
+int dmmin, dmmax;
+extern int vm_page_count;
+
+struct buf * getpbuf() ;
+void relpbuf(struct buf *bp) ;
void
swap_pager_init()
{
- register swp_clean_t spc;
- register int i, bsize;
+ register int i;
extern int dmmin, dmmax;
- int maxbsize;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
- printf("swpg_init()\n");
-#endif
dfltpagerops = &swappagerops;
queue_init(&swap_pager_list);
+ queue_init(&swap_pager_un_list);
/*
* Initialize clean lists
*/
queue_init(&swap_pager_inuse);
+ queue_init(&swap_pager_done);
queue_init(&swap_pager_free);
- for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
- queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
- spc->spc_flags = SPC_FREE;
- }
+
+ require_swap_init = 1;
/*
* Calculate the swap allocation constants.
*/
- if (dmmin == 0) {
- dmmin = DMMIN;
- if (dmmin < CLBYTES/DEV_BSIZE)
- dmmin = CLBYTES/DEV_BSIZE;
- }
- if (dmmax == 0)
- dmmax = DMMAX;
- /*
- * Fill in our table of object size vs. allocation size
- */
- bsize = btodb(PAGE_SIZE);
- if (bsize < dmmin)
- bsize = dmmin;
- maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
- if (maxbsize > dmmax)
- maxbsize = dmmax;
- for (i = 0; i < NSWSIZES; i++) {
- swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
- swtab[i].st_bsize = bsize;
-#ifdef DEBUG
- if (swpagerdebug & SDB_INIT)
- printf("swpg_init: ix %d, size %x, bsize %x\n",
- i, swtab[i].st_osize, swtab[i].st_bsize);
-#endif
- if (bsize >= maxbsize)
- break;
- bsize *= 2;
- }
- swtab[i].st_osize = 0;
- swtab[i].st_bsize = bsize;
+ dmmin = CLBYTES/DEV_BSIZE;
+ dmmax = btodb(SWB_NPAGES*NBPG)*2;
+
}
/*
@@ -190,20 +166,41 @@ swap_pager_init()
* we should not wait for memory as it could resulting in deadlock.
*/
vm_pager_t
-swap_pager_alloc(handle, size, prot)
+swap_pager_alloc(handle, size, prot, offset)
caddr_t handle;
register vm_size_t size;
vm_prot_t prot;
+ vm_offset_t offset;
{
register vm_pager_t pager;
register sw_pager_t swp;
- struct swtab *swt;
int waitok;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
-#endif
+ int i,j;
+
+ if (require_swap_init) {
+ register swp_clean_t spc;
+ struct buf *bp;
+ /*
+ * kva's are allocated here so that we don't need to keep
+ * doing kmem_alloc pageables at runtime
+ */
+ for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
+ spc->spc_kva = kmem_alloc_pageable(pager_map, NBPG);
+ if (!spc->spc_kva) {
+ break;
+ }
+ spc->spc_bp = malloc(sizeof( *bp), M_TEMP,
+ M_NOWAIT);
+ if (!spc->spc_bp) {
+ kmem_free_wakeup(pager_map, spc->spc_kva, NBPG);
+ break;
+ }
+ spc->spc_flags = 0;
+ queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
+ }
+ require_swap_init = 0;
+ }
+
/*
* If this is a "named" anonymous region, look it up and
* return the appropriate pager if it exists.
@@ -221,50 +218,42 @@ swap_pager_alloc(handle, size, prot)
return(pager);
}
}
+
+ if (swap_pager_full)
+ return(NULL);
+
/*
* Pager doesn't exist, allocate swap management resources
* and initialize.
*/
- waitok = handle ? M_WAITOK : M_NOWAIT;
+ waitok = handle ? M_WAITOK : M_NOWAIT;
pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
if (pager == NULL)
return(NULL);
swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
if (swp == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: swpager malloc failed\n");
-#endif
free((caddr_t)pager, M_VMPAGER);
return(NULL);
}
size = round_page(size);
- for (swt = swtab; swt->st_osize; swt++)
- if (size <= swt->st_osize)
- break;
-#ifdef DEBUG
- swt->st_inuse++;
- swt->st_usecnt++;
-#endif
swp->sw_osize = size;
- swp->sw_bsize = swt->st_bsize;
- swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
+ swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * NBPG) - 1) / btodb(SWB_NPAGES*NBPG);
swp->sw_blocks = (sw_blk_t)
malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
- M_VMPGDATA, M_NOWAIT);
+ M_VMPGDATA, waitok);
if (swp->sw_blocks == NULL) {
free((caddr_t)swp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: sw_blocks malloc failed\n");
- swt->st_inuse--;
- swt->st_usecnt--;
-#endif
- return(FALSE);
+ return(NULL);
}
bzero((caddr_t)swp->sw_blocks,
swp->sw_nblocks * sizeof(*swp->sw_blocks));
+
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ for (j = 0; j < SWB_NPAGES; j++)
+ swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
+ }
+
swp->sw_poip = 0;
if (handle) {
vm_object_t object;
@@ -282,211 +271,644 @@ swap_pager_alloc(handle, size, prot)
} else {
swp->sw_flags = 0;
queue_init(&pager->pg_list);
+ queue_enter(&swap_pager_un_list, pager, vm_pager_t, pg_list);
}
pager->pg_handle = handle;
pager->pg_ops = &swappagerops;
pager->pg_type = PG_SWAP;
pager->pg_data = (caddr_t)swp;
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOC)
- printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
- swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
-#endif
return(pager);
}
+/*
+ * returns disk block associated with pager and offset
+ * additionally, as a side effect returns a flag indicating
+ * if the block has been written
+ */
+
+static int *
+swap_pager_diskaddr(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int *valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ if (valid)
+ *valid = 0;
+ ix = offset / (SWB_NPAGES*NBPG);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ return(FALSE);
+ }
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*NBPG)) / NBPG;
+ if (valid)
+ *valid = swb->swb_valid & (1<<ix);
+ return &swb->swb_block[ix];
+}
+
+/*
+ * Utility routine to set the valid (written) bit for
+ * a block associated with a pager and offset
+ */
+static void
+swap_pager_setvalid(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ ix = offset / (SWB_NPAGES*NBPG);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
+ return;
+
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*NBPG)) / NBPG;
+ if (valid)
+ swb->swb_valid |= (1 << ix);
+ else
+ swb->swb_valid &= ~(1 << ix);
+ return;
+}
+
+/*
+ * this routine frees swap blocks from a specified pager
+ */
+void
+swap_pager_freespace(pager, start, size)
+ vm_pager_t pager;
+ vm_offset_t start;
+ vm_offset_t size;
+{
+ sw_pager_t swp = (sw_pager_t) pager->pg_data;
+ vm_offset_t i;
+ int s;
+
+ s = splbio();
+ for (i = start; i < round_page(start + size - 1); i += NBPG) {
+ int *addr = swap_pager_diskaddr(swp, i, 0);
+ if (addr && *addr != SWB_EMPTY) {
+ rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1);
+ *addr = SWB_EMPTY;
+ swap_pager_full = 0;
+ }
+ }
+ splx(s);
+}
+
+/*
+ * swap_pager_reclaim frees up over-allocated space from all pagers
+ * this eliminates internal fragmentation due to allocation of space
+ * for segments that are never swapped to. It has been written so that
+ * it does not block until the rlist_free operation occurs; it keeps
+ * the queues consistent.
+ */
+
+/*
+ * Maximum number of blocks (pages) to reclaim per pass
+ */
+#define MAXRECLAIM 256
+
+void
+swap_pager_reclaim()
+{
+ vm_pager_t p;
+ sw_pager_t swp;
+ int i, j, k;
+ int s;
+ int reclaimcount;
+ static int reclaims[MAXRECLAIM];
+ static int in_reclaim;
+
+/*
+ * allow only one process to be in the swap_pager_reclaim subroutine
+ */
+ s = splbio();
+ if (in_reclaim) {
+ tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
+ splx(s);
+ return;
+ }
+ in_reclaim = 1;
+ reclaimcount = 0;
+
+ /* for each pager queue */
+ for (k = 0; swp_qs[k]; k++) {
+
+ p = (vm_pager_t) queue_first(swp_qs[k]);
+ while (reclaimcount < MAXRECLAIM &&
+ !queue_end(swp_qs[k], (queue_entry_t) p)) {
+
+ /*
+ * see if any blocks associated with a pager have been
+ * allocated but not used (written)
+ */
+ swp = (sw_pager_t) p->pg_data;
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ sw_blk_t swb = &swp->sw_blocks[i];
+ for (j = 0; j < SWB_NPAGES; j++) {
+ if (swb->swb_block[j] != SWB_EMPTY &&
+ (swb->swb_valid & (1 << j)) == 0) {
+ reclaims[reclaimcount++] = swb->swb_block[j];
+ swb->swb_block[j] = SWB_EMPTY;
+ if (reclaimcount >= MAXRECLAIM)
+ goto rfinished;
+ }
+ }
+ }
+ p = (vm_pager_t) queue_next(&p->pg_list);
+ }
+ }
+
+rfinished:
+
+/*
+ * free the blocks that have been added to the reclaim list
+ */
+ for (i = 0; i < reclaimcount; i++) {
+ rlist_free(&swapmap, reclaims[i], reclaims[i] + btodb(NBPG) - 1);
+ wakeup((caddr_t) &in_reclaim);
+ swap_pager_full = 0;
+ }
+
+ splx(s);
+ in_reclaim = 0;
+ wakeup((caddr_t) &in_reclaim);
+}
+
+
+/*
+ * swap_pager_copy copies blocks from one pager to another and
+ * destroys the source pager
+ */
+
+void
+swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
+ vm_pager_t srcpager;
+ vm_offset_t srcoffset;
+ vm_pager_t dstpager;
+ vm_offset_t dstoffset;
+ vm_offset_t offset;
+{
+ sw_pager_t srcswp, dstswp;
+ vm_offset_t i;
+ int s;
+
+ srcswp = (sw_pager_t) srcpager->pg_data;
+ dstswp = (sw_pager_t) dstpager->pg_data;
+
+/*
+ * remove the source pager from the swap_pager internal queue
+ */
+ s = splbio();
+ if (srcswp->sw_flags & SW_NAMED) {
+ queue_remove(&swap_pager_list, srcpager, vm_pager_t, pg_list);
+ srcswp->sw_flags &= ~SW_NAMED;
+ } else {
+ queue_remove(&swap_pager_un_list, srcpager, vm_pager_t, pg_list);
+ }
+
+ while (srcswp->sw_poip) {
+ tsleep((caddr_t)srcswp, PVM, "spgout", 0);
+ }
+ splx(s);
+
+/*
+ * clean all of the pages that are currently active and finished
+ */
+ (void) swap_pager_clean(NULL, B_WRITE);
+
+ s = splbio();
+/*
+ * clear source block before destination object
+ * (release allocated space)
+ */
+ for (i = 0; i < offset + srcoffset; i += NBPG) {
+ int *addr = swap_pager_diskaddr(srcswp, i, 0);
+ if (addr && *addr != SWB_EMPTY) {
+ rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1);
+ *addr = SWB_EMPTY;
+ swap_pager_full = 0;
+ }
+ }
+/*
+ * transfer source to destination
+ */
+ for (i = 0; i < dstswp->sw_osize; i += NBPG) {
+ int srcvalid, dstvalid;
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
+ &srcvalid);
+ int *dstaddrp;
+ /*
+ * see if the source has space allocated
+ */
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ /*
+ * if the source is valid and the dest has no space, then
+ * copy the allocation from the source to the dest.
+ */
+ if (srcvalid) {
+ dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
+ /*
+ * if the dest already has a valid block, deallocate the
+ * source block without copying.
+ */
+ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
+ rlist_free(&swapmap, *dstaddrp, *dstaddrp + btodb(NBPG) - 1);
+ *dstaddrp = SWB_EMPTY;
+ swap_pager_full = 0;
+ }
+ if (dstaddrp && *dstaddrp == SWB_EMPTY) {
+ *dstaddrp = *srcaddrp;
+ *srcaddrp = SWB_EMPTY;
+ swap_pager_setvalid(dstswp, i + dstoffset, 1);
+ }
+ }
+ /*
+ * if the source is not empty at this point, then deallocate the space.
+ */
+ if (*srcaddrp != SWB_EMPTY) {
+ rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1);
+ *srcaddrp = SWB_EMPTY;
+ swap_pager_full = 0;
+ }
+ }
+ }
+
+/*
+ * deallocate the rest of the source object
+ */
+ for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += NBPG) {
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i, 0);
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1);
+ *srcaddrp = SWB_EMPTY;
+ swap_pager_full = 0;
+ }
+ }
+
+ splx(s);
+
+ free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
+ srcswp->sw_blocks = 0;
+ free((caddr_t)srcswp, M_VMPGDATA);
+ srcpager->pg_data = 0;
+ free((caddr_t)srcpager, M_VMPAGER);
+
+ return;
+}
+
+
void
swap_pager_dealloc(pager)
vm_pager_t pager;
{
- register int i;
+ register int i,j;
register sw_blk_t bp;
register sw_pager_t swp;
- struct swtab *swt;
int s;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_dealloc(%x)\n", pager);
-#endif
/*
* Remove from list right away so lookups will fail if we
* block for pageout completion.
*/
+ s = splbio();
swp = (sw_pager_t) pager->pg_data;
if (swp->sw_flags & SW_NAMED) {
queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
swp->sw_flags &= ~SW_NAMED;
+ } else {
+ queue_remove(&swap_pager_un_list, pager, vm_pager_t, pg_list);
}
-#ifdef DEBUG
- for (swt = swtab; swt->st_osize; swt++)
- if (swp->sw_osize <= swt->st_osize)
- break;
- swt->st_inuse--;
-#endif
-
/*
* Wait for all pageouts to finish and remove
* all entries from cleaning list.
*/
- s = splbio();
+
while (swp->sw_poip) {
- swp->sw_flags |= SW_WANTED;
- assert_wait((int)swp);
- thread_block();
+ tsleep((caddr_t)swp, PVM, "swpout", 0);
}
splx(s);
+
+
(void) swap_pager_clean(NULL, B_WRITE);
/*
* Free left over swap blocks
*/
s = splbio();
- for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
- if (bp->swb_block) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
- printf("swpg_dealloc: blk %x\n",
- bp->swb_block);
-#endif
- rlist_free(&swapmap, (unsigned)bp->swb_block,
- (unsigned)bp->swb_block + swp->sw_bsize - 1);
+ for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
+ for (j = 0; j < SWB_NPAGES; j++)
+ if (bp->swb_block[j] != SWB_EMPTY) {
+ rlist_free(&swapmap, (unsigned)bp->swb_block[j],
+ (unsigned)bp->swb_block[j] + btodb(NBPG) - 1);
+ bp->swb_block[j] = SWB_EMPTY;
+ swap_pager_full = 0;
}
+ }
splx(s);
+
/*
* Free swap management resources
*/
free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+ swp->sw_blocks = 0;
free((caddr_t)swp, M_VMPGDATA);
+ pager->pg_data = 0;
free((caddr_t)pager, M_VMPAGER);
}
+/*
+ * swap_pager_getmulti can get multiple pages.
+ */
+int
+swap_pager_getmulti(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+ return swap_pager_io((sw_pager_t) pager->pg_data, m, count, reqpage, B_READ);
+}
+
+/*
+ * swap_pager_getpage gets individual pages
+ */
+int
swap_pager_getpage(pager, m, sync)
vm_pager_t pager;
vm_page_t m;
boolean_t sync;
{
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
-#endif
- return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
+ vm_page_t marray[1];
+
+ marray[0] = m;
+ return swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, B_READ);
}
+/*
+ * swap_pager_putpage writes individual pages
+ */
+int
swap_pager_putpage(pager, m, sync)
vm_pager_t pager;
vm_page_t m;
boolean_t sync;
{
int flags;
+ vm_page_t marray[1];
+
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
-#endif
if (pager == NULL) {
(void) swap_pager_clean(NULL, B_WRITE);
- return;
+ return VM_PAGER_OK;
}
+
+ marray[0] = m;
flags = B_WRITE;
if (!sync)
flags |= B_ASYNC;
- return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
+ return(swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, flags));
}
-boolean_t
-swap_pager_haspage(pager, offset)
- vm_pager_t pager;
+static inline int
+swap_pager_block_index(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return (offset / (SWB_NPAGES*NBPG));
+}
+
+static inline int
+swap_pager_block_offset(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return (offset % (SWB_NPAGES*NBPG));
+}
+
+/*
+ * _swap_pager_haspage returns TRUE if the pager has data that has
+ * been written out.
+ */
+static boolean_t
+_swap_pager_haspage(swp, offset)
+ sw_pager_t swp;
vm_offset_t offset;
{
- register sw_pager_t swp;
register sw_blk_t swb;
int ix;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage(%x, %x) ", pager, offset);
-#endif
- swp = (sw_pager_t) pager->pg_data;
- ix = offset / dbtob(swp->sw_bsize);
+ ix = offset / (SWB_NPAGES*NBPG);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage: %x bad offset %x, ix %x\n",
- swp->sw_blocks, offset, ix);
-#endif
return(FALSE);
}
swb = &swp->sw_blocks[ix];
- if (swb->swb_block)
- ix = atop(offset % dbtob(swp->sw_bsize));
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOCBLK)
- printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("-> %c\n",
- "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
-#endif
- if (swb->swb_block && (swb->swb_mask & (1 << ix)))
- return(TRUE);
+ ix = (offset % (SWB_NPAGES*NBPG)) / NBPG;
+ if (swb->swb_block[ix] != SWB_EMPTY) {
+ if (swb->swb_valid & (1 << ix))
+ return TRUE;
+ }
+
return(FALSE);
}
/*
+ * swap_pager_haspage is the externally accessible version of
+ * _swap_pager_haspage above. this routine takes a vm_pager_t
+ * for an argument instead of sw_pager_t.
+ */
+boolean_t
+swap_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
+}
+
+/*
+ * swap_pager_freepage is a convenience routine that clears the busy
+ * bit and deallocates a page.
+ */
+static void
+swap_pager_freepage(m)
+ vm_page_t m;
+{
+ PAGE_WAKEUP(m);
+ vm_page_free(m);
+}
+
+/*
+ * swap_pager_ridpages is a convenience routine that deallocates all
+ * but the required page. this is usually used in error returns that
+ * need to invalidate the "extra" readahead pages.
+ */
+static void
+swap_pager_ridpages(m, count, reqpage)
+ vm_page_t *m;
+ int count;
+ int reqpage;
+{
+ int i;
+ int s;
+
+ for (i = 0; i < count; i++)
+ if (i != reqpage)
+ swap_pager_freepage(m[i]);
+}
+
+int swapwritecount=0;
+
+/*
+ * swap_pager_iodone1 is the completion routine for both reads and sync writes
+ */
+void
+swap_pager_iodone1(bp)
+ struct buf *bp;
+{
+ bp->b_flags |= B_DONE;
+ bp->b_flags &= ~B_ASYNC;
+ wakeup((caddr_t)bp);
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+}
+/*
* Scaled down version of swap().
- * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
* BOGUS: lower level IO routines expect a KVA so we have to map our
* provided physical page into the KVA to keep them happy.
*/
-swap_pager_io(swp, m, flags)
+int
+swap_pager_io(swp, m, count, reqpage, flags)
register sw_pager_t swp;
- vm_page_t m;
+ vm_page_t *m;
+ int count, reqpage;
int flags;
{
register struct buf *bp;
register sw_blk_t swb;
register int s;
- int ix;
+ int i, ix;
boolean_t rv;
vm_offset_t kva, off;
swp_clean_t spc;
+ int cluster;
+ vm_offset_t paging_offset;
+ vm_object_t object;
+ int reqaddr, mydskregion;
+ extern int dmmin, dmmax;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
- printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
-#endif
+ spc = NULL;
+ object = m[reqpage]->object;
+ paging_offset = object->paging_offset;
/*
- * For reads (pageins) and synchronous writes, we clean up
- * all completed async pageouts.
+ * First determine if the page exists in the pager if this is
+ * a sync read. This quickly handles cases where we are
+ * following shadow chains looking for the top level object
+ * with the page.
*/
- if ((flags & B_ASYNC) == 0) {
- s = splbio();
-#ifdef DEBUG
+ off = m[reqpage]->offset + paging_offset;
+ ix = swap_pager_block_index(swp, off);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ /* printf("swap pager: out of range\n"); */
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+
+
+ swb = &swp->sw_blocks[ix];
+ off = swap_pager_block_offset(swp, off) / NBPG;
+ reqaddr = swb->swb_block[off];
+
+ /* make sure that our I/O request is contiguous */
+ if (flags & B_READ) {
+ int first = 0, last = count;
+ int failed = 0;
+ int reqdskregion = reqaddr / dmmax;
+ int valid;
+
+ if (reqaddr == SWB_EMPTY ||
+ (swb->swb_valid & (1 << off)) == 0) {
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+
/*
- * Check to see if this page is currently being cleaned.
- * If it is, we just wait til the operation is done before
- * continuing.
+ * search backwards for the first contiguous page to transfer
*/
- while (swap_pager_clean(m, flags&B_READ)) {
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_io: page %x cleaning\n", m);
-
- swp->sw_flags |= SW_WANTED;
- assert_wait((int)swp);
- thread_block();
+ for (i = reqpage - 1; i >= 0; --i) {
+ int *tmpaddr = swap_pager_diskaddr(swp,
+ m[i]->offset + paging_offset,&valid);
+ if (tmpaddr == 0 || failed || !valid ||
+ *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG)) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ m[i] = 0;
+ if (first == 0)
+ first = i + 1;
+ } else {
+ mydskregion = *tmpaddr / dmmax;
+ if (mydskregion != reqdskregion) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ m[i] = 0;
+ first = i + 1;
+ }
+ }
+ }
+ /*
+ * search forwards for the last contiguous page to transfer
+ */
+ failed = 0;
+ for (i = reqpage + 1; i < count; i++) {
+ int *tmpaddr = swap_pager_diskaddr(swp, m[i]->offset + paging_offset,&valid);
+ if (tmpaddr == 0 || failed || !valid ||
+ *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG) ) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ m[i] = 0;
+ if (last == count)
+ last = i;
+ } else {
+ mydskregion = *tmpaddr / dmmax;
+ if (mydskregion != reqdskregion) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ m[i] = 0;
+ if (last == count)
+ last = i;
+ }
+ }
+ }
+ count = last;
+ if (first != 0) {
+ for (i = first; i < count; i++) {
+ m[i - first] = m[i];
+ }
+ count -= first;
+ reqpage -= first;
}
-#else
- (void) swap_pager_clean(m, flags&B_READ);
-#endif
- splx(s);
+ }
+
+ /*
+ * at this point:
+ * "m" is a pointer to the array of vm_page_t for paging I/O
+ * "count" is the number of vm_page_t entries represented by "m"
+ * "object" is the vm_object_t for I/O
+ * "reqpage" is the index into "m" for the page actually faulted
+ */
+
+ /*
+ * For reads (pageins) and synchronous writes, we clean up
+ * all completed async pageouts.
+ */
+ if ((flags & B_ASYNC) == 0) {
+ swap_pager_clean(NULL, flags);
}
/*
* For async writes (pageouts), we cleanup completed pageouts so
@@ -494,103 +916,162 @@ swap_pager_io(swp, m, flags)
* page is already being cleaned. If it is, or no resources
* are available, we try again later.
*/
- else if (swap_pager_clean(m, B_WRITE) ||
- queue_empty(&swap_pager_free)) {
-#ifdef DEBUG
- if ((swpagerdebug & SDB_ANOM) &&
- !queue_empty(&swap_pager_free))
- printf("swap_pager_io: page %x already cleaning\n", m);
-#endif
- return(VM_PAGER_FAIL);
+ else if (swap_pager_clean(m[reqpage], B_WRITE)) {
+ swap_pager_ridpages(m, count, reqpage);
+ return VM_PAGER_TRYAGAIN;
}
+ spc = NULL; /* we might not use an spc data structure */
+ kva = 0;
+
/*
- * Determine swap block and allocate as necessary.
+ * we allocate a new kva for transfers > 1 page
+ * but for transfers == 1 page, the swap_pager_free list contains
+ * entries that have pre-allocated kva's (for efficiency).
*/
- off = m->offset + m->object->paging_offset;
- ix = off / dbtob(swp->sw_bsize);
- if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_io: bad offset %x+%x(%d) in %x\n",
- m->offset, m->object->paging_offset,
- ix, swp->sw_blocks);
-#endif
- return(VM_PAGER_FAIL);
+ if ((flags & B_READ) && count > 1) {
+ kva = kmem_alloc_pageable(pager_map, count*NBPG);
}
- s = splbio();
- swb = &swp->sw_blocks[ix];
- off = off % dbtob(swp->sw_bsize);
- if (flags & B_READ) {
- if (swb->swb_block == 0 ||
- (swb->swb_mask & (1 << atop(off))) == 0) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
- printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
- swp->sw_blocks,
- swb->swb_block, atop(off),
- swb->swb_mask,
- m->offset, m->object->paging_offset);
-#endif
- /* XXX: should we zero page here?? */
- splx(s);
- return(VM_PAGER_FAIL);
+
+
+ if (!kva) {
+ /*
+ * if a kva has not been allocated, we can only do a one page transfer,
+ * so we free the other pages that might have been allocated by vm_fault.
+ */
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ swap_pager_freepage(m[i]);
+ m[i] = 0;
+ }
}
- } else if (swb->swb_block == 0) {
-#ifdef old
- swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
- if (swb->swb_block == 0) {
-#else
- if (!rlist_alloc(&swapmap, (unsigned)swp->sw_bsize,
- (unsigned *)&swb->swb_block)) {
-#endif
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_io: rmalloc of %x failed\n",
- swp->sw_bsize);
-#endif
- splx(s);
- return(VM_PAGER_FAIL);
+ count = 1;
+ m[0] = m[reqpage];
+ reqpage = 0;
+ /*
+ * get a swap pager clean data structure, block until we get it
+ */
+ if (queue_empty(&swap_pager_free)) {
+/*
+ if ((flags & (B_ASYNC|B_READ)) == B_ASYNC)
+ return VM_PAGER_TRYAGAIN;
+*/
+ s = splbio();
+ if( curproc == pageproc)
+ (void) swap_pager_clean(NULL, B_WRITE);
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ while (queue_empty(&swap_pager_free)) {
+ swap_pager_needflags |= SWAP_FREE_NEEDED;
+ tsleep((caddr_t)&swap_pager_free,
+ PVM, "swpfre", 0);
+ if (curproc == pageproc)
+ (void) swap_pager_clean(NULL, B_WRITE);
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ }
+ splx(s);
}
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
- printf("swpg_io: %x alloc blk %x at ix %x\n",
- swp->sw_blocks, swb->swb_block, ix);
-#endif
+ queue_remove_first(&swap_pager_free, spc, swp_clean_t, spc_list);
+ kva = spc->spc_kva;
+ }
+
+
+ /*
+ * Determine swap block and allocate as necessary.
+ * We try to get SWB_NPAGES first, but then we punt and try
+ * to get one page. If that fails, we look at the allocation
+ * data structures to find unused but allocated pages in other
+ * pagers allocations.
+ */
+ if (reqaddr == SWB_EMPTY) {
+ int blk;
+ int tries;
+ int ntoget;
+
+ tries = 0;
+ s = splbio();
+ /*
+ * if any other pages have been allocated in this block, we
+ * only try to get one page.
+ */
+ for (i = 0; i < SWB_NPAGES; i++) {
+ if (swb->swb_block[i] != SWB_EMPTY)
+ break;
+ }
+
+ ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
+retrygetspace:
+ if (ntoget == SWB_NPAGES &&
+ rlist_alloc(&swapmap, btodb(ntoget * NBPG),&blk)) {
+ for (i = 0; i < ntoget; i++)
+ swb->swb_block[i] = blk + btodb(NBPG) * i;
+ } else if (!rlist_alloc(&swapmap, btodb(NBPG), &swb->swb_block[off])) {
+ /*
+ * if the allocation has failed, we try to reclaim space and
+ * retry.
+ */
+ if (++tries == 1) {
+ swap_pager_reclaim();
+ goto retrygetspace;
+ }
+ /*
+ * here on swap space full.
+ */
+ if (spc)
+ queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
+ if (swap_pager_full == 0)
+ printf("swap_pager: out of swap space !!!\n");
+ swap_pager_full = 1;
+ swap_pager_ridpages(m, count, reqpage);
+ splx(s);
+ return(VM_PAGER_TRYAGAIN);
+ }
+ splx(s);
+ swap_pager_full = 0;
}
- splx(s);
/*
- * Allocate a kernel virtual address and initialize so that PTE
- * is available for lower level IO drivers.
+ * map our page(s) into kva for I/O
*/
- kva = vm_pager_map_page(m);
+ for (i = 0; i < count; i++) {
+ pmap_enter(vm_map_pmap(pager_map), kva + NBPG * i,
+ VM_PAGE_TO_PHYS(m[i]), VM_PROT_ALL, TRUE);
+ }
+
/*
- * Get a swap buffer header and perform the IO
+ * get the base I/O offset into the swap file
*/
- s = splbio();
- while (bswlist.av_forw == NULL) {
+ off = swap_pager_block_offset(swp, m[0]->offset + paging_offset) / NBPG;
+
#ifdef DEBUG
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_io: wait on swbuf for %x (%d)\n",
- m, flags);
+ if (flags & B_READ && count > 1)
+ printf("obj: 0x%x off: 0x%x poff: 0x%x off: 0x%x, sz: %d blk: %d op: %s\n",
+ object, m[0]->offset, paging_offset, off, count, swb->swb_block[off], flags&B_READ?"r":"w");
#endif
- bswlist.b_flags |= B_WANTED;
- sleep((caddr_t)&bswlist, PSWP+1);
+
+ s = splbio();
+ /*
+ * Get a swap buffer header and perform the IO
+ */
+ if (spc) {
+ bp = spc->spc_bp;
+ bzero(bp, sizeof *bp);
+ bp->b_spc = spc;
+ } else {
+ bp = getpbuf();
}
- bp = bswlist.av_forw;
- bswlist.av_forw = bp->av_forw;
- splx(s);
bp->b_flags = B_BUSY | (flags & B_READ);
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
- bp->b_un.b_addr = (caddr_t)kva;
- bp->b_blkno = swb->swb_block + btodb(off);
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = swb->swb_block[off];
VHOLD(swapdev_vp);
bp->b_vp = swapdev_vp;
if (swapdev_vp->v_type == VBLK)
bp->b_dev = swapdev_vp->v_rdev;
- bp->b_bcount = PAGE_SIZE;
+ bp->b_bcount = NBPG*count;
if ((bp->b_flags & B_READ) == 0)
swapdev_vp->v_numoutput++;
@@ -599,92 +1080,139 @@ swap_pager_io(swp, m, flags)
* and place a "cleaning" entry on the inuse queue.
*/
if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DEBUG
- if (queue_empty(&swap_pager_free))
- panic("swpg_io: lost spc");
-#endif
- queue_remove_first(&swap_pager_free,
- spc, swp_clean_t, spc_list);
-#ifdef DEBUG
- if (spc->spc_flags != SPC_FREE)
- panic("swpg_io: bad free spc");
-#endif
- spc->spc_flags = SPC_BUSY;
- spc->spc_bp = bp;
+ spc->spc_flags = 0;
spc->spc_swp = swp;
- spc->spc_kva = kva;
- spc->spc_m = m;
+ spc->spc_m = m[reqpage];
+ /*
+ * the completion routine for async writes
+ */
bp->b_flags |= B_CALL;
bp->b_iodone = swap_pager_iodone;
- s = splbio();
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bp->b_bcount;
swp->sw_poip++;
queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
-
-#ifdef DEBUG
- swap_pager_poip++;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
- bp, swp, spc, swp->sw_poip);
- if ((swpagerdebug & SDB_ALLOCBLK) &&
- (swb->swb_mask & (1 << atop(off))) == 0)
- printf("swpg_io: %x write blk %x+%x\n",
- swp->sw_blocks, swb->swb_block, atop(off));
-#endif
- swb->swb_mask |= (1 << atop(off));
- splx(s);
+ /*
+ * we remember that we have used a block for paging.
+ */
+ swb->swb_valid |= (1 << off);
+ } else {
+ /*
+ * here for sync write or any read
+ */
+ if ((flags & B_READ) == 0) {
+ /*
+ * if we are writing, we remember that we have
+ * actually used a block for paging.
+ */
+ swb->swb_valid |= (1 << off);
+ swp->sw_poip++;
+ } else {
+ swp->sw_piip++;
+ }
+ /*
+ * the completion routine for reads and sync writes
+ */
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = swap_pager_iodone1;
}
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
- bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
-#endif
+ /*
+ * perform the I/O
+ */
VOP_STRATEGY(bp);
- if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO started: bp %x\n", bp);
-#endif
+ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
+ if ((bp->b_flags & B_DONE) == B_DONE) {
+ swap_pager_clean(NULL, flags);
+ }
+ splx(s);
return(VM_PAGER_PEND);
}
- s = splbio();
-#ifdef DEBUG
- if (flags & B_READ)
- swap_pager_piip++;
- else
- swap_pager_poip++;
-#endif
+
+ /*
+ * wait for the sync I/O to complete
+ */
while ((bp->b_flags & B_DONE) == 0) {
- assert_wait((int)bp);
- thread_block();
+ tsleep((caddr_t)bp, PVM, (flags & B_READ)?"swread":"swwrt", 0);
}
-#ifdef DEBUG
- if (flags & B_READ)
- --swap_pager_piip;
- else
- --swap_pager_poip;
-#endif
rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY);
- bp->av_forw = bswlist.av_forw;
- bswlist.av_forw = bp;
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
+
+ if (bp->b_flags & B_READ) {
+ --swp->sw_piip;
+ if (swp->sw_piip == 0)
+ wakeup((caddr_t) swp);
+ } else {
+ --swp->sw_poip;
+ if (swp->sw_poip == 0)
+ wakeup((caddr_t) swp);
+ }
+
if (bp->b_vp)
brelvp(bp);
- if (bswlist.b_flags & B_WANTED) {
- bswlist.b_flags &= ~B_WANTED;
- thread_wakeup((int)&bswlist);
- }
+
+ /*
+ * release the physical I/O buffer
+ */
+ if (!spc)
+ relpbuf(bp);
+
+ splx(s);
+
+ /*
+ * remove the mapping for kernel virtual
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + count * NBPG);
+
+ /*
+ * if we have written the page, then indicate that the page
+ * is clean.
+ */
if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
- m->clean = TRUE;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m[reqpage]->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
+ /*
+ * optimization, if a page has been read during the
+ * pageout process, we activate it.
+ */
+ if ( (m[reqpage]->flags & PG_ACTIVE) == 0 &&
+ pmap_is_referenced(VM_PAGE_TO_PHYS(m[reqpage])))
+ vm_page_activate(m[reqpage]);
+ }
+
+ if (spc) {
+ /*
+ * if we have used an spc, we need to free it.
+ */
+ queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
+ } else {
+ for (i = 0; i < count; i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ if (i != reqpage) {
+ /*
+ * whether or not to leave the page activated
+ * is up in the air, but we should put the page
+ * on a page queue somewhere. (it already is in
+ * the object).
+ * After some empirical results, it is best
+ * to deactivate the readahead pages.
+ */
+ vm_page_deactivate(m[i]);
+
+ /*
+ * just in case someone was asking for this
+ * page we now tell them that it is ok to use
+ */
+ m[i]->flags &= ~PG_FAKE;
+ PAGE_WAKEUP(m[i]);
+ }
+ }
+/*
+ * and free the kernel virtual addresses
+ */
+ kmem_free_wakeup(pager_map, kva, count * NBPG);
}
- splx(s);
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
- if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
- printf("swpg_io: IO error\n");
-#endif
- vm_pager_unmap_page(kva);
return(rv);
}
@@ -696,127 +1224,58 @@ swap_pager_clean(m, rw)
register swp_clean_t spc, tspc;
register int s;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_clean(%x, %d)\n", m, rw);
-#endif
tspc = NULL;
+ if (queue_empty(&swap_pager_done))
+ return FALSE;
for (;;) {
+ s = splbio();
/*
- * Look up and removal from inuse list must be done
+ * Look up and removal from done list must be done
* at splbio() to avoid conflicts with swap_pager_iodone.
*/
- s = splbio();
- spc = (swp_clean_t) queue_first(&swap_pager_inuse);
- while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
- if ((spc->spc_flags & SPC_DONE) &&
- swap_pager_finish(spc)) {
- queue_remove(&swap_pager_inuse, spc,
- swp_clean_t, spc_list);
- break;
- }
- if (m && m == spc->spc_m) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_clean: page %x on list, flags %x\n",
- m, spc->spc_flags);
-#endif
- tspc = spc;
- }
- spc = (swp_clean_t) queue_next(&spc->spc_list);
+ spc = (swp_clean_t) queue_first(&swap_pager_done);
+ while (!queue_end(&swap_pager_done, (queue_entry_t)spc)) {
+ pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, ((vm_offset_t) spc->spc_kva) + NBPG);
+ swap_pager_finish(spc);
+ queue_remove(&swap_pager_done, spc, swp_clean_t, spc_list);
+ goto doclean;
}
/*
* No operations done, thats all we can do for now.
*/
- if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
- break;
+
splx(s);
+ break;
/*
* The desired page was found to be busy earlier in
* the scan but has since completed.
*/
+doclean:
if (tspc && tspc == spc) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_clean: page %x done while looking\n",
- m);
-#endif
tspc = NULL;
}
- spc->spc_flags = SPC_FREE;
- vm_pager_unmap_page(spc->spc_kva);
+ spc->spc_flags = 0;
queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
-#ifdef DEBUG
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_clean: free spc %x\n", spc);
-#endif
- }
-#ifdef DEBUG
- /*
- * If we found that the desired page is already being cleaned
- * mark it so that swap_pager_iodone() will not set the clean
- * flag before the pageout daemon has another chance to clean it.
- */
- if (tspc && rw == B_WRITE) {
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_clean: page %x on clean list\n",
- tspc);
- tspc->spc_flags |= SPC_DIRTY;
+ ++cleandone;
+ splx(s);
}
-#endif
- splx(s);
-#ifdef DEBUG
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
- if ((swpagerdebug & SDB_ANOM) && tspc)
- printf("swpg_clean: %s of cleaning page %x\n",
- rw == B_READ ? "get" : "put", m);
-#endif
return(tspc ? TRUE : FALSE);
}
+void
swap_pager_finish(spc)
register swp_clean_t spc;
{
- vm_object_t object = spc->spc_m->object;
-
- /*
- * Mark the paging operation as done.
- * (XXX) If we cannot get the lock, leave it til later.
- * (XXX) Also we are assuming that an async write is a
- * pageout operation that has incremented the counter.
- */
- if (!vm_object_lock_try(object))
- return(0);
+ vm_page_t m = spc->spc_m;
+ vm_object_t object = m->object;
+ extern int vm_pageout_free_min;
- if (--object->paging_in_progress == 0)
+ if (--object->paging_in_progress == 0)
thread_wakeup((int) object);
-#ifdef DEBUG
- /*
- * XXX: this isn't even close to the right thing to do,
- * introduces a variety of race conditions.
- *
- * If dirty, vm_pageout() has attempted to clean the page
- * again. In this case we do not do anything as we will
- * see the page again shortly.
- */
- if (spc->spc_flags & SPC_DIRTY) {
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_finish: page %x dirty again\n",
- spc->spc_m);
- spc->spc_m->busy = FALSE;
- PAGE_WAKEUP(spc->spc_m);
- vm_object_unlock(object);
- return(1);
- }
-#endif
/*
* If no error mark as clean and inform the pmap system.
* If error, mark as dirty so we will try again.
@@ -824,19 +1283,43 @@ swap_pager_finish(spc)
*/
if (spc->spc_flags & SPC_ERROR) {
printf("swap_pager_finish: clean of page %x failed\n",
- VM_PAGE_TO_PHYS(spc->spc_m));
- spc->spc_m->laundry = TRUE;
+ VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_LAUNDRY;
} else {
- spc->spc_m->clean = TRUE;
- pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_CLEAN;
}
- spc->spc_m->busy = FALSE;
- PAGE_WAKEUP(spc->spc_m);
- vm_object_unlock(object);
- return(1);
+ /*
+ * if a page has been read during pageout, then
+ * we activate the page.
+ */
+ if ((m->flags & PG_ACTIVE) == 0 &&
+ pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
+ vm_page_activate(m);
+
+ /*
+ * we wakeup any processes that are waiting on
+ * this page.
+ */
+ PAGE_WAKEUP(m);
+ /*
+ * if we need memory desperately, then free it now
+ */
+ if (vm_page_free_count < vm_page_free_reserved &&
+ (m->flags & PG_CLEAN) && m->wire_count == 0) {
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ vm_page_free(m);
+ }
+ --nswiodone;
+
+ return;
}
+/*
+ * swap_pager_iodone
+ */
+void
swap_pager_iodone(bp)
register struct buf *bp;
{
@@ -844,58 +1327,97 @@ swap_pager_iodone(bp)
daddr_t blk;
int s;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_iodone(%x)\n", bp);
-#endif
s = splbio();
- spc = (swp_clean_t) queue_first(&swap_pager_inuse);
- while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
- if (spc->spc_bp == bp)
- break;
- spc = (swp_clean_t) queue_next(&spc->spc_list);
- }
-#ifdef DEBUG
- if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
- panic("swap_pager_iodone: bp not found");
-#endif
-
- spc->spc_flags &= ~SPC_BUSY;
- spc->spc_flags |= SPC_DONE;
+ spc = (swp_clean_t) bp->b_spc;
+ queue_remove(&swap_pager_inuse, spc, swp_clean_t, spc_list);
+ queue_enter(&swap_pager_done, spc, swp_clean_t, spc_list);
if (bp->b_flags & B_ERROR) {
spc->spc_flags |= SPC_ERROR;
-printf("error %d blkno %d sz %d ", bp->b_error, bp->b_blkno, bp->b_bcount);
+ printf("error %d blkno %d sz %d ",
+ bp->b_error, bp->b_blkno, bp->b_bcount);
}
- spc->spc_bp = NULL;
- blk = bp->b_blkno;
-#ifdef DEBUG
- --swap_pager_poip;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
- bp, spc->spc_swp, spc->spc_swp->sw_flags,
- spc, spc->spc_swp->sw_poip);
-#endif
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
+ if (bp->b_vp) {
+ brelvp(bp);
+ }
- spc->spc_swp->sw_poip--;
- if (spc->spc_swp->sw_flags & SW_WANTED) {
- spc->spc_swp->sw_flags &= ~SW_WANTED;
- thread_wakeup((int)spc->spc_swp);
+ nswiodone++;
+ if (--spc->spc_swp->sw_poip == 0) {
+ wakeup((caddr_t)spc->spc_swp);
}
-
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY);
+
+ if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
+ queue_empty(&swap_pager_inuse)) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+
+ if (vm_pageout_pages_needed) {
+ wakeup((caddr_t)&vm_pageout_pages_needed);
+ }
+
+ if (queue_empty(&swap_pager_inuse) ||
+ (vm_page_free_count < vm_page_free_min &&
+ nswiodone + vm_page_free_count >= vm_page_free_min) ) {
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+ splx(s);
+}
+
+/*
+ * getpbuf: allocate a physical buffer
+ *
+ * Takes the first buffer header off the bswlist free list and returns
+ * it zero-filled.  While the list is empty, B_WANTED is set on bswlist
+ * and the caller sleeps on &bswlist; relpbuf() issues the matching
+ * wakeup when a header is put back.  The list is examined and unlinked
+ * between splbio() and splx(), presumably so that the manipulation
+ * cannot race with I/O completion code (NOTE(review): confirm).
+ *
+ * Takes no arguments.  Returns the buffer header removed from the list.
+ */
+struct buf *
+getpbuf() {
+ int s; /* value returned by splbio(), handed back to splx() */
+ struct buf *bp;
+
+ s = splbio();
+ /* get a bp from the swap buffer header pool, sleeping until one is free */
+ while (bswlist.av_forw == NULL) {
+ bswlist.b_flags |= B_WANTED; /* ask the releaser to wake us */
+ tsleep((caddr_t)&bswlist, PVM, "wswbuf", 0);
+ }
+ bp = bswlist.av_forw; /* head of the free list */
+ bswlist.av_forw = bp->av_forw; /* unlink it */
+
+ splx(s);
+
+ bzero(bp, sizeof *bp); /* caller gets a fully cleared header */
+ return bp;
+}
+
+/*
+ * relpbuf: release a physical buffer
+ *
+ * Pushes bp onto the front of the bswlist free list.  If B_WANTED is
+ * set on bswlist, the flag is cleared and sleepers on &bswlist are
+ * woken (getpbuf() sleeps there when the list is empty).  The list
+ * update is done between splbio() and splx().
+ *
+ * bp: the buffer header being returned to the pool.
+ */
+void
+relpbuf(bp)
+ struct buf *bp;
+{
+ int s; /* value returned by splbio(), handed back to splx() */
+
+ s = splbio();
bp->av_forw = bswlist.av_forw; /* link bp in front of the current head */
bswlist.av_forw = bp;
- if (bp->b_vp)
- brelvp(bp);
if (bswlist.b_flags & B_WANTED) {
bswlist.b_flags &= ~B_WANTED;
- thread_wakeup((int)&bswlist);
+ wakeup((caddr_t)&bswlist); /* same channel getpbuf() sleeps on */
}
- thread_wakeup((int) &vm_pages_needed);
splx(s);
}
-#endif
+
+/*
+ * swap_pager_ready: return true if any swap control structures can
+ * be allocated
+ *
+ * Returns 0 when the swap_pager_free queue is empty and 1 otherwise.
+ * It only tests the queue; nothing is removed from it.
+ */
+int
+swap_pager_ready() {
+ if( queue_empty( &swap_pager_free))
+ return 0;
+ else
+ return 1;
+}