diff options
Diffstat (limited to 'sys/vm')
| -rw-r--r-- | sys/vm/device_pager.c | 1 | ||||
| -rw-r--r-- | sys/vm/queue.h | 26 | ||||
| -rw-r--r-- | sys/vm/swap_pager.c | 1010 | ||||
| -rw-r--r-- | sys/vm/swap_pager.h | 9 | ||||
| -rw-r--r-- | sys/vm/vm_fault.c | 28 | ||||
| -rw-r--r-- | sys/vm/vm_glue.c | 52 | ||||
| -rw-r--r-- | sys/vm/vm_mmap.c | 101 | ||||
| -rw-r--r-- | sys/vm/vm_object.c | 24 | ||||
| -rw-r--r-- | sys/vm/vm_object.h | 5 | ||||
| -rw-r--r-- | sys/vm/vm_page.c | 91 | ||||
| -rw-r--r-- | sys/vm/vm_page.h | 40 | ||||
| -rw-r--r-- | sys/vm/vm_pageout.c | 763 | ||||
| -rw-r--r-- | sys/vm/vm_pageout.h | 3 | ||||
| -rw-r--r-- | sys/vm/vm_pager.c | 22 | ||||
| -rw-r--r-- | sys/vm/vm_pager.h | 4 | ||||
| -rw-r--r-- | sys/vm/vm_swap.c | 4 | ||||
| -rw-r--r-- | sys/vm/vm_unix.c | 9 | ||||
| -rw-r--r-- | sys/vm/vnode_pager.c | 1398 |
18 files changed, 2251 insertions, 1339 deletions
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 01ce7305d5a1..12c113615ce4 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -86,6 +86,7 @@ struct pagerops devicepagerops = { dev_pager_getpage, 0, dev_pager_putpage, + 0, dev_pager_haspage }; diff --git a/sys/vm/queue.h b/sys/vm/queue.h index 8eaa42a0328e..7010951bbfc3 100644 --- a/sys/vm/queue.h +++ b/sys/vm/queue.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)queue.h 7.3 (Berkeley) 4/21/91 - * $Id: queue.h,v 1.2 1993/10/16 16:20:18 rgrimes Exp $ + * $Id: queue.h,v 1.3 1994/04/14 07:50:17 davidg Exp $ */ /* @@ -111,6 +111,30 @@ typedef struct queue_entry *queue_entry_t; } \ } +#define queue_enter_head(head, elt, type, field) { \ + if (queue_empty((head))) { \ + (head)->next = (queue_entry_t) elt; \ + (head)->prev = (queue_entry_t) elt; \ + (elt)->field.next = head; \ + (elt)->field.prev = head; \ + } else { \ + register queue_entry_t next = (head)->next; \ + (elt)->field.prev = head; \ + (elt)->field.next = next; \ + (head)->next = (queue_entry_t)(elt); \ + ((type)next)->field.prev = (queue_entry_t)(elt);\ + } \ +} + +/* insert 'item' after 'position' using field 'field' */ +/* XXX might be broken - BEWARE */ +#define queue_insert(position, item, type, field) { \ + ((type) position->field.next)->field.prev = (queue_entry_t)(item); \ + (item)->field.next = (position)->field.next; \ + (position)->field.next = (queue_entry_t)(item); \ + (item)->field.prev = (queue_entry_t) position; \ +} + #define queue_field(head, thing, type, field) \ (((head) == (thing)) ? (head) : &((type)(thing))->field) diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index bf3f38f9ce8d..e7c350b01502 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. @@ -38,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91 * - * $Id: swap_pager.c,v 1.17.2.1 1994/03/07 02:07:06 rgrimes Exp $ + * $Id: swap_pager.c,v 1.27 1994/05/25 11:06:48 davidg Exp $ */ /* @@ -68,7 +69,7 @@ #include "vm_map.h" #ifndef NPENDINGIO -#define NPENDINGIO 96 +#define NPENDINGIO 16 #endif extern int nswbuf; @@ -79,6 +80,9 @@ extern int hz; int swap_pager_full; extern vm_map_t pager_map; extern int vm_pageout_pages_needed; +extern int vm_swap_size; + +#define MAX_PAGEOUT_CLUSTER 8 struct swpagerclean { queue_head_t spc_list; @@ -86,7 +90,9 @@ struct swpagerclean { struct buf *spc_bp; sw_pager_t spc_swp; vm_offset_t spc_kva; - vm_page_t spc_m; + vm_offset_t spc_altkva; + int spc_count; + vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; } swcleanlist [NPENDINGIO] ; typedef struct swpagerclean *swp_clean_t; @@ -105,11 +111,14 @@ queue_head_t swap_pager_list; /* list of "named" anon regions */ queue_head_t swap_pager_un_list; /* list of "unnamed" anon pagers */ #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ int swap_pager_needflags; +struct rlist *swapfrag; static queue_head_t *swp_qs[]={ &swap_pager_list, &swap_pager_un_list, (queue_head_t *) 0 }; +int swap_pager_putmulti(); + struct pagerops swappagerops = { swap_pager_init, swap_pager_alloc, @@ -117,6 +126,7 @@ struct pagerops swappagerops = { swap_pager_getpage, swap_pager_getmulti, swap_pager_putpage, + swap_pager_putmulti, swap_pager_haspage }; @@ -132,10 +142,18 @@ extern int vm_page_count; struct buf * getpbuf() ; void relpbuf(struct buf *bp) ; +static inline void swapsizecheck() { + if( vm_swap_size < 128*btodb(NBPG)) { + if( swap_pager_full) + printf("swap_pager: out of space\n"); + swap_pager_full = 1; + } else if( vm_swap_size > 192*btodb(NBPG)) + swap_pager_full = 0; +} + void swap_pager_init() { - register int i; extern int dmmin, dmmax; dfltpagerops = &swappagerops; @@ -189,7 +207,7 @@ swap_pager_alloc(handle, size, prot, offset) if (!spc->spc_kva) { break; } - spc->spc_bp = malloc(sizeof( *bp), M_TEMP, + spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); if (!spc->spc_bp) { kmem_free_wakeup(pager_map, spc->spc_kva, NBPG); @@ -199,6 +217,8 @@ swap_pager_alloc(handle, size, prot, offset) queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); } require_swap_init = 0; + if( size == 0) + return(NULL); } /* @@ -219,8 +239,9 @@ swap_pager_alloc(handle, size, prot, offset) } } - if (swap_pager_full) + if (swap_pager_full) { return(NULL); + } /* * Pager doesn't exist, allocate swap management resources @@ -246,10 +267,10 @@ swap_pager_alloc(handle, size, prot, offset) free((caddr_t)pager, M_VMPAGER); return(NULL); } - bzero((caddr_t)swp->sw_blocks, - swp->sw_nblocks * sizeof(*swp->sw_blocks)); for (i = 0; i < swp->sw_nblocks; i++) { + swp->sw_blocks[i].swb_valid = 0; + swp->sw_blocks[i].swb_locked = 0; for (j = 0; j < SWB_NPAGES; j++) swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; } @@ -336,30 +357,87 @@ swap_pager_setvalid(swp, offset, valid) } /* + * this routine allocates swap space with a fragmentation + * minimization policy. + */ +int +swap_pager_getswapspace( unsigned amount, unsigned *rtval) { + unsigned tmpalloc; + unsigned nblocksfrag = btodb(SWB_NPAGES*NBPG); + if( amount < nblocksfrag) { + if( rlist_alloc(&swapfrag, amount, rtval)) + return 1; + if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc)) + return 0; + rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1); + *rtval = tmpalloc; + return 1; + } + if( !rlist_alloc(&swapmap, amount, rtval)) + return 0; + else + return 1; +} + +/* + * this routine frees swap space with a fragmentation + * minimization policy. + */ +void +swap_pager_freeswapspace( unsigned from, unsigned to) { + unsigned nblocksfrag = btodb(SWB_NPAGES*NBPG); + unsigned tmpalloc; + if( ((to + 1) - from) >= nblocksfrag) { + while( (from + nblocksfrag) <= to + 1) { + rlist_free(&swapmap, from, from + nblocksfrag - 1); + from += nblocksfrag; + } + } + if( from >= to) + return; + rlist_free(&swapfrag, from, to); + while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) { + rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1); + } +} +/* * this routine frees swap blocks from a specified pager */ void -swap_pager_freespace(pager, start, size) - vm_pager_t pager; +_swap_pager_freespace(swp, start, size) + sw_pager_t swp; vm_offset_t start; vm_offset_t size; { - sw_pager_t swp = (sw_pager_t) pager->pg_data; vm_offset_t i; int s; s = splbio(); for (i = start; i < round_page(start + size - 1); i += NBPG) { - int *addr = swap_pager_diskaddr(swp, i, 0); + int valid; + int *addr = swap_pager_diskaddr(swp, i, &valid); if (addr && *addr != SWB_EMPTY) { - rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1); + swap_pager_freeswapspace(*addr, *addr+btodb(NBPG) - 1); + if( valid) { + vm_swap_size += btodb(NBPG); + swap_pager_setvalid(swp, i, 0); + } *addr = SWB_EMPTY; - swap_pager_full = 0; } } + swapsizecheck(); splx(s); } +void +swap_pager_freespace(pager, start, size) + vm_pager_t pager; + vm_offset_t start; + vm_offset_t size; +{ + _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); +} + /* * swap_pager_reclaim frees up over-allocated space from all pagers * this eliminates internal fragmentation due to allocation of space @@ -410,6 +488,8 @@ swap_pager_reclaim() swp = (sw_pager_t) p->pg_data; for (i = 0; i < swp->sw_nblocks; i++) { sw_blk_t swb = &swp->sw_blocks[i]; + if( swb->swb_locked) + continue; for (j = 0; j < SWB_NPAGES; j++) { if (swb->swb_block[j] != SWB_EMPTY && (swb->swb_valid & (1 << j)) == 0) { @@ -430,9 +510,9 @@ rfinished: * free the blocks that have been added to the reclaim list */ for (i = 0; i < reclaimcount; i++) { - rlist_free(&swapmap, reclaims[i], reclaims[i] + btodb(NBPG) - 1); + swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(NBPG) - 1); + swapsizecheck(); wakeup((caddr_t) &in_reclaim); - swap_pager_full = 0; } splx(s); @@ -480,7 +560,7 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) /* * clean all of the pages that are currently active and finished */ - (void) swap_pager_clean(NULL, B_WRITE); + (void) swap_pager_clean(); s = splbio(); /* @@ -488,11 +568,14 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) * (release allocated space) */ for (i = 0; i < offset + srcoffset; i += NBPG) { - int *addr = swap_pager_diskaddr(srcswp, i, 0); + int valid; + int *addr = swap_pager_diskaddr(srcswp, i, &valid); if (addr && *addr != SWB_EMPTY) { - rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1); + swap_pager_freeswapspace(*addr, *addr+btodb(NBPG) - 1); + if( valid) + vm_swap_size += btodb(NBPG); + swapsizecheck(); *addr = SWB_EMPTY; - swap_pager_full = 0; } } /* @@ -518,23 +601,24 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) * source block without copying. */ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { - rlist_free(&swapmap, *dstaddrp, *dstaddrp + btodb(NBPG) - 1); + swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(NBPG) - 1); *dstaddrp = SWB_EMPTY; - swap_pager_full = 0; } if (dstaddrp && *dstaddrp == SWB_EMPTY) { *dstaddrp = *srcaddrp; *srcaddrp = SWB_EMPTY; swap_pager_setvalid(dstswp, i + dstoffset, 1); - } - } + vm_swap_size -= btodb(NBPG); + } + } /* * if the source is not empty at this point, then deallocate the space. */ if (*srcaddrp != SWB_EMPTY) { - rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(NBPG) - 1); + if( srcvalid) + vm_swap_size += btodb(NBPG); *srcaddrp = SWB_EMPTY; - swap_pager_full = 0; } } } @@ -543,14 +627,17 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) * deallocate the rest of the source object */ for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += NBPG) { - int *srcaddrp = swap_pager_diskaddr(srcswp, i, 0); + int valid; + int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); if (srcaddrp && *srcaddrp != SWB_EMPTY) { - rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(NBPG) - 1); + if( valid) + vm_swap_size += btodb(NBPG); *srcaddrp = SWB_EMPTY; - swap_pager_full = 0; } } + swapsizecheck(); splx(s); free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); @@ -595,7 +682,7 @@ swap_pager_dealloc(pager) splx(s); - (void) swap_pager_clean(NULL, B_WRITE); + (void) swap_pager_clean(); /* * Free left over swap blocks @@ -604,13 +691,15 @@ swap_pager_dealloc(pager) for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { for (j = 0; j < SWB_NPAGES; j++) if (bp->swb_block[j] != SWB_EMPTY) { - rlist_free(&swapmap, (unsigned)bp->swb_block[j], + swap_pager_freeswapspace((unsigned)bp->swb_block[j], (unsigned)bp->swb_block[j] + btodb(NBPG) - 1); + if( bp->swb_valid & (1<<j)) + vm_swap_size += btodb(NBPG); bp->swb_block[j] = SWB_EMPTY; - swap_pager_full = 0; } } splx(s); + swapsizecheck(); /* * Free swap management resources @@ -633,7 +722,9 @@ swap_pager_getmulti(pager, m, count, reqpage, sync) int reqpage; boolean_t sync; { - return swap_pager_io((sw_pager_t) pager->pg_data, m, count, reqpage, B_READ); + if( reqpage >= count) + panic("swap_pager_getmulti: reqpage >= count\n"); + return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); } /* @@ -648,7 +739,29 @@ swap_pager_getpage(pager, m, sync) vm_page_t marray[1]; marray[0] = m; - return swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, B_READ); + return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); +} + +int +swap_pager_putmulti(pager, m, c, sync, rtvals) + vm_pager_t pager; + vm_page_t *m; + int c; + boolean_t sync; + int *rtvals; +{ + int flags; + + if (pager == NULL) { + (void) swap_pager_clean(); + return VM_PAGER_OK; + } + + flags = B_WRITE; + if (!sync) + flags |= B_ASYNC; + + return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); } /* @@ -662,10 +775,11 @@ swap_pager_putpage(pager, m, sync) { int flags; vm_page_t marray[1]; + int rtvals[1]; if (pager == NULL) { - (void) swap_pager_clean(NULL, B_WRITE); + (void) swap_pager_clean(); return VM_PAGER_OK; } @@ -673,11 +787,14 @@ swap_pager_putpage(pager, m, sync) flags = B_WRITE; if (!sync) flags |= B_ASYNC; - return(swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, flags)); + + swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); + + return rtvals[0]; } static inline int -swap_pager_block_index(swp, offset) +const swap_pager_block_index(swp, offset) sw_pager_t swp; vm_offset_t offset; { @@ -685,11 +802,11 @@ swap_pager_block_index(swp, offset) } static inline int -swap_pager_block_offset(swp, offset) +const swap_pager_block_offset(swp, offset) sw_pager_t swp; vm_offset_t offset; { - return (offset % (SWB_NPAGES*NBPG)); + return ((offset % (NBPG*SWB_NPAGES)) / NBPG); } /* @@ -755,8 +872,6 @@ swap_pager_ridpages(m, count, reqpage) int reqpage; { int i; - int s; - for (i = 0; i < count; i++) if (i != reqpage) swap_pager_freepage(m[i]); @@ -774,35 +889,33 @@ swap_pager_iodone1(bp) bp->b_flags |= B_DONE; bp->b_flags &= ~B_ASYNC; wakeup((caddr_t)bp); +/* if ((bp->b_flags & B_READ) == 0) vwakeup(bp); +*/ } -/* - * Scaled down version of swap(). - * BOGUS: lower level IO routines expect a KVA so we have to map our - * provided physical page into the KVA to keep them happy. - */ + + int -swap_pager_io(swp, m, count, reqpage, flags) +swap_pager_input(swp, m, count, reqpage) register sw_pager_t swp; vm_page_t *m; int count, reqpage; - int flags; { register struct buf *bp; - register sw_blk_t swb; + sw_blk_t swb[count]; register int s; - int i, ix; + int i; boolean_t rv; - vm_offset_t kva, off; + vm_offset_t kva, off[count]; swp_clean_t spc; - int cluster; vm_offset_t paging_offset; vm_object_t object; - int reqaddr, mydskregion; - extern int dmmin, dmmax; + int reqaddr[count]; - spc = NULL; + int first, last; + int failed; + int reqdskregion; object = m[reqpage]->object; paging_offset = object->paging_offset; @@ -812,89 +925,89 @@ swap_pager_io(swp, m, count, reqpage, flags) * following shadow chains looking for the top level object * with the page. */ - off = m[reqpage]->offset + paging_offset; - ix = swap_pager_block_index(swp, off); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { - /* printf("swap pager: out of range\n"); */ + if (swp->sw_blocks == NULL) { swap_pager_ridpages(m, count, reqpage); return(VM_PAGER_FAIL); } + + for(i = 0; i < count; i++) { + vm_offset_t foff = m[i]->offset + paging_offset; + int ix = swap_pager_block_index(swp, foff); + if (ix >= swp->sw_nblocks) { + int j; + if( i <= reqpage) { + swap_pager_ridpages(m, count, reqpage); + return(VM_PAGER_FAIL); + } + for(j = i; j < count; j++) { + swap_pager_freepage(m[j]); + } + count = i; + break; + } + swb[i] = &swp->sw_blocks[ix]; + off[i] = swap_pager_block_offset(swp, foff); + reqaddr[i] = swb[i]->swb_block[off[i]]; + } - swb = &swp->sw_blocks[ix]; - off = swap_pager_block_offset(swp, off) / NBPG; - reqaddr = swb->swb_block[off]; + /* make sure that our required input request is existant */ - /* make sure that our I/O request is contiguous */ - if (flags & B_READ) { - int first = 0, last = count; - int failed = 0; - int reqdskregion = reqaddr / dmmax; - int valid; + if (reqaddr[reqpage] == SWB_EMPTY || + (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { + swap_pager_ridpages(m, count, reqpage); + return(VM_PAGER_FAIL); + } - if (reqaddr == SWB_EMPTY || - (swb->swb_valid & (1 << off)) == 0) { - swap_pager_ridpages(m, count, reqpage); - return(VM_PAGER_FAIL); - } - - /* - * search backwards for the first contiguous page to transfer - */ - for (i = reqpage - 1; i >= 0; --i) { - int *tmpaddr = swap_pager_diskaddr(swp, - m[i]->offset + paging_offset,&valid); - if (tmpaddr == 0 || failed || !valid || - *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG)) { + + reqdskregion = reqaddr[reqpage] / dmmax; + + /* + * search backwards for the first contiguous page to transfer + */ + failed = 0; + first = 0; + for (i = reqpage - 1; i >= 0; --i) { + if ( failed || (reqaddr[i] == SWB_EMPTY) || + (swb[i]->swb_valid & (1 << off[i])) == 0 || + (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(NBPG))) || + ((reqaddr[i] / dmmax) != reqdskregion)) { failed = 1; swap_pager_freepage(m[i]); - m[i] = 0; if (first == 0) first = i + 1; - } else { - mydskregion = *tmpaddr / dmmax; - if (mydskregion != reqdskregion) { - failed = 1; - swap_pager_freepage(m[i]); - m[i] = 0; - first = i + 1; - } - } - } - /* - * search forwards for the last contiguous page to transfer - */ - failed = 0; - for (i = reqpage + 1; i < count; i++) { - int *tmpaddr = swap_pager_diskaddr(swp, m[i]->offset + paging_offset,&valid); - if (tmpaddr == 0 || failed || !valid || - *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG) ) { + } + } + /* + * search forwards for the last contiguous page to transfer + */ + failed = 0; + last = count; + for (i = reqpage + 1; i < count; i++) { + if ( failed || (reqaddr[i] == SWB_EMPTY) || + (swb[i]->swb_valid & (1 << off[i])) == 0 || + (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(NBPG))) || + ((reqaddr[i] / dmmax) != reqdskregion)) { failed = 1; swap_pager_freepage(m[i]); - m[i] = 0; if (last == count) last = i; - } else { - mydskregion = *tmpaddr / dmmax; - if (mydskregion != reqdskregion) { - failed = 1; - swap_pager_freepage(m[i]); - m[i] = 0; - if (last == count) - last = i; - } - } - } - count = last; - if (first != 0) { - for (i = first; i < count; i++) { - m[i - first] = m[i]; - } - count -= first; - reqpage -= first; + } + } + + count = last; + if (first != 0) { + for (i = first; i < count; i++) { + m[i-first] = m[i]; + reqaddr[i-first] = reqaddr[i]; + off[i-first] = off[i]; } + count -= first; + reqpage -= first; } + ++swb[reqpage]->swb_locked; + /* * at this point: * "m" is a pointer to the array of vm_page_t for paging I/O @@ -903,24 +1016,6 @@ swap_pager_io(swp, m, count, reqpage, flags) * "reqpage" is the index into "m" for the page actually faulted */ - /* - * For reads (pageins) and synchronous writes, we clean up - * all completed async pageouts. - */ - if ((flags & B_ASYNC) == 0) { - swap_pager_clean(NULL, flags); - } - /* - * For async writes (pageouts), we cleanup completed pageouts so - * that all available resources are freed. Also tells us if this - * page is already being cleaned. If it is, or no resources - * are available, we try again later. - */ - else if (swap_pager_clean(m[reqpage], B_WRITE)) { - swap_pager_ridpages(m, count, reqpage); - return VM_PAGER_TRYAGAIN; - } - spc = NULL; /* we might not use an spc data structure */ kva = 0; @@ -929,44 +1024,38 @@ swap_pager_io(swp, m, count, reqpage, flags) * but for transfers == 1 page, the swap_pager_free list contains * entries that have pre-allocated kva's (for efficiency). */ - if ((flags & B_READ) && count > 1) { + if (count > 1) { kva = kmem_alloc_pageable(pager_map, count*NBPG); } - + if (!kva) { /* * if a kva has not been allocated, we can only do a one page transfer, - * so we free the other pages that might have been allocated by vm_fault. + * so we free the other pages that might have been allocated by + * vm_fault. */ - for (i = 0; i < count; i++) { - if (i != reqpage) { - swap_pager_freepage(m[i]); - m[i] = 0; - } - } - count = 1; + swap_pager_ridpages(m, count, reqpage); m[0] = m[reqpage]; + reqaddr[0] = reqaddr[reqpage]; + + count = 1; reqpage = 0; /* * get a swap pager clean data structure, block until we get it */ if (queue_empty(&swap_pager_free)) { -/* - if ((flags & (B_ASYNC|B_READ)) == B_ASYNC) - return VM_PAGER_TRYAGAIN; -*/ s = splbio(); if( curproc == pageproc) - (void) swap_pager_clean(NULL, B_WRITE); + (void) swap_pager_clean(); else wakeup((caddr_t) &vm_pages_needed); while (queue_empty(&swap_pager_free)) { swap_pager_needflags |= SWAP_FREE_NEEDED; tsleep((caddr_t)&swap_pager_free, PVM, "swpfre", 0); - if (curproc == pageproc) - (void) swap_pager_clean(NULL, B_WRITE); + if( curproc == pageproc) + (void) swap_pager_clean(); else wakeup((caddr_t) &vm_pages_needed); } @@ -978,35 +1067,214 @@ swap_pager_io(swp, m, count, reqpage, flags) /* - * Determine swap block and allocate as necessary. - * We try to get SWB_NPAGES first, but then we punt and try - * to get one page. If that fails, we look at the allocation - * data structures to find unused but allocated pages in other - * pagers allocations. + * map our page(s) into kva for input */ - if (reqaddr == SWB_EMPTY) { - int blk; - int tries; - int ntoget; + for (i = 0; i < count; i++) { + pmap_kenter( kva + NBPG * i, VM_PAGE_TO_PHYS(m[i])); + } + pmap_update(); + - tries = 0; - s = splbio(); + /* + * Get a swap buffer header and perform the IO + */ + if( spc) { + bp = spc->spc_bp; + bzero(bp, sizeof *bp); + bp->b_spc = spc; + } else { + bp = getpbuf(); + } + + s = splbio(); + bp->b_flags = B_BUSY | B_READ | B_CALL; + bp->b_iodone = swap_pager_iodone1; + bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = reqaddr[0]; + bp->b_bcount = NBPG*count; + bp->b_bufsize = NBPG*count; + + VHOLD(swapdev_vp); + bp->b_vp = swapdev_vp; + if (swapdev_vp->v_type == VBLK) + bp->b_dev = swapdev_vp->v_rdev; + + swp->sw_piip++; + + /* + * perform the I/O + */ + VOP_STRATEGY(bp); + + /* + * wait for the sync I/O to complete + */ + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t)bp, PVM, "swread", 0); + } + rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); + + --swp->sw_piip; + if (swp->sw_piip == 0) + wakeup((caddr_t) swp); + + if (bp->b_vp) + brelvp(bp); + + splx(s); + --swb[reqpage]->swb_locked; + + /* + * remove the mapping for kernel virtual + */ + pmap_remove(vm_map_pmap(pager_map), kva, kva + count * NBPG); + + if (spc) { /* - * if any other pages have been allocated in this block, we - * only try to get one page. + * if we have used an spc, we need to free it. */ - for (i = 0; i < SWB_NPAGES; i++) { - if (swb->swb_block[i] != SWB_EMPTY) - break; + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + if (swap_pager_needflags & SWAP_FREE_NEEDED) { + swap_pager_needflags &= ~SWAP_FREE_NEEDED; + wakeup((caddr_t)&swap_pager_free); + } + } else { + /* + * free the kernel virtual addresses + */ + kmem_free_wakeup(pager_map, kva, count * NBPG); + /* + * release the physical I/O buffer + */ + relpbuf(bp); + /* + * finish up input if everything is ok + */ + if( rv == VM_PAGER_OK) { + for (i = 0; i < count; i++) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + if (i != reqpage) { + /* + * whether or not to leave the page activated + * is up in the air, but we should put the page + * on a page queue somewhere. (it already is in + * the object). + * After some emperical results, it is best + * to deactivate the readahead pages. + */ + vm_page_deactivate(m[i]); + + /* + * just in case someone was asking for this + * page we now tell them that it is ok to use + */ + m[i]->flags &= ~PG_FAKE; + PAGE_WAKEUP(m[i]); + } + } + if( swap_pager_full) { + _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*NBPG); + } + } else { + swap_pager_ridpages(m, count, reqpage); } + } + return(rv); +} + +int +swap_pager_output(swp, m, count, flags, rtvals) + register sw_pager_t swp; + vm_page_t *m; + int count; + int flags; + int *rtvals; +{ + register struct buf *bp; + sw_blk_t swb[count]; + register int s; + int i, j, ix; + boolean_t rv; + vm_offset_t kva, off, foff; + swp_clean_t spc; + vm_offset_t paging_offset; + vm_object_t object; + int reqaddr[count]; + int failed; - ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; +/* + if( count > 1) + printf("off: 0x%x, count: %d\n", m[0]->offset, count); +*/ + spc = NULL; + + object = m[0]->object; + paging_offset = object->paging_offset; + + failed = 0; + for(j=0;j<count;j++) { + foff = m[j]->offset + paging_offset; + ix = swap_pager_block_index(swp, foff); + swb[j] = 0; + if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + rtvals[j] = VM_PAGER_FAIL; + failed = 1; + continue; + } else { + rtvals[j] = VM_PAGER_OK; + } + swb[j] = &swp->sw_blocks[ix]; + ++swb[j]->swb_locked; + if( failed) { + rtvals[j] = VM_PAGER_FAIL; + continue; + } + off = swap_pager_block_offset(swp, foff); + reqaddr[j] = swb[j]->swb_block[off]; + if( reqaddr[j] == SWB_EMPTY) { + int blk; + int tries; + int ntoget; + tries = 0; + s = splbio(); + + /* + * if any other pages have been allocated in this block, we + * only try to get one page. + */ + for (i = 0; i < SWB_NPAGES; i++) { + if (swb[j]->swb_block[i] != SWB_EMPTY) + break; + } + + + ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; + /* + * this code is alittle conservative, but works + * (the intent of this code is to allocate small chunks + * for small objects) + */ + if( (m[j]->offset == 0) && (ntoget*NBPG > object->size)) { + ntoget = (object->size + (NBPG-1))/NBPG; + } + retrygetspace: - if (ntoget == SWB_NPAGES && - rlist_alloc(&swapmap, btodb(ntoget * NBPG),&blk)) { - for (i = 0; i < ntoget; i++) - swb->swb_block[i] = blk + btodb(NBPG) * i; - } else if (!rlist_alloc(&swapmap, btodb(NBPG), &swb->swb_block[off])) { + if (!swap_pager_full && ntoget > 1 && + swap_pager_getswapspace(ntoget * btodb(NBPG), &blk)) { + + for (i = 0; i < ntoget; i++) { + swb[j]->swb_block[i] = blk + btodb(NBPG) * i; + swb[j]->swb_valid = 0; + } + + reqaddr[j] = swb[j]->swb_block[off]; + } else if (!swap_pager_getswapspace(btodb(NBPG), + &swb[j]->swb_block[off])) { /* * if the allocation has failed, we try to reclaim space and * retry. @@ -1015,74 +1283,183 @@ retrygetspace: swap_pager_reclaim(); goto retrygetspace; } - /* - * here on swap space full. - */ - if (spc) - queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); - if (swap_pager_full == 0) - printf("swap_pager: out of swap space !!!\n"); - swap_pager_full = 1; - swap_pager_ridpages(m, count, reqpage); - splx(s); - return(VM_PAGER_TRYAGAIN); + rtvals[j] = VM_PAGER_TRYAGAIN; + failed = 1; + } else { + reqaddr[j] = swb[j]->swb_block[off]; + swb[j]->swb_valid &= ~(1<<off); + } + splx(s); + } + } + + /* + * search forwards for the last contiguous page to transfer + */ + failed = 0; + for (i = 0; i < count; i++) { + if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(NBPG)) || + (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || + (rtvals[i] != VM_PAGER_OK)) { + failed = 1; + if( rtvals[i] == VM_PAGER_OK) + rtvals[i] = VM_PAGER_TRYAGAIN; + } + } + + for(i = 0; i < count; i++) { + if( rtvals[i] != VM_PAGER_OK) { + if( swb[i]) + --swb[i]->swb_locked; + } + } + + for(i = 0; i < count; i++) + if( rtvals[i] != VM_PAGER_OK) + break; + + if( i == 0) { + return VM_PAGER_TRYAGAIN; + } + + count = i; + for(i=0;i<count;i++) { + if( reqaddr[i] == SWB_EMPTY) + printf("I/O to empty block????\n"); + } + + /* + */ + + /* + * For synchronous writes, we clean up + * all completed async pageouts. + */ + if ((flags & B_ASYNC) == 0) { + swap_pager_clean(); + } + + kva = 0; + + /* + * we allocate a new kva for transfers > 1 page + * but for transfers == 1 page, the swap_pager_free list contains + * entries that have pre-allocated kva's (for efficiency). + */ + if ( count > 1) { + kva = kmem_alloc_pageable(pager_map, count*NBPG); + if( !kva) { + for (i = 0; i < count; i++) { + if( swb[i]) + --swb[i]->swb_locked; + rtvals[i] = VM_PAGER_TRYAGAIN; + } + return VM_PAGER_TRYAGAIN; + } + } + + /* + * get a swap pager clean data structure, block until we get it + */ + if (queue_empty(&swap_pager_free)) { +/* + if (flags & B_ASYNC) { + for(i=0;i<count;i++) { + rtvals[i] = VM_PAGER_TRYAGAIN; + if( swb[i]) + --swb[i]->swb_locked; + } + return VM_PAGER_TRYAGAIN; + } +*/ + + s = splbio(); + if( curproc == pageproc) + (void) swap_pager_clean(); + else + wakeup((caddr_t) &vm_pages_needed); + while (queue_empty(&swap_pager_free)) { + swap_pager_needflags |= SWAP_FREE_NEEDED; + tsleep((caddr_t)&swap_pager_free, + PVM, "swpfre", 0); + if( curproc == pageproc) + (void) swap_pager_clean(); + else + wakeup((caddr_t) &vm_pages_needed); } splx(s); - swap_pager_full = 0; + } + + queue_remove_first(&swap_pager_free, spc, swp_clean_t, spc_list); + if( !kva) { + kva = spc->spc_kva; + spc->spc_altkva = 0; + } else { + spc->spc_altkva = kva; } /* * map our page(s) into kva for I/O */ for (i = 0; i < count; i++) { - pmap_enter(vm_map_pmap(pager_map), kva + NBPG * i, - VM_PAGE_TO_PHYS(m[i]), VM_PROT_ALL, TRUE); + pmap_kenter( kva + NBPG * i, VM_PAGE_TO_PHYS(m[i])); } - + pmap_update(); /* * get the base I/O offset into the swap file */ - off = swap_pager_block_offset(swp, m[0]->offset + paging_offset) / NBPG; - -#ifdef DEBUG - if (flags & B_READ && count > 1) - printf("obj: 0x%x off: 0x%x poff: 0x%x off: 0x%x, sz: %d blk: %d op: %s\n", - object, m[0]->offset, paging_offset, off, count, swb->swb_block[off], flags&B_READ?"r":"w"); -#endif + for(i=0;i<count;i++) { + foff = m[i]->offset + paging_offset; + off = swap_pager_block_offset(swp, foff); + /* + * if we are setting the valid bit anew, + * then diminish the swap free space + */ + if( (swb[i]->swb_valid & (1 << off)) == 0) + vm_swap_size -= btodb(NBPG); + + /* + * set the valid bit + */ + swb[i]->swb_valid |= (1 << off); + /* + * and unlock the data structure + */ + --swb[i]->swb_locked; + } s = splbio(); /* * Get a swap buffer header and perform the IO */ - if (spc) { - bp = spc->spc_bp; - bzero(bp, sizeof *bp); - bp->b_spc = spc; - } else { - bp = getpbuf(); - } - bp->b_flags = B_BUSY | (flags & B_READ); + bp = spc->spc_bp; + bzero(bp, sizeof *bp); + bp->b_spc = spc; + + bp->b_flags = B_BUSY; bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; bp->b_un.b_addr = (caddr_t) kva; - bp->b_blkno = swb->swb_block[off]; + bp->b_blkno = reqaddr[0]; VHOLD(swapdev_vp); bp->b_vp = swapdev_vp; if (swapdev_vp->v_type == VBLK) bp->b_dev = swapdev_vp->v_rdev; bp->b_bcount = NBPG*count; - if ((bp->b_flags & B_READ) == 0) - swapdev_vp->v_numoutput++; + bp->b_bufsize = NBPG*count; + swapdev_vp->v_numoutput++; /* * If this is an async write we set up additional buffer fields * and place a "cleaning" entry on the inuse queue. */ - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { + if ( flags & B_ASYNC ) { spc->spc_flags = 0; spc->spc_swp = swp; - spc->spc_m = m[reqpage]; + for(i=0;i<count;i++) + spc->spc_m[i] = m[i]; + spc->spc_count = count; /* * the completion routine for async writes */ @@ -1092,27 +1469,8 @@ retrygetspace: bp->b_dirtyend = bp->b_bcount; swp->sw_poip++; queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); - /* - * we remember that we have used a block for paging. - */ - swb->swb_valid |= (1 << off); } else { - /* - * here for sync write or any read - */ - if ((flags & B_READ) == 0) { - /* - * if we are writing, we remember that we have - * actually used a block for paging. - */ - swb->swb_valid |= (1 << off); - swp->sw_poip++; - } else { - swp->sw_piip++; - } - /* - * the completion routine for reads and sync writes - */ + swp->sw_poip++; bp->b_flags |= B_CALL; bp->b_iodone = swap_pager_iodone1; } @@ -1122,40 +1480,31 @@ retrygetspace: VOP_STRATEGY(bp); if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { if ((bp->b_flags & B_DONE) == B_DONE) { - swap_pager_clean(NULL, flags); + swap_pager_clean(); } splx(s); - return(VM_PAGER_PEND); + for(i=0;i<count;i++) { + rtvals[i] = VM_PAGER_PEND; + } + return VM_PAGER_PEND; } /* * wait for the sync I/O to complete */ while ((bp->b_flags & B_DONE) == 0) { - tsleep((caddr_t)bp, PVM, (flags & B_READ)?"swread":"swwrt", 0); + tsleep((caddr_t)bp, PVM, "swwrt", 0); } rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); - if (bp->b_flags & B_READ) { - --swp->sw_piip; - if (swp->sw_piip == 0) - wakeup((caddr_t) swp); - } else { - --swp->sw_poip; - if (swp->sw_poip == 0) - wakeup((caddr_t) swp); - } + --swp->sw_poip; + if (swp->sw_poip == 0) + wakeup((caddr_t) swp); if (bp->b_vp) brelvp(bp); - /* - * release the physical I/O buffer - */ - if (!spc) - relpbuf(bp); - splx(s); /* @@ -1167,59 +1516,42 @@ retrygetspace: * if we have written the page, then indicate that the page * is clean. */ - if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { - m[reqpage]->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); - /* - * optimization, if a page has been read during the - * pageout process, we activate it. - */ - if ( (m[reqpage]->flags & PG_ACTIVE) == 0 && - pmap_is_referenced(VM_PAGE_TO_PHYS(m[reqpage]))) - vm_page_activate(m[reqpage]); - } - - if (spc) { - /* - * if we have used an spc, we need to free it. - */ - queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); - } else { - for (i = 0; i < count; i++) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); - m[i]->flags |= PG_CLEAN; - m[i]->flags &= ~PG_LAUNDRY; - if (i != reqpage) { + if (rv == VM_PAGER_OK) { + for(i=0;i<count;i++) { + if( rtvals[i] == VM_PAGER_OK) { + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); /* - * whether or not to leave the page activated - * is up in the air, but we should put the page - * on a page queue somewhere. (it already is in - * the object). - * After some emperical results, it is best - * to deactivate the readahead pages. + * optimization, if a page has been read during the + * pageout process, we activate it. */ - vm_page_deactivate(m[i]); - - /* - * just in case someone was asking for this - * page we now tell them that it is ok to use - */ - m[i]->flags &= ~PG_FAKE; - PAGE_WAKEUP(m[i]); + if ( (m[i]->flags & PG_ACTIVE) == 0 && + pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) + vm_page_activate(m[i]); } } -/* - * and free the kernel virtual addresses - */ + } else { + for(i=0;i<count;i++) { + rtvals[i] = rv; + m[i]->flags |= PG_LAUNDRY; + } + } + + if( spc->spc_altkva) kmem_free_wakeup(pager_map, kva, count * NBPG); + + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + if (swap_pager_needflags & SWAP_FREE_NEEDED) { + swap_pager_needflags &= ~SWAP_FREE_NEEDED; + wakeup((caddr_t)&swap_pager_free); } + return(rv); } boolean_t -swap_pager_clean(m, rw) - vm_page_t m; - int rw; +swap_pager_clean() { register swp_clean_t spc, tspc; register int s; @@ -1235,7 +1567,13 @@ swap_pager_clean(m, rw) */ spc = (swp_clean_t) queue_first(&swap_pager_done); while (!queue_end(&swap_pager_done, (queue_entry_t)spc)) { - pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, ((vm_offset_t) spc->spc_kva) + NBPG); + if( spc->spc_altkva) { + pmap_remove(vm_map_pmap(pager_map), spc->spc_altkva, spc->spc_altkva + spc->spc_count * NBPG); + kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * NBPG); + spc->spc_altkva = 0; + } else { + pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, spc->spc_kva + NBPG); + } swap_pager_finish(spc); queue_remove(&swap_pager_done, spc, swp_clean_t, spc_list); goto doclean; @@ -1258,6 +1596,10 @@ doclean: } spc->spc_flags = 0; queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + if (swap_pager_needflags & SWAP_FREE_NEEDED) { + swap_pager_needflags &= ~SWAP_FREE_NEEDED; + wakeup((caddr_t)&swap_pager_free); + } ++cleandone; splx(s); } @@ -1269,11 +1611,10 @@ void swap_pager_finish(spc) register swp_clean_t spc; { - vm_page_t m = spc->spc_m; - vm_object_t object = m->object; - extern int vm_pageout_free_min; + vm_object_t object = spc->spc_m[0]->object; + int i; - if (--object->paging_in_progress == 0) + if ((object->paging_in_progress -= spc->spc_count) == 0) thread_wakeup((int) object); /* @@ -1282,36 +1623,27 @@ swap_pager_finish(spc) * (XXX could get stuck doing this, should give up after awhile) */ if (spc->spc_flags & SPC_ERROR) { - printf("swap_pager_finish: clean of page %x failed\n", - VM_PAGE_TO_PHYS(m)); - m->flags |= PG_LAUNDRY; + for(i=0;i<spc->spc_count;i++) { + printf("swap_pager_finish: clean of page %x failed\n", + VM_PAGE_TO_PHYS(spc->spc_m[i])); + spc->spc_m[i]->flags |= PG_LAUNDRY; + } } else { - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - m->flags |= PG_CLEAN; + for(i=0;i<spc->spc_count;i++) { + pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); + spc->spc_m[i]->flags |= PG_CLEAN; + } } - /* - * if a page has been read during pageout, then - * we activate the page. - */ - if ((m->flags & PG_ACTIVE) == 0 && - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) - vm_page_activate(m); - /* - * we wakeup any processes that are waiting on - * this page. - */ - PAGE_WAKEUP(m); - /* - * if we need memory desperately, then free it now - */ - if (vm_page_free_count < vm_page_free_reserved && - (m->flags & PG_CLEAN) && m->wire_count == 0) { - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - vm_page_free(m); + for(i=0;i<spc->spc_count;i++) { + /* + * we wakeup any processes that are waiting on + * these pages. + */ + PAGE_WAKEUP(spc->spc_m[i]); } - --nswiodone; + nswiodone -= spc->spc_count; return; } @@ -1324,7 +1656,6 @@ swap_pager_iodone(bp) register struct buf *bp; { register swp_clean_t spc; - daddr_t blk; int s; s = splbio(); @@ -1337,15 +1668,17 @@ swap_pager_iodone(bp) bp->b_error, bp->b_blkno, bp->b_bcount); } +/* if ((bp->b_flags & B_READ) == 0) vwakeup(bp); +*/ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); if (bp->b_vp) { brelvp(bp); } - nswiodone++; + nswiodone += spc->spc_count; if (--spc->spc_swp->sw_poip == 0) { wakeup((caddr_t)spc->spc_swp); } @@ -1393,6 +1726,27 @@ getpbuf() { } /* + * allocate a physical buffer, if one is available + */ +struct buf * +trypbuf() { + int s; + struct buf *bp; + + s = splbio(); + if( bswlist.av_forw == NULL) { + splx(s); + return NULL; + } + bp = bswlist.av_forw; + bswlist.av_forw = bp->av_forw; + splx(s); + + bzero(bp, sizeof *bp); + return bp; +} + +/* * release a physical buffer */ void @@ -1416,8 +1770,8 @@ relpbuf(bp) */ int swap_pager_ready() { - if( queue_empty( &swap_pager_free)) - return 0; - else + if( !queue_empty( &swap_pager_free)) return 1; + else + return 0; } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index e505e436d1ce..853edd5d1b16 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: swap_pager.h,v 1.7 1994/01/17 09:33:25 davidg Exp $ + * $Id: swap_pager.h,v 1.9 1994/03/14 21:54:23 davidg Exp $ */ /* @@ -48,7 +48,7 @@ #define _SWAP_PAGER_ 1 /* - * SWB_NPAGES can be set to any value from 1 to 32 pages per allocation, + * SWB_NPAGES can be set to any value from 1 to 16 pages per allocation, * however, due to the allocation spilling into non-swap pager backed memory, * suggest keeping SWB_NPAGES small (1-4). If high performance is manditory * perhaps up to 8 pages might be in order???? @@ -57,7 +57,8 @@ */ #define SWB_NPAGES 8 struct swblock { - unsigned int swb_valid; /* bitmask for valid pages */ + unsigned short swb_valid; /* bitmask for valid pages */ + unsigned short swb_locked; /* block locked */ int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */ }; typedef struct swblock *sw_blk_t; @@ -89,7 +90,7 @@ boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); void swap_pager_iodone(struct buf *); -boolean_t swap_pager_clean(vm_page_t, int); +boolean_t swap_pager_clean(); extern struct pagerops swappagerops; diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index c7254bce4c51..1cf99e068e9e 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * rights to redistribute these changes. */ /* - * $Id: vm_fault.c,v 1.14.2.1 1994/03/24 07:20:29 rgrimes Exp $ + * $Id: vm_fault.c,v 1.18 1994/05/25 11:06:49 davidg Exp $ */ /* @@ -82,9 +82,9 @@ #include "resource.h" #include "resourcevar.h" -#define VM_FAULT_READ_AHEAD 3 +#define VM_FAULT_READ_AHEAD 4 #define VM_FAULT_READ_AHEAD_MIN 1 -#define VM_FAULT_READ_BEHIND 2 +#define VM_FAULT_READ_BEHIND 3 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) extern int swap_pager_full; extern int vm_pageout_proc_limit; @@ -133,6 +133,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_page_t marray[VM_FAULT_READ]; int reqpage; int spl; + int hardfault=0; vm_stat.faults++; /* needs lock XXX */ /* @@ -284,13 +285,12 @@ vm_fault(map, vaddr, fault_type, change_wiring) */ vm_page_lock_queues(); - spl = vm_disable_intr(); + spl = splimp(); if (m->flags & PG_INACTIVE) { queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); m->flags &= ~PG_INACTIVE; vm_page_inactive_count--; - vm_stat.reactivations++; } if (m->flags & PG_ACTIVE) { @@ -299,7 +299,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) m->flags &= ~PG_ACTIVE; vm_page_active_count--; } - vm_set_intr(spl); + splx(spl); vm_page_unlock_queues(); /* @@ -328,10 +328,11 @@ vm_fault(map, vaddr, fault_type, change_wiring) (object->pager && object->pager->pg_type == PG_SWAP && !vm_pager_has_page(object->pager, offset+object->paging_offset)))) { if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ { - UNLOCK_AND_DEALLOCATE; printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid); psignal(curproc, SIGKILL); - return KERN_RESOURCE_SHORTAGE; + curproc->p_cpu = 0; + curproc->p_nice = PRIO_MIN; + setpri(curproc); } } @@ -403,6 +404,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_stat.pageins++; m->flags &= ~PG_FAKE; pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + hardfault++; break; } @@ -895,8 +897,16 @@ vm_fault(map, vaddr, fault_type, change_wiring) } else { vm_page_activate(m); - vm_pageout_deact_bump(m); } + + if( curproc && curproc->p_stats) { + if (hardfault) { + curproc->p_stats->p_ru.ru_majflt++; + } else { + curproc->p_stats->p_ru.ru_minflt++; + } + } + vm_page_unlock_queues(); /* diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index bd2fd07a5445..482f968f652a 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -75,6 +75,7 @@ #include "vm_page.h" #include "vm_kern.h" #include "machine/stdarg.h" +#include "machine/vmparam.h" extern char kstack[]; int avefree = 0; /* XXX */ @@ -115,19 +116,16 @@ useracc(addr, len, rw) vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; /* - * XXX - specially disallow access to user page tables - they are - * in the map. - * - * XXX - don't specially disallow access to the user area - treat - * it as incorrectly as elsewhere. + * XXX - check separately to disallow access to user area and user + * page tables - they are in the map. * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was - * only used (as an end address) in trap.c. Use it as an end - * address here too. + * once only used (as an end address) in trap.c. Use it as an end + * address here too. This bogusness has spread. I just fixed + * where it was used as a max in vm_mmap.c. */ - if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS - || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS - || (vm_offset_t) addr + len <= (vm_offset_t) addr) { + if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS + || (vm_offset_t) addr + len < (vm_offset_t) addr) { printf("address wrap\n"); return (FALSE); } @@ -213,7 +211,6 @@ vm_fork(p1, p2, isvfork) * Allocate a wired-down (for now) pcb and kernel stack for the process */ - /* addr = UPT_MIN_ADDRESS - UPAGES*NBPG; */ addr = (vm_offset_t) kstack; vp = &p2->p_vmspace->vm_map; @@ -281,23 +278,24 @@ void vm_init_limits(p) register struct proc *p; { - int tmp; + int rss_limit; /* * Set up the initial limits on process VM. - * Set the maximum resident set size to be all - * of (reasonably) available memory. This causes - * any single, large process to start random page - * replacement once it fills memory. + * Set the maximum resident set size to be half + * of (reasonably) available memory. Since this + * is a soft limit, it comes into effect only + * when the system is out of memory - half of + * main memory helps to favor smaller processes, + * and reduces thrashing of the object cache. */ p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; - tmp = ((2 * vm_page_free_count) / 3) - 32; - if (vm_page_free_count < 512) - tmp = vm_page_free_count; - p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(tmp); + /* limit the limit to no less than 128K */ + rss_limit = max(vm_page_free_count / 2, 32); + p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; } @@ -425,7 +423,7 @@ noswap: (void) splhigh(); if (((vm_page_free_count + vm_page_inactive_count) >= (vm_page_inactive_target + vm_page_free_reserved)) || - (vm_page_free_count >= vm_page_free_min)) { + (vm_page_free_count > vm_page_free_reserved)) { spl0(); faultin(p); p->p_time = 0; @@ -485,8 +483,6 @@ swapout_threads() continue; switch (p->p_stat) { case SRUN: - if (p->p_pri < PUSER) - continue; if ((tpri = p->p_time + p->p_nice * 8) > outpri2) { outp2 = p; outpri2 = tpri; @@ -495,7 +491,7 @@ swapout_threads() case SSLEEP: case SSTOP: - if (p->p_pri <= PRIBIO) + if (p->p_pri <= PVM) continue; if (p->p_slptime > maxslp) { swapout(p); @@ -511,12 +507,12 @@ swapout_threads() * If we didn't get rid of any real duds, toss out the next most * likely sleeping/stopped or running candidate. We only do this * if we are real low on memory since we don't gain much by doing - * it (UPAGES pages). + * it (UPAGES+1 pages). */ if (didswap == 0 && (swapinreq && - vm_page_free_count <= vm_pageout_free_min)) { + (vm_page_free_count + vm_page_inactive_count) <= (vm_page_free_min + vm_page_inactive_target))) { if ((p = outp) == 0 && - (vm_page_free_count <= vm_pageout_free_min)) + (vm_page_free_count <= vm_page_free_reserved)) p = outp2; #ifdef DEBUG if (swapdebug & SDB_SWAPOUT) @@ -552,6 +548,8 @@ swapout(p) p->p_slptime, vm_page_free_count); #endif + ++p->p_stats->p_ru.ru_nswap; + (void) splhigh(); p->p_flag &= ~SLOAD; diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index e6759a756844..0a753040bf4e 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -37,7 +37,7 @@ * * from: Utah $Hdr: vm_mmap.c 1.3 90/01/21$ * from: @(#)vm_mmap.c 7.5 (Berkeley) 6/28/91 - * $Id: vm_mmap.c,v 1.21 1994/01/31 04:20:26 davidg Exp $ + * $Id: vm_mmap.c,v 1.23 1994/06/22 05:53:10 jkh Exp $ */ /* @@ -59,6 +59,7 @@ #include "vm_prot.h" #include "vm_statistics.h" #include "vm_user.h" +#include "vm_page.h" static boolean_t vm_map_is_allocated(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); @@ -181,9 +182,7 @@ smmap(p, uap, retval) /* * Check address range for validity */ - if (addr + size >= VM_MAXUSER_ADDRESS) - return(EINVAL); - if (addr > addr + size) + if (addr + size > /* XXX */ VM_MAXUSER_ADDRESS || addr + size < addr) return(EINVAL); /* @@ -255,8 +254,8 @@ smmap(p, uap, retval) handle = NULL; } - error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, - flags, handle, (vm_offset_t)uap->pos); + error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, + maxprot, flags, handle, (vm_offset_t)uap->pos); if (error == 0) *retval = (int) addr; return(error); @@ -363,9 +362,7 @@ munmap(p, uap, retval) size = (vm_size_t) round_page(uap->len); if (size == 0) return(0); - if (addr + size >= VM_MAXUSER_ADDRESS) - return(EINVAL); - if (addr >= addr + size) + if (addr + size > /* XXX */ VM_MAXUSER_ADDRESS || addr + size < addr) return(EINVAL); if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+size, FALSE)) @@ -475,6 +472,7 @@ mincore(p, uap, retval) return (EOPNOTSUPP); } +void pmap_object_init_pt(); /* * Internal version of mmap. * Currently used by mmap, exec, and sys5 shared memory. @@ -493,9 +491,9 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) caddr_t handle; /* XXX should be vp */ vm_offset_t foff; { - register vm_pager_t pager; + register vm_pager_t pager = 0; boolean_t fitit; - vm_object_t object; + vm_object_t object = 0; struct vnode *vp = 0; int type; int rv = KERN_SUCCESS; @@ -508,7 +506,10 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) *addr = round_page(*addr); } else { fitit = FALSE; - (void) vm_deallocate(map, *addr, size); + /* + * Defer deallocating address space until + * after a reference is gained to the object + */ } /* @@ -516,48 +517,64 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) * gain a reference to ensure continued existance of the object. * (XXX the exception is to appease the pageout daemon) */ - if ((flags & MAP_TYPE) == MAP_ANON) - type = PG_DFLT; - else { - vp = (struct vnode *)handle; - if (vp->v_type == VCHR) { - type = PG_DEVICE; - handle = (caddr_t)(u_long)vp->v_rdev; - } else - type = PG_VNODE; - } - pager = vm_pager_allocate(type, handle, size, prot, foff); - if (pager == NULL) - return (type == PG_DEVICE ? EINVAL : ENOMEM); + if ((((flags & MAP_TYPE) != MAP_ANON) || (handle != NULL))) { + if ((flags & MAP_TYPE) == MAP_ANON) + type = PG_DFLT; + else { + vp = (struct vnode *)handle; + if (vp->v_type == VCHR) { + type = PG_DEVICE; + handle = (caddr_t)(u_long)vp->v_rdev; + } else + type = PG_VNODE; + } + pager = vm_pager_allocate(type, handle, size, prot, foff); + if (pager == NULL) { + /* on failure, don't leave anything mapped */ + if (!fitit) + (void) vm_deallocate(map, *addr, size); + return (type == PG_DEVICE ? EINVAL : ENOMEM); + } /* * Find object and release extra reference gained by lookup */ - object = vm_object_lookup(pager); - vm_object_deallocate(object); + object = vm_object_lookup(pager); + vm_object_deallocate(object); + } + + /* blow away anything that might be mapped here already */ + if (!fitit) + (void) vm_deallocate(map, *addr, size); /* * Anonymous memory. */ - if ((flags & MAP_TYPE) == MAP_ANON) { - rv = vm_allocate_with_pager(map, addr, size, fitit, + if (((flags & MAP_TYPE) == MAP_ANON)) { + if (handle != NULL) { + rv = vm_allocate_with_pager(map, addr, size, fitit, pager, (vm_offset_t)foff, TRUE); - if (rv != KERN_SUCCESS) { - if (handle == NULL) - vm_pager_deallocate(pager); - else + if (rv != KERN_SUCCESS) { vm_object_deallocate(object); - goto out; - } + goto out; + } +#if 1 /* * Don't cache anonymous objects. * Loses the reference gained by vm_pager_allocate. */ - (void) pager_cache(object, FALSE); + (void) pager_cache(object, FALSE); +#endif #ifdef DEBUG - if (mmapdebug & MDB_MAPIT) - printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n", - curproc->p_pid, *addr, size, pager); + if (mmapdebug & MDB_MAPIT) + printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n", + curproc->p_pid, *addr, size, pager); #endif + } else { + rv = vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, fitit); + if(rv != KERN_SUCCESS) { + goto out; + } + } } /* * Must be type MAP_FILE. @@ -610,6 +627,9 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) pager_cache(object, FALSE); else vm_object_deallocate(object); + + if( map->pmap) + pmap_object_init_pt(map->pmap, *addr, object, foff, size); } /* * Copy-on-write of file. Two flavors. @@ -683,6 +703,8 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) */ vm_object_pmap_copy(object, (vm_offset_t)foff, (vm_offset_t)foff+size); + if( map->pmap) + pmap_object_init_pt(map->pmap, *addr, object, foff, size); vm_object_deallocate(object); vm_map_deallocate(tmap); if (rv != KERN_SUCCESS) @@ -866,6 +888,7 @@ vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal) vm_stat.lookups++; if (object == NULL) { object = vm_object_allocate(size); + /* don't put internal objects in the hash table */ if (!internal) vm_object_enter(object, pager); } else diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 5c463f64d6c5..3dea7601b5ba 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_object.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_object.c,v 1.21.2.1 1994/03/07 02:22:13 rgrimes Exp $ + * $Id: vm_object.c,v 1.25 1994/04/14 07:50:21 davidg Exp $ * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -401,7 +401,7 @@ vm_object_terminate(object) VM_PAGE_CHECK(p); vm_page_lock_queues(); - s = vm_disable_intr(); + s = splimp(); if (p->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); @@ -415,7 +415,7 @@ vm_object_terminate(object) p->flags &= ~PG_INACTIVE; vm_page_inactive_count--; } - vm_set_intr(s); + splx(s); vm_page_unlock_queues(); p = (vm_page_t) queue_next(&p->listq); } @@ -514,15 +514,7 @@ again: vm_page_deactivate(p); if ((p->flags & PG_CLEAN) == 0) { - p->flags |= PG_BUSY; - object->paging_in_progress++; - vm_object_unlock(object); - (void) vm_pager_put(object->pager, p, TRUE); - vm_object_lock(object); - object->paging_in_progress--; - if (object->paging_in_progress == 0) - wakeup((caddr_t) object); - PAGE_WAKEUP(p); + vm_pageout_clean(p,1); goto again; } } @@ -551,7 +543,7 @@ vm_object_deactivate_pages(object) next = (vm_page_t) queue_next(&p->listq); vm_page_lock_queues(); if ((p->flags & (PG_INACTIVE|PG_BUSY)) == 0 && - p->wire_count == 0) + (p->wire_count == 0 && p->hold_count == 0)) vm_page_deactivate(p); /* optimisation from mach 3.0 - * andrew@werple.apana.org.au, * Feb '93 @@ -658,14 +650,14 @@ vm_object_pmap_copy(object, start, end) register vm_page_t p; vm_offset_t amount; + if (object == NULL) + return; + start = trunc_page(start); end = round_page(end); amount = ((end - start) + PAGE_SIZE - 1) / PAGE_SIZE; - if (object == NULL) - return; - vm_object_lock(object); p = (vm_page_t) queue_first(&object->memq); while (!queue_end(&object->memq, (queue_entry_t) p)) { diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 60e7677b27ed..b1b82bcda8e9 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_object.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_object.h,v 1.6 1994/01/14 16:27:25 davidg Exp $ + * $Id: vm_object.h,v 1.7 1994/03/14 21:54:27 davidg Exp $ */ /* @@ -100,7 +100,8 @@ struct vm_object { /* Paging (in or out) - don't collapse or destroy */ /* boolean_t */ can_persist:1, /* allow to persist */ - /* boolean_t */ internal:1; /* internally created object */ + /* boolean_t */ internal:1, /* internally created object */ + read_only:1; /* entire obj is read only */ queue_chain_t cached_list; /* for persistence */ }; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 31a99382a7d6..409438163be6 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.12 1994/02/09 07:03:10 davidg Exp $ + * $Id: vm_page.c,v 1.18 1994/06/17 13:29:13 davidg Exp $ */ /* @@ -311,6 +311,7 @@ vm_page_startup(starta, enda, vaddr) m->flags = 0; m->object = 0; m->phys_addr = pa; + m->hold_count = 0; queue_enter(&vm_page_queue_free, m, vm_page_t, pageq); pa += PAGE_SIZE; } @@ -469,7 +470,7 @@ vm_page_lookup(object, offset) */ bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - spl = vm_disable_intr(); + spl = splimp(); simple_lock(&bucket_lock); mem = (vm_page_t) queue_first(bucket); @@ -477,14 +478,14 @@ vm_page_lookup(object, offset) VM_PAGE_CHECK(mem); if ((mem->object == object) && (mem->offset == offset)) { simple_unlock(&bucket_lock); - vm_set_intr(spl); + splx(spl); return(mem); } mem = (vm_page_t) queue_next(&mem->hashq); } simple_unlock(&bucket_lock); - vm_set_intr(spl); + splx(spl); return(NULL); } @@ -508,10 +509,10 @@ vm_page_rename(mem, new_object, new_offset) vm_page_lock_queues(); /* keep page from moving out from under pageout daemon */ - spl = vm_disable_intr(); + spl = splimp(); vm_page_remove(mem); vm_page_insert(mem, new_object, new_offset); - vm_set_intr(spl); + splx(spl); vm_page_unlock_queues(); } @@ -531,7 +532,7 @@ vm_page_alloc(object, offset) register vm_page_t mem; int spl; - spl = vm_disable_intr(); + spl = splimp(); simple_lock(&vm_page_queue_free_lock); if ( object != kernel_object && object != kmem_object && @@ -539,7 +540,7 @@ vm_page_alloc(object, offset) vm_page_free_count < vm_page_free_reserved) { simple_unlock(&vm_page_queue_free_lock); - vm_set_intr(spl); + splx(spl); /* * this wakeup seems unnecessary, but there is code that * might just check to see if there are free pages, and @@ -552,7 +553,7 @@ vm_page_alloc(object, offset) } if (queue_empty(&vm_page_queue_free)) { simple_unlock(&vm_page_queue_free_lock); - vm_set_intr(spl); + splx(spl); /* * comment above re: wakeups applies here too... */ @@ -569,8 +570,9 @@ vm_page_alloc(object, offset) mem->flags = PG_BUSY|PG_CLEAN|PG_FAKE; vm_page_insert(mem, object, offset); mem->wire_count = 0; - mem->deact = 0; - vm_set_intr(spl); + mem->hold_count = 0; + mem->act_count = 0; + splx(spl); /* * don't wakeup too often, so we wakeup the pageout daemon when @@ -597,10 +599,9 @@ vm_page_free(mem) { int spl; - spl = vm_disable_intr(); + spl = splimp(); vm_page_remove(mem); - mem->deact = 0; if (mem->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, mem, vm_page_t, pageq); mem->flags &= ~PG_ACTIVE; @@ -624,7 +625,7 @@ vm_page_free(mem) vm_page_free_count++; simple_unlock(&vm_page_queue_free_lock); - vm_set_intr(spl); + splx(spl); /* * if pageout daemon needs pages, then tell it that there @@ -650,7 +651,7 @@ vm_page_free(mem) } } else { - vm_set_intr(spl); + splx(spl); } wakeup((caddr_t) mem); } @@ -670,7 +671,7 @@ vm_page_wire(mem) { int spl; VM_PAGE_CHECK(mem); - spl = vm_disable_intr(); + spl = splimp(); if (mem->wire_count == 0) { if (mem->flags & PG_ACTIVE) { @@ -688,7 +689,7 @@ vm_page_wire(mem) vm_page_wire_count++; } mem->wire_count++; - vm_set_intr(spl); + splx(spl); } /* @@ -706,7 +707,7 @@ vm_page_unwire(mem) int spl; VM_PAGE_CHECK(mem); - spl = vm_disable_intr(); + spl = splimp(); if (mem->wire_count != 0) mem->wire_count--; if (mem->wire_count == 0) { @@ -714,9 +715,8 @@ vm_page_unwire(mem) vm_page_active_count++; mem->flags |= PG_ACTIVE; vm_page_wire_count--; - vm_pageout_deact_bump(mem); } - vm_set_intr(spl); + splx(spl); } /* @@ -745,9 +745,8 @@ vm_page_deactivate(m) * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. */ - spl = splhigh(); - m->deact = 0; - if (!(m->flags & PG_INACTIVE) && m->wire_count == 0) { + spl = splimp(); + if (!(m->flags & PG_INACTIVE) && m->wire_count == 0 && m->hold_count == 0) { pmap_clear_reference(VM_PAGE_TO_PHYS(m)); if (m->flags & PG_ACTIVE) { queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); @@ -757,7 +756,13 @@ vm_page_deactivate(m) queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); m->flags |= PG_INACTIVE; vm_page_inactive_count++; +#define NOT_DEACTIVATE_PROTECTS +#ifndef NOT_DEACTIVATE_PROTECTS pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); +#else + if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) + m->flags &= ~PG_CLEAN; +#endif if ((m->flags & PG_CLEAN) == 0) m->flags |= PG_LAUNDRY; } @@ -789,32 +794,42 @@ void vm_page_activate(m) register vm_page_t m; { - int spl; + int spl, target, shortage, maxscan; + vm_page_t actm, next; + VM_PAGE_CHECK(m); - vm_pageout_deact_bump(m); + spl = splimp(); + + if (m->wire_count) { + splx(spl); + return; + } - spl = vm_disable_intr(); + if ((m->flags & (PG_INACTIVE|PG_ACTIVE)) == + (PG_INACTIVE|PG_ACTIVE)) { + panic("vm_page_activate: on both queues?"); + } if (m->flags & PG_INACTIVE) { - queue_remove(&vm_page_queue_inactive, m, vm_page_t, - pageq); + queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); vm_page_inactive_count--; m->flags &= ~PG_INACTIVE; + vm_stat.reactivations++; } - if (m->wire_count == 0) { - if (m->flags & PG_ACTIVE) - panic("vm_page_activate: already active"); - m->flags |= PG_ACTIVE; - queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); - queue_remove(&m->object->memq, m, vm_page_t, listq); - queue_enter(&m->object->memq, m, vm_page_t, listq); - vm_page_active_count++; + if (m->flags & PG_ACTIVE) + panic("vm_page_activate: already active"); - } + m->flags |= PG_ACTIVE; + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + vm_page_active_count++; + /* m->act_count = 10; */ + m->act_count = 1; - vm_set_intr(spl); + splx(spl); } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 33ada305990b..5a3be88adf23 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_page.h 7.3 (Berkeley) 4/21/91 - * $Id: vm_page.h,v 1.8 1994/01/31 04:21:19 davidg Exp $ + * $Id: vm_page.h,v 1.14 1994/04/14 07:50:24 davidg Exp $ */ /* @@ -71,6 +71,9 @@ #ifndef _VM_PAGE_ #define _VM_PAGE_ +#ifdef KERNEL +#include <systm.h> +#endif /* * Management of resident (logical) pages. * @@ -121,18 +124,14 @@ struct vm_page { unsigned int wire_count; /* how many wired down maps use me? */ unsigned short flags; /* bit encoded flags */ - unsigned short deact; /* deactivation count */ + unsigned short act_count; /* active count */ + int hold_count; /* page hold count -- don't pageout */ vm_offset_t phys_addr; /* physical address of page */ }; typedef struct vm_page *vm_page_t; -#define DEACT_START 5 -#define DEACT_DELAY 2 -#define DEACT_CLEAN 1 -#define DEACT_FREE 0 - #if VM_PAGE_DEBUG #define VM_PAGE_CHECK(mem) { \ if ((((unsigned int) mem) < ((unsigned int) &vm_page_array[0])) || \ @@ -226,10 +225,10 @@ void vm_page_replace(); boolean_t vm_page_zero_fill(); void vm_page_copy(); - +#if 0 void vm_page_wire(); void vm_page_unwire(); - +#endif /* * Functions implemented as macros @@ -272,12 +271,25 @@ extern vm_offset_t pmap_phys_ddress(int); /* - * these macros are *MUCH* faster on a 386/486 type machine - * eventually they need to be implemented correctly and put - * somewhere in the machine dependant stuff. + * Keep page from being freed by the page daemon + * much of the same effect as wiring, except much lower + * overhead and should be used only for *very* temporary + * holding ("wiring"). */ -#define vm_disable_intr() (disable_intr(), 0) -#define vm_set_intr(spl) enable_intr() +static inline void +vm_page_hold(mem) + vm_page_t mem; +{ + mem->hold_count++; +} + +static inline void +vm_page_unhold(mem) + vm_page_t mem; +{ + if( --mem->hold_count < 0) + panic("vm_page_unhold: hold count < 0!!!"); +} #endif /* KERNEL */ #endif /* _VM_PAGE_ */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 229a4090922c..c6817768c2bb 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.13 1994/02/10 08:08:37 davidg Exp $ + * $Id: vm_pageout.c,v 1.24 1994/06/17 13:29:15 davidg Exp $ */ /* @@ -85,22 +85,35 @@ extern vm_map_t kmem_map; int vm_pages_needed; /* Event on which pageout daemon sleeps */ +int vm_pagescanner; /* Event on which pagescanner sleeps */ int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */ int vm_pageout_pages_needed = 0; /* flag saying that the pageout daemon needs pages */ int vm_page_pagesfreed; +extern int vm_page_count; extern int npendingio; extern int hz; int vm_pageout_proc_limit; extern int nswiodone; +extern int swap_pager_full; +extern int swap_pager_ready(); #define MAXREF 32767 -#define DEACT_MAX (DEACT_START * 4) -#define MINSCAN 512 /* minimum number of pages to scan in active queue */ - /* set the "clock" hands to be (MINSCAN * 4096) Bytes */ -static int minscan; -void vm_pageout_deact_bump(vm_page_t m) ; + +#define MAXSCAN 512 /* maximum number of pages to scan in active queue */ + /* set the "clock" hands to be (MAXSCAN * 4096) Bytes */ +#define ACT_DECLINE 1 +#define ACT_ADVANCE 3 +#define ACT_MAX 300 + +#define LOWATER ((2048*1024)/NBPG) + +#define VM_PAGEOUT_PAGE_COUNT 8 +int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; +static vm_offset_t vm_space_needed; +int vm_pageout_req_do_stats; +int vm_pageout_do_stats; /* @@ -108,9 +121,9 @@ void vm_pageout_deact_bump(vm_page_t m) ; * cleans a vm_page */ int -vm_pageout_clean(m, wait) +vm_pageout_clean(m, sync) register vm_page_t m; - int wait; + int sync; { /* * Clean the page and remove it from the @@ -130,7 +143,12 @@ vm_pageout_clean(m, wait) register vm_object_t object; register vm_pager_t pager; - int pageout_status; + int pageout_status[VM_PAGEOUT_PAGE_COUNT]; + vm_page_t ms[VM_PAGEOUT_PAGE_COUNT]; + int pageout_count; + int anyok=0; + int i; + vm_offset_t offset = m->offset; object = m->object; if (!object) { @@ -153,37 +171,60 @@ vm_pageout_clean(m, wait) vm_page_free_count < vm_pageout_free_min) return 0; -collapseagain: if (!object->pager && object->shadow && object->shadow->paging_in_progress) return 0; - if (object->shadow) { - vm_offset_t offset = m->offset; - vm_object_collapse(object); - if (!vm_page_lookup(object, offset)) + if( !sync) { + if (object->shadow) { + vm_object_collapse(object); + if (!vm_page_lookup(object, offset)) + return 0; + } + + if ((m->flags & PG_BUSY) || (m->hold_count != 0)) { return 0; + } } -waitagain: - if (!wait && (m->flags & PG_BUSY)) { - return 0; - } else if (m->flags & PG_BUSY) { - int s = splhigh(); - m->flags |= PG_WANTED; - tsleep((caddr_t)m, PVM, "clnslp", 0); - splx(s); - goto waitagain; - } + pageout_count = 1; + ms[0] = m; - m->flags |= PG_BUSY; + if( pager = object->pager) { + for(i=1;i<vm_pageout_page_count;i++) { + if( ms[i] = vm_page_lookup( object, offset+i*NBPG)) { + if( ((ms[i]->flags & (PG_CLEAN|PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) + && (ms[i]->wire_count == 0) + && (ms[i]->hold_count == 0)) + pageout_count++; + else + break; + } else + break; + } + for(i=0;i<pageout_count;i++) { + ms[i]->flags |= PG_BUSY; + pmap_page_protect(VM_PAGE_TO_PHYS(ms[i]), VM_PROT_READ); + } + object->paging_in_progress += pageout_count; + vm_stat.pageouts += pageout_count; + } else { + + m->flags |= PG_BUSY; - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ); + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ); - vm_stat.pageouts++; + vm_stat.pageouts++; - object->paging_in_progress++; + object->paging_in_progress++; + + pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, + object->size, VM_PROT_ALL, 0); + if (pager != NULL) { + vm_object_setpager(object, pager, 0, FALSE); + } + } /* * If there is no pager for the page, @@ -194,160 +235,83 @@ waitagain: * later. */ - if ((pager = object->pager) == NULL) { - pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, - object->size, VM_PROT_ALL, 0); - if (pager != NULL) { - vm_object_setpager(object, pager, 0, FALSE); - } - } if ((pager && pager->pg_type == PG_SWAP) || vm_page_free_count >= vm_pageout_free_min) { - pageout_status = pager ? - vm_pager_put(pager, m, (((object == kernel_object) || wait) ? TRUE: FALSE)) : - VM_PAGER_FAIL; - } else - pageout_status = VM_PAGER_FAIL; - - switch (pageout_status) { - case VM_PAGER_OK: - m->flags &= ~PG_LAUNDRY; - break; - case VM_PAGER_PEND: - m->flags &= ~PG_LAUNDRY; - break; - case VM_PAGER_BAD: - /* - * Page outside of range of object. - * Right now we essentially lose the - * changes by pretending it worked. - */ - m->flags &= ~PG_LAUNDRY; - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - break; - case VM_PAGER_FAIL: - /* - * If page couldn't be paged out, then - * reactivate the page so it doesn't - * clog the inactive list. (We will - * try paging out it again later). - */ - if ((m->flags & PG_ACTIVE) == 0) - vm_page_activate(m); - break; - case VM_PAGER_TRYAGAIN: - break; - } - - - /* - * If the operation is still going, leave - * the page busy to block all other accesses. - * Also, leave the paging in progress - * indicator set so that we don't attempt an - * object collapse. - */ - if (pageout_status != VM_PAGER_PEND) { - if ((m->flags & PG_ACTIVE) == 0 && - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { - vm_page_activate(m); + if( pageout_count == 1) { + pageout_status[0] = pager ? + vm_pager_put(pager, m, + ((sync || (object == kernel_object)) ? TRUE: FALSE)) : + VM_PAGER_FAIL; + } else { + if( !pager) { + for(i=0;i<pageout_count;i++) + pageout_status[i] = VM_PAGER_FAIL; + } else { + vm_pager_putmulti(pager, ms, pageout_count, + ((sync || (object == kernel_object)) ? TRUE : FALSE), + pageout_status); + } } - PAGE_WAKEUP(m); - if (--object->paging_in_progress == 0) - wakeup((caddr_t) object); + + } else { + for(i=0;i<pageout_count;i++) + pageout_status[i] = VM_PAGER_FAIL; } - return (pageout_status == VM_PAGER_PEND || - pageout_status == VM_PAGER_OK) ? 1 : 0; -} - -int -vm_fault_object_deactivate_pages(map, object, dummy) - vm_map_t map; - vm_object_t object; - int dummy; -{ - register vm_page_t p, next; - int rcount; - int s; - int dcount; - int count; - dcount = 0; - /* - * deactivate the pages in the objects shadow - */ + for(i=0;i<pageout_count;i++) { + switch (pageout_status[i]) { + case VM_PAGER_OK: + ms[i]->flags &= ~PG_LAUNDRY; + ++anyok; + break; + case VM_PAGER_PEND: + ms[i]->flags &= ~PG_LAUNDRY; + ++anyok; + break; + case VM_PAGER_BAD: + /* + * Page outside of range of object. + * Right now we essentially lose the + * changes by pretending it worked. + */ + ms[i]->flags &= ~PG_LAUNDRY; + ms[i]->flags |= PG_CLEAN; + pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i])); + break; + case VM_PAGER_FAIL: + /* + * If page couldn't be paged out, then + * reactivate the page so it doesn't + * clog the inactive list. (We will + * try paging out it again later). + */ + if (ms[i]->flags & PG_INACTIVE) + vm_page_activate(ms[i]); + break; + case VM_PAGER_TRYAGAIN: + break; + } - if (object->shadow) - dcount += vm_fault_object_deactivate_pages(map, object->shadow, 0); - /* - * scan the objects memory queue and remove 20% of the active pages - */ - rcount = object->resident_page_count; - count = rcount; - if (count == 0) - return dcount; -#define MINOBJWRITE 10 -#define OBJDIVISOR 5 - if (count > MINOBJWRITE) { - count = MINOBJWRITE + ((count - MINOBJWRITE) / OBJDIVISOR); - } - p = (vm_page_t) queue_first(&object->memq); - while ((rcount-- > 0) && !queue_end(&object->memq, (queue_entry_t) p) ) { - next = (vm_page_t) queue_next(&p->listq); - vm_page_lock_queues(); /* - * if a page is active, not wired and is in the processes pmap, - * then deactivate the page. + * If the operation is still going, leave + * the page busy to block all other accesses. + * Also, leave the paging in progress + * indicator set so that we don't attempt an + * object collapse. */ - if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE && - p->wire_count == 0 && - pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { - if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) { - vm_page_deactivate(p); - if ((p->flags & PG_CLEAN) == 0) { - vm_pageout_clean(p, 0); - } - ++dcount; - if (--count <= 0) { - vm_page_unlock_queues(); - s = splbio(); - while (object->paging_in_progress) { - tsleep((caddr_t) object,PVM,"vmfobw",0); - } - splx(s); - return dcount; - } - } else { - vm_pageout_deact_bump(p); - pmap_clear_reference(VM_PAGE_TO_PHYS(p)); - queue_remove(&object->memq, p, vm_page_t, listq); - queue_enter(&object->memq, p, vm_page_t, listq); - queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); - queue_enter(&vm_page_queue_active, p, vm_page_t, pageq); + if (pageout_status[i] != VM_PAGER_PEND) { + PAGE_WAKEUP(ms[i]); + if (--object->paging_in_progress == 0) + wakeup((caddr_t) object); + if (pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i])); + if( ms[i]->flags & PG_INACTIVE) + vm_page_activate(ms[i]); } - /* - * if a page is inactive and has been modified, clean it now - */ - } else if ((p->flags & (PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) { - if ((p->flags & PG_CLEAN) && - pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->flags &= ~PG_CLEAN; - - if ((p->flags & PG_CLEAN) == 0) - vm_pageout_clean(p, 0); } - - vm_page_unlock_queues(); - p = next; - } - s = splbio(); - while (object->paging_in_progress) { - tsleep((caddr_t)object,PVM,"vmfobw",0); } - splx(s); - return dcount; + return anyok; } /* @@ -376,7 +340,11 @@ vm_pageout_object_deactivate_pages(map, object, count) count = 1; if (object->shadow) { - dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count); + int scount = count; + if( object->shadow->ref_count > 1) + scount /= object->shadow->ref_count; + if( scount) + dcount += vm_pageout_object_deactivate_pages(map, object->shadow, scount); } if (object->paging_in_progress) @@ -396,15 +364,28 @@ vm_pageout_object_deactivate_pages(map, object, count) */ if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE && p->wire_count == 0 && + p->hold_count == 0 && pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) { - if (object->ref_count <= 1) + p->act_count -= min(p->act_count, ACT_DECLINE); + /* + * if the page act_count is zero -- then we deactivate + */ + if (!p->act_count) { vm_page_deactivate(p); - else - vm_page_pageout_deactivate(p); - if (((p->flags & PG_INACTIVE)) && - (p->flags & PG_CLEAN) == 0) - vm_pageout_clean(p, 0); + pmap_page_protect(VM_PAGE_TO_PHYS(p), + VM_PROT_NONE); + /* + * else if on the next go-around we will deactivate the page + * we need to place the page on the end of the queue to age + * the other pages in memory. + */ + } else { + queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, p, vm_page_t, pageq); + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + } /* * see if we are done yet */ @@ -419,23 +400,18 @@ vm_pageout_object_deactivate_pages(map, object, count) } } else { - vm_pageout_deact_bump(p); + /* + * Move the page to the bottom of the queue. + */ pmap_clear_reference(VM_PAGE_TO_PHYS(p)); + if (p->act_count < ACT_MAX) + p->act_count += ACT_ADVANCE; + queue_remove(&object->memq, p, vm_page_t, listq); queue_enter(&object->memq, p, vm_page_t, listq); queue_remove(&vm_page_queue_active, p, vm_page_t, pageq); queue_enter(&vm_page_queue_active, p, vm_page_t, pageq); } - /* - * if a page is inactive and has been modified, clean it now - */ - } else if ((p->flags & (PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) { - if ((p->flags & PG_CLEAN) && - pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->flags &= ~PG_CLEAN; - - if ((p->flags & PG_CLEAN) == 0) - vm_pageout_clean(p, 0); } vm_page_unlock_queues(); @@ -488,50 +464,28 @@ vm_pageout_map_deactivate_pages(map, entry, count, freeer) return; } -void -vm_fault_free_pages(p) - struct proc *p; -{ - int overage = 1; - vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, - (vm_map_entry_t) 0, &overage, vm_fault_object_deactivate_pages); -} - /* * vm_pageout_scan does the dirty work for the pageout daemon. */ -void +int vm_pageout_scan() { vm_page_t m; int page_shortage, maxscan, maxlaunder; - int pages_freed, free, nproc, nbusy; + int pages_freed, free, nproc; + int desired_free; vm_page_t next; struct proc *p; vm_object_t object; int s; + int force_wakeup = 0; +morefree: /* - * deactivate objects with ref_counts == 0 - */ - object = (vm_object_t) queue_first(&vm_object_list); - while (!queue_end(&vm_object_list, (queue_entry_t) object)) { - if (object->ref_count == 0) - vm_object_deactivate_pages(object); - object = (vm_object_t) queue_next(&object->object_list); - } - -rerun: -#if 1 - /* - * next scan the processes for exceeding their rlimits or if process + * scan the processes for exceeding their rlimits or if process * is swapped out -- deactivate pages */ -rescanproc1a: - for (p = allproc; p != NULL; p = p->p_nxt) - p->p_flag &= ~SPAGEDAEMON; - rescanproc1: for (p = allproc; p != NULL; p = p->p_nxt) { vm_offset_t size; @@ -572,22 +526,17 @@ rescanproc1: overage = (size - limit) / NBPG; vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages); - p->p_flag |= SPAGEDAEMON; - goto rescanproc1; } - p->p_flag |= SPAGEDAEMON; + } -#if 0 if (((vm_page_free_count + vm_page_inactive_count) >= (vm_page_inactive_target + vm_page_free_target)) && (vm_page_free_count >= vm_page_free_target)) - return; -#endif - -#endif + return force_wakeup; pages_freed = 0; + desired_free = vm_page_free_target; /* * Start scanning the inactive queue for pages we can free. @@ -597,26 +546,37 @@ rescanproc1: */ maxlaunder = (vm_page_free_target - vm_page_free_count); -rescan: - m = (vm_page_t) queue_first(&vm_page_queue_inactive); maxscan = vm_page_inactive_count; +rescan1: + m = (vm_page_t) queue_first(&vm_page_queue_inactive); while (maxscan-- > 0) { vm_page_t next; - if (queue_end(&vm_page_queue_inactive, (queue_entry_t) m) - || (vm_page_free_count >= vm_page_free_target)) { + || (vm_page_free_count >= desired_free)) { break; } next = (vm_page_t) queue_next(&m->pageq); + if( (m->flags & PG_INACTIVE) == 0) { + printf("vm_pageout_scan: page not inactive?"); + continue; + } + + /* + * activate held pages + */ + if (m->hold_count != 0) { + vm_page_activate(m); + m = next; + continue; + } + /* * dont mess with busy pages */ if (m->flags & PG_BUSY) { - queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); - queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); m = next; continue; } @@ -628,34 +588,25 @@ rescan: * vm system. */ if (m->flags & PG_CLEAN) { - if ((vm_page_free_count > vm_pageout_free_min) + if ((vm_page_free_count > vm_pageout_free_min) /* XXX */ && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { vm_page_activate(m); - ++vm_stat.reactivations; - m = next; - continue; - } - else { + } else if (!m->act_count) { pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); vm_page_free(m); ++pages_freed; - m = next; - continue; + } else { + m->act_count -= min(m->act_count, ACT_DECLINE); } } else if ((m->flags & PG_LAUNDRY) && maxlaunder > 0) { - /* - * if a page has been used even if it is in the laundry, - * activate it. - */ - + int written; if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); vm_page_activate(m); - m->flags &= ~PG_LAUNDRY; m = next; continue; } - /* * If a page is dirty, then it is either * being washed (but not yet cleaned) @@ -664,17 +615,18 @@ rescan: * cleaning operation. */ - if (vm_pageout_clean(m,0)) { - --maxlaunder; - /* - * if the next page has been re-activated, start scanning again - */ - if ((next->flags & PG_INACTIVE) == 0) - goto rescan; + if (written = vm_pageout_clean(m,0)) { + maxlaunder -= written; } - } else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + /* + * if the next page has been re-activated, start scanning again + */ + if (!next || (next->flags & PG_INACTIVE) == 0) + goto rescan1; + } else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); vm_page_activate(m); - } + } m = next; } @@ -682,42 +634,11 @@ rescan: * now check malloc area or swap processes out if we are in low * memory conditions */ - free = vm_page_free_count; - if (free <= vm_page_free_min) { - /* - * Be sure the pmap system is updated so - * we can scan the inactive queue. - */ - pmap_update(); - + if (vm_page_free_count < vm_page_free_min) { /* * swap out inactive processes */ swapout_threads(); - -#if 0 - /* - * see if malloc has anything for us - */ - if (free <= vm_page_free_reserved) - malloc_gc(); -#endif - } - -skipfree: - /* - * If we did not free any pages, but we need to do so, we grow the - * inactive target. But as we successfully free pages, then we - * shrink the inactive target. - */ - if (pages_freed == 0 && vm_page_free_count < vm_page_free_min) { - vm_page_inactive_target += (vm_page_free_min - vm_page_free_count); - if (vm_page_inactive_target > vm_page_free_target*5) - vm_page_inactive_target = vm_page_free_target*5; - } else if (pages_freed > 0) { - vm_page_inactive_target -= vm_page_free_min/2; - if (vm_page_inactive_target < vm_page_free_target*2) - vm_page_inactive_target = vm_page_free_target*2; } /* @@ -726,35 +647,27 @@ skipfree: * to inactive. */ -restart_inactivate_all: - - page_shortage = vm_page_inactive_target - vm_page_inactive_count; - page_shortage -= vm_page_free_count; + page_shortage = vm_page_inactive_target - + (vm_page_free_count + vm_page_inactive_count); if (page_shortage <= 0) { - if (pages_freed == 0 && - ((vm_page_free_count + vm_page_inactive_count) < + if (pages_freed == 0) { + if( vm_page_free_count < vm_page_free_min) { + page_shortage = vm_page_free_min - vm_page_free_count; + } else if(((vm_page_free_count + vm_page_inactive_count) < (vm_page_free_min + vm_page_inactive_target))) { - page_shortage = 1; - } else { - page_shortage = 0; + page_shortage = 1; + } else { + page_shortage = 0; + } } + } - maxscan = vm_page_active_count; - - /* - * deactivate pages that are active, but have not been used - * for a while. - */ -restart_inactivate: m = (vm_page_t) queue_first(&vm_page_queue_active); - while (maxscan-- > 0) { + maxscan = vm_page_active_count; + while (maxscan-- && (page_shortage > 0)) { - if (page_shortage <= 0 && - maxscan < (vm_page_active_count - minscan) ) - break; - if (queue_end(&vm_page_queue_active, (queue_entry_t) m)) { break; } @@ -762,109 +675,156 @@ restart_inactivate: next = (vm_page_t) queue_next(&m->pageq); /* - * dont mess with pages that are busy + * Don't deactivate pages that are busy. */ - if (m->flags & PG_BUSY) { + if ((m->flags & PG_BUSY) || (m->hold_count != 0)) { m = next; continue; } - /* - * Move some more pages from active to inactive. - */ + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + if (m->act_count < ACT_MAX) + m->act_count += ACT_ADVANCE; + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + } else { + m->act_count -= min(m->act_count, ACT_DECLINE); - /* - * see if there are any pages that are able to be deactivated - */ - /* - * the referenced bit is the one that say that the page - * has been used. - */ - if (!pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { /* - * if the page has not been referenced, call the - * vm_page_pageout_deactivate routine. It might - * not deactivate the page every time. There is - * a policy associated with it. + * if the page act_count is zero -- then we deactivate */ - if (page_shortage > 0) { - if (vm_page_pageout_deactivate(m)) { - /* - * if the page was really deactivated, then - * decrement the page_shortage - */ - if ((m->flags & PG_ACTIVE) == 0) { - --page_shortage; - } - } - } - } else { + if (!m->act_count) { + vm_page_deactivate(m); + --page_shortage; /* - * if the page was recently referenced, set our - * deactivate count and clear reference for a future - * check for deactivation. + * else if on the next go-around we will deactivate the page + * we need to place the page on the end of the queue to age + * the other pages in memory. */ - vm_pageout_deact_bump(m); - if (page_shortage > 0 || m->deact >= (DEACT_MAX/2)) - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - queue_remove(&m->object->memq, m, vm_page_t, listq); - queue_enter(&m->object->memq, m, vm_page_t, listq); - queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); - queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + } else { + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + } } + m = next; } - vm_page_pagesfreed += pages_freed; -} + /* + * if we have not freed any pages and we are desparate for memory + * then we keep trying until we get some (any) memory. + */ -/* - * this code maintains a dynamic reference count per page - */ -void -vm_pageout_deact_bump(vm_page_t m) { - if( m->deact >= DEACT_START) { - m->deact += 1; - if( m->deact > DEACT_MAX) - m->deact = DEACT_MAX; - } else { - m->deact += DEACT_START; + if( !force_wakeup && (swap_pager_full || !force_wakeup || + (pages_freed == 0 && (vm_page_free_count < vm_page_free_min)))){ + vm_pager_sync(); + force_wakeup = 1; + goto morefree; } + vm_page_pagesfreed += pages_freed; + return force_wakeup; } -/* - * optionally do a deactivate if the deactivate has been done - * enough to justify it. - */ -int -vm_page_pageout_deactivate(m) - vm_page_t m; +void +vm_pagescan() { + int maxscan, pages_scanned, pages_referenced, nextscan, scantick = hz/20; + int m_ref, next_ref; + vm_page_t m, next; + + (void) splnone(); + + nextscan = scantick; - switch (m->deact) { -case DEACT_FREE: - vm_page_deactivate(m); - return 1; -case DEACT_CLEAN: - break; -case DEACT_DELAY: - vm_page_makefault(m); -case DEACT_START: - break; +scanloop: + + pages_scanned = 0; + pages_referenced = 0; + maxscan = min(vm_page_active_count, MAXSCAN); + + /* + * Gather statistics on page usage. + */ + m = (vm_page_t) queue_first(&vm_page_queue_active); + while (maxscan-- > 0) { + + if (queue_end(&vm_page_queue_active, (queue_entry_t) m)) { + break; + } + + ++pages_scanned; + + next = (vm_page_t) queue_next(&m->pageq); + + /* + * Dont mess with pages that are busy. + */ + if ((m->flags & PG_BUSY) || (m->hold_count != 0)) { + m = next; + continue; + } + + /* + * Advance pages that have been referenced, decline pages that + * have not. + */ + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + pages_referenced++; + if (m->act_count < ACT_MAX) + m->act_count += ACT_ADVANCE; + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + } else { + m->act_count -= min(m->act_count, ACT_DECLINE); + /* + * if the page act_count is zero, and we are low on mem -- then we deactivate + */ + if (!m->act_count && + (vm_page_free_count+vm_page_inactive_count < vm_page_free_target+vm_page_inactive_target )) { + vm_page_deactivate(m); + /* + * else if on the next go-around we will deactivate the page + * we need to place the page on the end of the queue to age + * the other pages in memory. + */ + } else { + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + queue_remove(&m->object->memq, m, vm_page_t, listq); + queue_enter(&m->object->memq, m, vm_page_t, listq); + } + } + m = next; } - --m->deact; - return 0; + + if (pages_referenced) { + nextscan = (pages_scanned / pages_referenced) * scantick; + nextscan = max(nextscan, scantick); + nextscan = min(nextscan, hz); + } else + nextscan = hz; + tsleep((caddr_t) &vm_pagescanner, PVM, "scanw", nextscan); + + goto scanloop; } /* * vm_pageout is the high level pageout daemon. */ - void vm_pageout() { extern npendingio, swiopend; extern int vm_page_count; + static nowakeup; (void) spl0(); /* @@ -872,49 +832,42 @@ vm_pageout() */ vmretry: - vm_page_free_min = npendingio/3; -#ifdef VSMALL - vm_page_free_min = 8; -#endif + vm_page_free_min = 12; vm_page_free_reserved = 8; if (vm_page_free_min < 8) vm_page_free_min = 8; if (vm_page_free_min > 32) vm_page_free_min = 32; - vm_pageout_free_min = 3; + vm_pageout_free_min = 4; vm_page_free_target = 2*vm_page_free_min + vm_page_free_reserved; - vm_page_inactive_target = 3*vm_page_free_min + vm_page_free_reserved; + vm_page_inactive_target = vm_page_free_count / 12; vm_page_free_min += vm_page_free_reserved; - minscan = MINSCAN; - if (minscan > vm_page_count/3) - minscan = vm_page_count/3; + + (void) swap_pager_alloc(0, 0, 0, 0); /* * The pageout daemon is never done, so loop * forever. */ - - while (TRUE) { + int force_wakeup; - splhigh(); - if (vm_page_free_count > vm_page_free_min) { - wakeup((caddr_t) &vm_page_free_count); - tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0); - } else { - if (nswiodone) { - spl0(); - goto dosync; - } - tsleep((caddr_t) &vm_pages_needed, PVM, "pslp1", 5); - } - spl0(); - + tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0); + vm_pager_sync(); - vm_pageout_scan(); - dosync: + /* + * The force wakeup hack added to eliminate delays and potiential + * deadlock. It was possible for the page daemon to indefintely + * postpone waking up a process that it might be waiting for memory + * on. The putmulti stuff seems to have aggravated the situation. + */ + force_wakeup = vm_pageout_scan(); vm_pager_sync(); + if( force_wakeup) + wakeup( (caddr_t) &vm_page_free_count); + vm_pageout_do_stats = 0; cnt.v_scan++; wakeup((caddr_t) kmem_map); } } + diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index d975d8006fd3..a80605967ec8 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -86,6 +86,8 @@ simple_lock_data_t vm_pages_needed_lock; inline static void vm_wait() { extern struct proc *curproc, *pageproc; extern int vm_pageout_pages_needed; + int s; + s = splhigh(); if (curproc == pageproc) { vm_pageout_pages_needed = 1; tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0); @@ -94,6 +96,7 @@ inline static void vm_wait() { wakeup((caddr_t) &vm_pages_needed); tsleep((caddr_t) &vm_page_free_count, PVM, "vmwait", 0); } + splx(s); } diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index d31be45a430f..f1138e1db3f3 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_pager.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_pager.c,v 1.10 1994/01/31 04:21:43 davidg Exp $ + * $Id: vm_pager.c,v 1.11 1994/03/07 11:39:16 davidg Exp $ */ /* @@ -167,6 +167,26 @@ vm_pager_getmulti(pager, m, count, reqpage, sync) } int +vm_pager_putmulti(pager, m, count, sync, rtvals) + vm_pager_t pager; + vm_page_t *m; + int count; + boolean_t sync; + int *rtvals; +{ + int i; + + if( pager->pg_ops->pgo_putmulti) + return(VM_PAGER_PUT_MULTI(pager, m, count, sync, rtvals)); + else { + for(i=0;i<count;i++) { + rtvals[i] = VM_PAGER_PUT( pager, m[i], sync); + } + return 1; + } +} + +int vm_pager_get(pager, m, sync) vm_pager_t pager; vm_page_t m; diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 699881ab51a3..2cf52674fab2 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)vm_pager.h 7.2 (Berkeley) 4/20/91 - * $Id: vm_pager.h,v 1.6 1994/01/31 04:21:50 davidg Exp $ + * $Id: vm_pager.h,v 1.7 1994/03/07 11:39:17 davidg Exp $ */ /* @@ -69,6 +69,7 @@ struct pagerops { int (*pgo_getpage)(); /* get (read) page */ int (*pgo_getmulti)(); /* get (read) multiple pages */ int (*pgo_putpage)(); /* put (write) page */ + int (*pgo_putmulti)(); /* get (read) multiple pages */ boolean_t (*pgo_haspage)(); /* does pager have page? */ }; @@ -91,6 +92,7 @@ struct pagerops { #define VM_PAGER_GET(pg, m, s) (*(pg)->pg_ops->pgo_getpage)(pg, m, s) #define VM_PAGER_GET_MULTI(pg, m, c, r, s) (*(pg)->pg_ops->pgo_getmulti)(pg, m, c, r, s) #define VM_PAGER_PUT(pg, m, s) (*(pg)->pg_ops->pgo_putpage)(pg, m, s) +#define VM_PAGER_PUT_MULTI(pg, m, c, s, rtval) (*(pg)->pg_ops->pgo_putmulti)(pg, m, c, s, rtval) #define VM_PAGER_HASPAGE(pg, o) (*(pg)->pg_ops->pgo_haspage)(pg, o) #ifdef KERNEL diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 0a8a57457101..462948c8703b 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * from: @(#)vm_swap.c 7.18 (Berkeley) 5/6/91 - * $Id: vm_swap.c,v 1.7 1993/12/19 00:56:15 wollman Exp $ + * $Id: vm_swap.c,v 1.8 1994/03/14 21:54:32 davidg Exp $ */ #include "param.h" @@ -48,6 +48,7 @@ #include "kernel.h" static int swfree(struct proc *, int); +int vm_swap_size; /* * Indirect driver for multi-controller paging. @@ -264,6 +265,7 @@ swfree(p, index) if (blk > dmmax) blk = dmmax; rlist_free(&swapmap, vsbase, vsbase + blk - 1); + vm_swap_size += blk; } return (0); } diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index 169bf376357b..f1127e2b07de 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -37,7 +37,7 @@ * * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ * from: @(#)vm_unix.c 7.2 (Berkeley) 4/20/91 - * $Id: vm_unix.c,v 1.5 1993/12/12 12:27:26 davidg Exp $ + * $Id: vm_unix.c,v 1.6 1994/03/14 21:54:33 davidg Exp $ */ /* @@ -50,6 +50,8 @@ #include "vm.h" +extern int swap_pager_full; + struct obreak_args { char *nsiz; }; @@ -73,9 +75,11 @@ obreak(p, uap, retval) old = round_page(old + ctob(vm->vm_dsize)); diff = new - old; if (diff > 0) { + if (swap_pager_full) { + return(ENOMEM); + } rv = vm_allocate(&vm->vm_map, &old, diff, FALSE); if (rv != KERN_SUCCESS) { - uprintf("sbrk: grow failed, return = %d\n", rv); return(ENOMEM); } vm->vm_dsize += btoc(diff); @@ -83,7 +87,6 @@ obreak(p, uap, retval) diff = -diff; rv = vm_deallocate(&vm->vm_map, new, diff); if (rv != KERN_SUCCESS) { - uprintf("sbrk: shrink failed, return = %d\n", rv); return(ENOMEM); } vm->vm_dsize -= btoc(diff); diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index c35971ba67e0..d44e12e22ac6 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -2,7 +2,7 @@ * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. - * Copyright (c) 1993 John S. Dyson + * Copyright (c) 1993,1994 John S. Dyson * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer @@ -37,7 +37,7 @@ * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.11.2.3 1994/04/18 04:57:49 rgrimes Exp $ + * $Id: vnode_pager.c,v 1.21 1994/06/22 05:53:12 jkh Exp $ */ /* @@ -57,7 +57,7 @@ * * 1) Supports multiple - block reads * 2) Bypasses buffer cache for reads - * + * * TODO: * * 1) Totally bypass buffer cache for reads @@ -86,6 +86,8 @@ #include "buf.h" #include "specdev.h" +int vnode_pager_putmulti(); + struct pagerops vnodepagerops = { vnode_pager_init, vnode_pager_alloc, @@ -93,35 +95,24 @@ struct pagerops vnodepagerops = { vnode_pager_getpage, vnode_pager_getmulti, vnode_pager_putpage, + vnode_pager_putmulti, vnode_pager_haspage }; -static int vnode_pager_io(vn_pager_t vnp, vm_page_t *m, int count, int reqpage, - enum uio_rw rw); -struct buf * getpbuf() ; -void relpbuf(struct buf *bp) ; +static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); +static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); +struct buf * getpbuf(); +void relpbuf(struct buf * bp); extern vm_map_t pager_map; -queue_head_t vnode_pager_list; /* list of managed vnodes */ +queue_head_t vnode_pager_list; /* list of managed vnodes */ -#ifdef DEBUG -int vpagerdebug = 0x00; -#define VDB_FOLLOW 0x01 -#define VDB_INIT 0x02 -#define VDB_IO 0x04 -#define VDB_FAIL 0x08 -#define VDB_ALLOC 0x10 -#define VDB_SIZE 0x20 -#endif +#define MAXBP (NBPG/DEV_BSIZE); void vnode_pager_init() { -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_init()\n"); -#endif queue_init(&vnode_pager_list); } @@ -143,34 +134,32 @@ vnode_pager_alloc(handle, size, prot, offset) struct vnode *vp; struct proc *p = curproc; /* XXX */ -#ifdef DEBUG - if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) - printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot); -#endif /* * Pageout to vnode, no can do yet. */ if (handle == NULL) - return(NULL); + return (NULL); /* - * Vnodes keep a pointer to any associated pager so no need to - * lookup with vm_pager_lookup. + * Vnodes keep a pointer to any associated pager so no need to lookup + * with vm_pager_lookup. */ - vp = (struct vnode *)handle; - pager = (vm_pager_t)vp->v_vmdata; + vp = (struct vnode *) handle; + pager = (vm_pager_t) vp->v_vmdata; if (pager == NULL) { + /* * Allocate pager structures */ - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); + pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); if (pager == NULL) - return(NULL); - vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); + return (NULL); + vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); if (vnp == NULL) { - free((caddr_t)pager, M_VMPAGER); - return(NULL); + free((caddr_t) pager, M_VMPAGER); + return (NULL); } + /* * And an object of the appropriate size */ @@ -179,10 +168,11 @@ vnode_pager_alloc(handle, size, prot, offset) vm_object_enter(object, pager); vm_object_setpager(object, pager, 0, TRUE); } else { - free((caddr_t)vnp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); - return(NULL); + free((caddr_t) vnp, M_VMPGDATA); + free((caddr_t) pager, M_VMPAGER); + return (NULL); } + /* * Hold a reference to the vnode and initialize pager data. */ @@ -190,42 +180,32 @@ vnode_pager_alloc(handle, size, prot, offset) vnp->vnp_flags = 0; vnp->vnp_vp = vp; vnp->vnp_size = vattr.va_size; + queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list); pager->pg_handle = handle; pager->pg_type = PG_VNODE; pager->pg_ops = &vnodepagerops; - pager->pg_data = (caddr_t)vnp; - vp->v_vmdata = (caddr_t)pager; + pager->pg_data = (caddr_t) vnp; + vp->v_vmdata = (caddr_t) pager; } else { + /* - * vm_object_lookup() will remove the object from the - * cache if found and also gain a reference to the object. + * vm_object_lookup() will remove the object from the cache if + * found and also gain a reference to the object. */ object = vm_object_lookup(pager); -#ifdef DEBUG - vnp = (vn_pager_t)pager->pg_data; -#endif } -#ifdef DEBUG - if (vpagerdebug & VDB_ALLOC) - printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n", - vp, vnp->vnp_size, pager, object); -#endif - return(pager); + return (pager); } void vnode_pager_dealloc(pager) vm_pager_t pager; { - register vn_pager_t vnp = (vn_pager_t)pager->pg_data; + register vn_pager_t vnp = (vn_pager_t) pager->pg_data; register struct vnode *vp; - struct proc *p = curproc; /* XXX */ + struct proc *p = curproc; /* XXX */ -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_dealloc(%x)\n", pager); -#endif if (vp = vnp->vnp_vp) { vp->v_vmdata = NULL; vp->v_flag &= ~VTEXT; @@ -236,22 +216,21 @@ vnode_pager_dealloc(pager) vrele(vp); } queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); - free((caddr_t)vnp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); + free((caddr_t) vnp, M_VMPGDATA); + free((caddr_t) pager, M_VMPAGER); } int vnode_pager_getmulti(pager, m, count, reqpage, sync) vm_pager_t pager; vm_page_t *m; - int count; - int reqpage; + int count; + int reqpage; boolean_t sync; { - - return vnode_pager_io((vn_pager_t) pager->pg_data, m, count, reqpage, UIO_READ); -} + return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); +} int vnode_pager_getpage(pager, m, sync) @@ -260,17 +239,14 @@ vnode_pager_getpage(pager, m, sync) boolean_t sync; { - int err; + int err; vm_page_t marray[1]; -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_getpage(%x, %x)\n", pager, m); -#endif + if (pager == NULL) return FALSE; marray[0] = m; - return vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_READ); + return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); } boolean_t @@ -279,69 +255,61 @@ vnode_pager_putpage(pager, m, sync) vm_page_t m; boolean_t sync; { - int err; + int err; vm_page_t marray[1]; + int rtvals[1]; -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_putpage(%x, %x)\n", pager, m); -#endif if (pager == NULL) return FALSE; marray[0] = m; - err = vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_WRITE); - return err; + vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); + return rtvals[0]; } +int +vnode_pager_putmulti(pager, m, c, sync, rtvals) + vm_pager_t pager; + vm_page_t *m; + int c; + boolean_t sync; + int *rtvals; +{ + return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); +} + + boolean_t vnode_pager_haspage(pager, offset) vm_pager_t pager; vm_offset_t offset; { - register vn_pager_t vnp = (vn_pager_t)pager->pg_data; + register vn_pager_t vnp = (vn_pager_t) pager->pg_data; daddr_t bn; - int err; - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_haspage(%x, %x)\n", pager, offset); -#endif + int err; /* * Offset beyond end of file, do not have the page */ if (offset >= vnp->vnp_size) { -#ifdef DEBUG - if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) - printf("vnode_pager_haspage: pg %x, off %x, size %x\n", - pager, offset, vnp->vnp_size); -#endif - return(FALSE); + return (FALSE); } /* - * Read the index to find the disk block to read - * from. If there is no block, report that we don't - * have this data. - * + * Read the index to find the disk block to read from. If there is no + * block, report that we don't have this data. + * * Assumes that the vnode has whole page or nothing. */ err = VOP_BMAP(vnp->vnp_vp, offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize, - (struct vnode **)0, &bn); + (struct vnode **) 0, &bn); if (err) { -#ifdef DEBUG - if (vpagerdebug & VDB_FAIL) - printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n", - err, pager, offset); -#endif - return(TRUE); + return (TRUE); } - return((long)bn < 0 ? FALSE : TRUE); + return ((long) bn < 0 ? FALSE : TRUE); } /* - * (XXX) * Lets the VM system know about a change in size for a file. * If this vnode is mapped into some address space (i.e. we have a pager * for it) we adjust our own internal size and flush any cached pages in @@ -353,7 +321,7 @@ vnode_pager_haspage(pager, offset) void vnode_pager_setsize(vp, nsize) struct vnode *vp; - u_long nsize; + u_long nsize; { register vn_pager_t vnp; register vm_object_t object; @@ -364,42 +332,73 @@ vnode_pager_setsize(vp, nsize) */ if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) return; + /* * Hasn't changed size */ - pager = (vm_pager_t)vp->v_vmdata; - vnp = (vn_pager_t)pager->pg_data; + pager = (vm_pager_t) vp->v_vmdata; + vnp = (vn_pager_t) pager->pg_data; if (nsize == vnp->vnp_size) return; + /* - * No object. - * This can happen during object termination since - * vm_object_page_clean is called after the object - * has been removed from the hash table, and clean - * may cause vnode write operations which can wind - * up back here. + * No object. This can happen during object termination since + * vm_object_page_clean is called after the object has been removed + * from the hash table, and clean may cause vnode write operations + * which can wind up back here. */ object = vm_object_lookup(pager); if (object == NULL) return; -#ifdef DEBUG - if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) - printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", - vp, object, vnp->vnp_size, nsize); -#endif - /* - * File has shrunk. - * Toss any cached pages beyond the new EOF. + * File has shrunk. Toss any cached pages beyond the new EOF. */ - if (round_page(nsize) < round_page(vnp->vnp_size)) { + if (nsize < vnp->vnp_size) { vm_object_lock(object); vm_object_page_remove(object, - (vm_offset_t)round_page(nsize), round_page(vnp->vnp_size)); + round_page((vm_offset_t) nsize), vnp->vnp_size); vm_object_unlock(object); + + /* + * this gets rid of garbage at the end of a page that is now + * only partially backed by the vnode... + */ + if (nsize & PAGE_MASK) { + vm_offset_t kva; + vm_page_t m; + + m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); + if (m) { + kva = vm_pager_map_page(m); + bzero((caddr_t) kva + (nsize & PAGE_MASK), + round_page(nsize) - nsize); + vm_pager_unmap_page(kva); + } + } + } else { + + /* + * this allows the filesystem and VM cache to stay in sync if + * the VM page hasn't been modified... After the page is + * removed -- it will be faulted back in from the filesystem + * cache. + */ + if (vnp->vnp_size & PAGE_MASK) { + vm_page_t m; + + m = vm_page_lookup(object, trunc_page(vnp->vnp_size)); + if (m && (m->flags & PG_CLEAN)) { + vm_object_lock(object); + vm_object_page_remove(object, + vnp->vnp_size, vnp->vnp_size); + vm_object_unlock(object); + } + } } - vnp->vnp_size = (vm_offset_t)nsize; + vnp->vnp_size = (vm_offset_t) nsize; + object->size = round_page(nsize); + vm_object_deallocate(object); } @@ -411,14 +410,15 @@ vnode_pager_umount(mp) struct vnode *vp; pager = (vm_pager_t) queue_first(&vnode_pager_list); - while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { + while (!queue_end(&vnode_pager_list, (queue_entry_t) pager)) { + /* - * Save the next pointer now since uncaching may - * terminate the object and render pager invalid + * Save the next pointer now since uncaching may terminate the + * object and render pager invalid */ - vp = ((vn_pager_t)pager->pg_data)->vnp_vp; + vp = ((vn_pager_t) pager->pg_data)->vnp_vp; npager = (vm_pager_t) queue_next(&pager->pg_list); - if (mp == (struct mount *)0 || vp->v_mount == mp) + if (mp == (struct mount *) 0 || vp->v_mount == mp) (void) vnode_pager_uncache(vp); pager = npager; } @@ -441,21 +441,22 @@ vnode_pager_uncache(vp) /* * Not a mapped vnode */ - pager = (vm_pager_t)vp->v_vmdata; + pager = (vm_pager_t) vp->v_vmdata; if (pager == NULL) return (TRUE); + /* - * Unlock the vnode if it is currently locked. - * We do this since uncaching the object may result - * in its destruction which may initiate paging - * activity which may necessitate locking the vnode. + * Unlock the vnode if it is currently locked. We do this since + * uncaching the object may result in its destruction which may + * initiate paging activity which may necessitate locking the vnode. */ locked = VOP_ISLOCKED(vp); if (locked) VOP_UNLOCK(vp); + /* - * Must use vm_object_lookup() as it actually removes - * the object from the cache list. + * Must use vm_object_lookup() as it actually removes the object from + * the cache list. */ object = vm_object_lookup(pager); if (object) { @@ -465,7 +466,7 @@ vnode_pager_uncache(vp) uncached = TRUE; if (locked) VOP_LOCK(vp); - return(uncached); + return (uncached); } @@ -486,20 +487,23 @@ vnode_pager_addr(vp, address) struct vnode *vp; vm_offset_t address; { - int rtaddress; - int bsize; + int rtaddress; + int bsize; vm_offset_t block; struct vnode *rtvp; - int err; - int vblock, voffset; + int err; + int vblock, voffset; bsize = vp->v_mount->mnt_stat.f_bsize; vblock = address / bsize; voffset = address % bsize; - err = VOP_BMAP(vp,vblock,&rtvp,&block); + err = VOP_BMAP(vp, vblock, &rtvp, &block); - rtaddress = block * DEV_BSIZE + voffset; + if (err) + rtaddress = -1; + else + rtaddress = block * DEV_BSIZE + voffset; return rtaddress; } @@ -512,411 +516,540 @@ vnode_pager_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; - wakeup((caddr_t)bp); + wakeup((caddr_t) bp); } /* - * vnode_pager_io: - * Perform read or write operation for vnode_paging - * - * args: - * vnp -- pointer to vnode pager data structure - * containing size and vnode pointer, etc - * - * m -- pointer to array of vm_page_t entries to - * do I/O to. It is not necessary to fill any - * pages except for the reqpage entry. If a - * page is not filled, it needs to be removed - * from its object... - * - * count -- number of pages for I/O - * - * reqpage -- fault requested page for I/O - * (index into vm_page_t entries above) - * - * rw -- UIO_READ or UIO_WRITE - * - * NOTICE!!!! direct writes look like that they are close to being - * implemented. They are not really, several things need - * to be done to make it work (subtile things.) Hack at - * your own risk (direct writes are scarey). - * - * ANOTHER NOTICE!!!! - * we currently only support direct I/O to filesystems whose - * contiguously allocated blocksize is at least a vm page. - * changes will be made in the future to support more flexibility. + * small block file system vnode pager input */ +int +vnode_pager_input_smlfs(vnp, m) + vn_pager_t vnp; + vm_page_t m; +{ + int i; + int s; + vm_offset_t paging_offset; + struct vnode *dp, *vp; + struct buf *bp; + vm_offset_t mapsize; + vm_offset_t foff; + vm_offset_t kva; + int fileaddr; + int block; + vm_offset_t bsize; + int error = 0; + + paging_offset = m->object->paging_offset; + vp = vnp->vnp_vp; + bsize = vp->v_mount->mnt_stat.f_bsize; + foff = m->offset + paging_offset; + + VOP_BMAP(vp, foff, &dp, 0); + + kva = vm_pager_map_page(m); + + for (i = 0; i < NBPG / bsize; i++) { + + /* + * calculate logical block and offset + */ + block = foff / bsize + i; + s = splbio(); + while (bp = incore(vp, block)) { + int amount; + + /* + * wait until the buffer is avail or gone + */ + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + tsleep((caddr_t) bp, PVM, "vnwblk", 0); + continue; + } + amount = bsize; + if ((foff + bsize) > vnp->vnp_size) + amount = vnp->vnp_size - foff; + /* + * make sure that this page is in the buffer + */ + if ((amount > 0) && amount <= bp->b_bcount) { + bp->b_flags |= B_BUSY; + splx(s); + + /* + * copy the data from the buffer + */ + bcopy(bp->b_un.b_addr, (caddr_t) kva + i * bsize, amount); + if (amount < bsize) { + bzero((caddr_t) kva + amount, bsize - amount); + } + bp->b_flags &= ~B_BUSY; + wakeup((caddr_t) bp); + goto nextblock; + } + break; + } + splx(s); + fileaddr = vnode_pager_addr(vp, foff + i * bsize); + if (fileaddr != -1) { + VHOLD(vp); + bp = getpbuf(); + + /* build a minimal buffer header */ + bp->b_flags = B_BUSY | B_READ | B_CALL; + bp->b_iodone = vnode_pager_iodone; + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva + i * bsize; + bp->b_blkno = fileaddr / DEV_BSIZE; + bp->b_vp = dp; + + /* + * Should be a BLOCK or character DEVICE if we get + * here + */ + bp->b_dev = dp->v_rdev; + bp->b_bcount = bsize; + bp->b_bufsize = bsize; + + /* do the input */ + VOP_STRATEGY(bp); + + /* we definitely need to be at splbio here */ + + s = splbio(); + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t) bp, PVM, "vnsrd", 0); + } + splx(s); + if ((bp->b_flags & B_ERROR) != 0) + error = EIO; + + HOLDRELE(vp); + + /* + * free the buffer header back to the swap buffer pool + */ + relpbuf(bp); + if (error) + break; + } else { + bzero((caddr_t) kva + i * bsize, bsize); + } +nextblock: + } + vm_pager_unmap_page(kva); + if (error) { + return VM_PAGER_FAIL; + } + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_CLEAN; + m->flags &= ~PG_LAUNDRY; + return VM_PAGER_OK; + +} + + +/* + * old style vnode pager output routine + */ int -vnode_pager_io(vnp, m, count, reqpage, rw) - register vn_pager_t vnp; - vm_page_t *m; - int count, reqpage; - enum uio_rw rw; +vnode_pager_input_old(vnp, m) + vn_pager_t vnp; + vm_page_t m; { - int i,j; + int i; struct uio auio; struct iovec aiov; + int error; + int size; + vm_offset_t foff; + vm_offset_t kva; + + error = 0; + foff = m->offset + m->object->paging_offset; + + /* + * Return failure if beyond current EOF + */ + if (foff >= vnp->vnp_size) { + return VM_PAGER_BAD; + } else { + size = NBPG; + if (foff + size > vnp->vnp_size) + size = vnp->vnp_size - foff; +/* + * Allocate a kernel virtual address and initialize so that + * we can use VOP_READ/WRITE routines. + */ + kva = vm_pager_map_page(m); + aiov.iov_base = (caddr_t) kva; + aiov.iov_len = size; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = foff; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_resid = size; + auio.uio_procp = (struct proc *) 0; + + error = VOP_READ(vnp->vnp_vp, &auio, IO_PAGER, curproc->p_ucred); + if (!error) { + register int count = size - auio.uio_resid; + + if (count == 0) + error = EINVAL; + else if (count != NBPG) + bzero((caddr_t) kva + count, NBPG - count); + } + vm_pager_unmap_page(kva); + } + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_CLEAN; + m->flags &= ~PG_LAUNDRY; + return error ? VM_PAGER_FAIL : VM_PAGER_OK; +} + +/* + * generic vnode pager input routine + */ +int +vnode_pager_input(vnp, m, count, reqpage) + register vn_pager_t vnp; + vm_page_t *m; + int count, reqpage; +{ + int i, j; vm_offset_t kva, foff; - int size; - struct proc *p = curproc; /* XXX */ + int size; + struct proc *p = curproc; /* XXX */ vm_object_t object; vm_offset_t paging_offset; struct vnode *dp, *vp; vm_offset_t mapsize; - int bsize; - int errtype=0; /* 0 is file type otherwise vm type */ - int error = 0; - int trimmed; + int bsize; - object = m[reqpage]->object; /* all vm_page_t items are in same object */ + int first, last; + int reqaddr, firstaddr; + int block, offset; + + int nbp; + struct buf *bp; + int s; + int failflag; + + int errtype = 0; /* 0 is file type otherwise vm type */ + int error = 0; + + object = m[reqpage]->object; /* all vm_page_t items are in same + * object */ paging_offset = object->paging_offset; vp = vnp->vnp_vp; bsize = vp->v_mount->mnt_stat.f_bsize; /* get the UNDERLYING device for the file with VOP_BMAP() */ + /* - * originally, we did not check for an error return - * value -- assuming an fs always has a bmap entry point - * -- that assumption is wrong!!! - */ - /* - * we only do direct I/O if the file is on a local - * BLOCK device and currently if it is a read operation only. + * originally, we did not check for an error return value -- assuming + * an fs always has a bmap entry point -- that assumption is wrong!!! */ kva = 0; mapsize = 0; - if (!VOP_BMAP(vp, m[reqpage]->offset+paging_offset, &dp, 0) && - rw == UIO_READ && ((dp->v_type == VBLK && - (vp->v_mount->mnt_stat.f_type == MOUNT_UFS)) || - (vp->v_mount->mnt_stat.f_type == MOUNT_NFS))) { + foff = m[reqpage]->offset + paging_offset; + if (!VOP_BMAP(vp, foff, &dp, 0)) { + /* * we do not block for a kva, notice we default to a kva * conservative behavior */ - kva = kmem_alloc_pageable(pager_map, - (mapsize = count*NBPG)); - if( !kva) { + kva = kmem_alloc_pageable(pager_map, (mapsize = count * NBPG)); + if (!kva) { for (i = 0; i < count; i++) { if (i != reqpage) { vnode_pager_freepage(m[i]); - m[i] = 0; } } m[0] = m[reqpage]; - kva = vm_pager_map_page(m[0]); + kva = kmem_alloc_wait(pager_map, mapsize = NBPG); reqpage = 0; count = 1; - mapsize = count*NBPG; } } + /* + * if we can't get a kva or we can't bmap, use old VOP code + */ if (!kva) { + for (i = 0; i < count; i++) { + if (i != reqpage) { + vnode_pager_freepage(m[i]); + } + } + return vnode_pager_input_old(vnp, m[reqpage]); + /* - * here on I/O through VFS + * if the blocksize is smaller than a page size, then use + * special small filesystem code. NFS sometimes has a small + * blocksize, but it can handle large reads itself. */ + } else if ((NBPG / bsize) > 1 && + (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { + + kmem_free_wakeup(pager_map, kva, mapsize); + for (i = 0; i < count; i++) { if (i != reqpage) { vnode_pager_freepage(m[i]); - m[i] = 0; } } - m[0] = m[reqpage]; - foff = m[0]->offset + paging_offset; - reqpage = 0; - count = 1; - /* - * Return failure if beyond current EOF - */ - if (foff >= vnp->vnp_size) { - errtype = 1; - error = VM_PAGER_BAD; - } else { - if (foff + NBPG > vnp->vnp_size) - size = vnp->vnp_size - foff; - else - size = NBPG; + return vnode_pager_input_smlfs(vnp, m[reqpage]); + } /* - * Allocate a kernel virtual address and initialize so that - * we can use VOP_READ/WRITE routines. + * here on direct device I/O */ - kva = vm_pager_map_page(m[0]); - aiov.iov_base = (caddr_t)kva; - aiov.iov_len = size; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = foff; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = rw; - auio.uio_resid = size; - auio.uio_procp = (struct proc *)0; - if (rw == UIO_READ) { - error = VOP_READ(vp, &auio, IO_PAGER, p->p_ucred); - } else { - error = VOP_WRITE(vp, &auio, IO_PAGER, p->p_ucred); - } - if (!error) { - register int count = size - auio.uio_resid; - if (count == 0) - error = EINVAL; - else if (count != NBPG && rw == UIO_READ) - bzero((caddr_t)kva + count, NBPG - count); - } - vm_pager_unmap_page(kva); - } - } else { - /* - * here on direct device I/O - */ - int first=0, last=count; - int reqaddr, firstaddr; - int block, offset; - - struct buf *bp; - int s; - int failflag; + /* + * This pathetic hack gets data from the buffer cache, if it's there. + * I believe that this is not really necessary, and the ends can be + * gotten by defaulting to the normal vfs read behavior, but this + * might be more efficient, because the will NOT invoke read-aheads + * and one of the purposes of this code is to bypass the buffer cache + * and keep from flushing it by reading in a program. + */ - foff = m[reqpage]->offset + paging_offset; + /* + * calculate logical block and offset + */ + block = foff / bsize; + offset = foff % bsize; + s = splbio(); + + /* + * if we have a buffer in core, then try to use it + */ + while (bp = incore(vp, block)) { + int amount; /* - * This pathetic hack gets data from the buffer cache, if it's there. - * I believe that this is not really necessary, and the ends can - * be gotten by defaulting to the normal vfs read behavior, but this - * might be more efficient, because the will NOT invoke read-aheads - * and one of the purposes of this code is to bypass the buffer - * cache and keep from flushing it by reading in a program. - */ - /* - * calculate logical block and offset + * wait until the buffer is avail or gone */ - block = foff / bsize; - offset = foff % bsize; - s = splbio(); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + tsleep((caddr_t) bp, PVM, "vnwblk", 0); + continue; + } + amount = NBPG; + if ((foff + amount) > vnp->vnp_size) + amount = vnp->vnp_size - foff; /* - * if we have a buffer in core, then try to use it + * make sure that this page is in the buffer */ - while (bp = incore(vp, block)) { - int amount; - + if ((amount > 0) && (offset + amount) <= bp->b_bcount) { + bp->b_flags |= B_BUSY; + splx(s); + /* - * wait until the buffer is avail or gone + * map the requested page */ - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - tsleep ((caddr_t)bp, PVM, "vnwblk", 0); - continue; + pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage])); + pmap_update(); + + /* + * copy the data from the buffer + */ + bcopy(bp->b_un.b_addr + offset, (caddr_t) kva, amount); + if (amount < NBPG) { + bzero((caddr_t) kva + amount, NBPG - amount); } - amount = NBPG; - if ((foff + amount) > vnp->vnp_size) - amount = vnp->vnp_size - foff; + /* + * unmap the page and free the kva + */ + pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG); + kmem_free_wakeup(pager_map, kva, mapsize); /* - * make sure that this page is in the buffer + * release the buffer back to the block subsystem */ - if ((amount > 0) && (offset + amount) <= bp->b_bcount) { - bp->b_flags |= B_BUSY; - splx(s); + bp->b_flags &= ~B_BUSY; + wakeup((caddr_t) bp); - /* - * map the requested page - */ - pmap_enter(vm_map_pmap(pager_map), - kva, VM_PAGE_TO_PHYS(m[reqpage]), - VM_PROT_DEFAULT, TRUE); - /* - * copy the data from the buffer - */ - bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount); - if (amount < NBPG) { - bzero((caddr_t)kva + amount, NBPG - amount); - } - /* - * unmap the page and free the kva - */ - pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG); - kmem_free_wakeup(pager_map, kva, mapsize); - /* - * release the buffer back to the block subsystem - */ - bp->b_flags &= ~B_BUSY; - wakeup((caddr_t)bp); - /* - * we did not have to do any work to get the requested - * page, the read behind/ahead does not justify a read - */ - for (i = 0; i < count; i++) { - if (i != reqpage) { - vnode_pager_freepage(m[i]); - m[i] = 0; - } + /* + * we did not have to do any work to get the requested + * page, the read behind/ahead does not justify a read + */ + for (i = 0; i < count; i++) { + if (i != reqpage) { + vnode_pager_freepage(m[i]); } - /* - * sorry for the goto - */ - goto finishup; } + count = 1; + reqpage = 0; + m[0] = m[reqpage]; + /* - * buffer is nowhere to be found, read from the disk + * sorry for the goto */ - break; + goto finishup; } - foff = m[reqpage]->offset + paging_offset; - reqaddr = vnode_pager_addr(vp, foff); /* - * Make sure that our I/O request is contiguous. - * Scan backward and stop for the first discontiguous - * entry or stop for a page being in buffer cache. + * buffer is nowhere to be found, read from the disk */ - failflag = 0; - for (i = reqpage - 1; i >= 0; --i) { - int myaddr; - if (failflag || - incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || - (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset)) - != reqaddr + (i - reqpage) * NBPG) { - vnode_pager_freepage(m[i]); - m[i] = 0; - if (first == 0) - first = i + 1; - failflag = 1; - } - } + break; + } + splx(s); - /* - * Scan forward and stop for the first non-contiguous - * entry or stop for a page being in buffer cache. - */ - failflag = 0; - for (i = reqpage + 1; i < count; i++) { - int myaddr; - if (failflag || - incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || - (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset)) - != reqaddr + (i - reqpage) * NBPG) { - vnode_pager_freepage(m[i]); - m[i] = 0; - if (last == count) - last = i; - failflag = 1; - } + reqaddr = vnode_pager_addr(vp, foff); + + /* + * Make sure that our I/O request is contiguous. Scan backward and + * stop for the first discontiguous entry or stop for a page being in + * buffer cache. + */ + failflag = 0; + first = reqpage; + for (i = reqpage - 1; i >= 0; --i) { + if (failflag || + incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || + (vnode_pager_addr(vp, m[i]->offset + paging_offset)) + != reqaddr + (i - reqpage) * NBPG) { + vnode_pager_freepage(m[i]); + failflag = 1; + } else { + first = i; } + } - /* - * the first and last page have been calculated now, move input - * pages to be zero based... - */ - count = last; - if (first != 0) { - for (i = first; i < count; i++) { - m[i - first] = m[i]; - } - count -= first; - reqpage -= first; + /* + * Scan forward and stop for the first non-contiguous entry or stop + * for a page being in buffer cache. + */ + failflag = 0; + last = reqpage + 1; + for (i = reqpage + 1; i < count; i++) { + if (failflag || + incore(vp, (foff + (i - reqpage) * NBPG) / bsize) || + (vnode_pager_addr(vp, m[i]->offset + paging_offset)) + != reqaddr + (i - reqpage) * NBPG) { + vnode_pager_freepage(m[i]); + failflag = 1; + } else { + last = i + 1; } + } + /* + * the first and last page have been calculated now, move input pages + * to be zero based... + */ + count = last; + if (first != 0) { + for (i = first; i < count; i++) { + m[i - first] = m[i]; + } + count -= first; + reqpage -= first; + } - /* - * calculate the file virtual address for the transfer - */ - foff = m[0]->offset + paging_offset; - /* - * and get the disk physical address (in bytes) - */ - firstaddr = vnode_pager_addr(vp, foff); + /* + * calculate the file virtual address for the transfer + */ + foff = m[0]->offset + paging_offset; - /* - * calculate the size of the transfer - */ - if ((m[count - 1]->offset + paging_offset) + NBPG > vnp->vnp_size) - size = vnp->vnp_size - foff; - else - size = count * NBPG; + /* + * and get the disk physical address (in bytes) + */ + firstaddr = vnode_pager_addr(vp, foff); + /* + * calculate the size of the transfer + */ + size = count * NBPG; + if ((foff + size) > vnp->vnp_size) + size = vnp->vnp_size - foff; - /* - * and map the pages to be read into the kva - */ - for (i = 0; i < count; i++) - pmap_enter(vm_map_pmap(pager_map), - kva + NBPG * i, VM_PAGE_TO_PHYS(m[i]), - VM_PROT_DEFAULT, TRUE); - VHOLD(vp); - bp = getpbuf(); + /* + * round up physical size for real devices + */ + if (dp->v_type == VBLK || dp->v_type == VCHR) + size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); - /* build a minimal buffer header */ - bzero((caddr_t)bp, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_READ | B_CALL; - bp->b_iodone = vnode_pager_iodone; - /* B_PHYS is not set, but it is nice to fill this in */ - /* bp->b_proc = &proc0; */ - bp->b_proc = curproc; - bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; - bp->b_un.b_addr = (caddr_t) kva; - bp->b_blkno = firstaddr / DEV_BSIZE; - bp->b_vp = dp; - - /* Should be a BLOCK or character DEVICE if we get here */ - bp->b_dev = dp->v_rdev; - bp->b_bcount = NBPG * count; + /* + * and map the pages to be read into the kva + */ + for (i = 0; i < count; i++) + pmap_kenter(kva + NBPG * i, VM_PAGE_TO_PHYS(m[i])); - /* do the input */ - VOP_STRATEGY(bp); + pmap_update(); + VHOLD(vp); + bp = getpbuf(); - /* we definitely need to be at splbio here */ + /* build a minimal buffer header */ + bp->b_flags = B_BUSY | B_READ | B_CALL; + bp->b_iodone = vnode_pager_iodone; + /* B_PHYS is not set, but it is nice to fill this in */ + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = firstaddr / DEV_BSIZE; + bp->b_vp = dp; - while ((bp->b_flags & B_DONE) == 0) { - tsleep((caddr_t)bp, PVM, "vnread", 0); - } - splx(s); - if ((bp->b_flags & B_ERROR) != 0) - error = EIO; + /* Should be a BLOCK or character DEVICE if we get here */ + bp->b_dev = dp->v_rdev; + bp->b_bcount = size; + bp->b_bufsize = size; - if (!error) { - if (size != count * NBPG) - bzero((caddr_t)kva + size, NBPG * count - size); - } - HOLDRELE(vp); + /* do the input */ + VOP_STRATEGY(bp); - pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count); - kmem_free_wakeup(pager_map, kva, mapsize); + s = splbio(); + /* we definitely need to be at splbio here */ - /* - * free the buffer header back to the swap buffer pool - */ - relpbuf(bp); + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t) bp, PVM, "vnread", 0); + } + splx(s); + if ((bp->b_flags & B_ERROR) != 0) + error = EIO; + if (!error) { + if (size != count * NBPG) + bzero((caddr_t) kva + size, NBPG * count - size); } + HOLDRELE(vp); + + pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count); + kmem_free_wakeup(pager_map, kva, mapsize); + + /* + * free the buffer header back to the swap buffer pool + */ + relpbuf(bp); finishup: - if (rw == UIO_READ) for (i = 0; i < count; i++) { - /* - * we dont mess with pages that have been already - * deallocated.... - */ - if (!m[i]) - continue; pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); m[i]->flags |= PG_CLEAN; m[i]->flags &= ~PG_LAUNDRY; if (i != reqpage) { + /* - * whether or not to leave the page activated - * is up in the air, but we should put the page - * on a page queue somewhere. (it already is in - * the object). - * Result: It appears that emperical results show - * that deactivating pages is best. + * whether or not to leave the page activated is up in + * the air, but we should put the page on a page queue + * somewhere. (it already is in the object). Result: + * It appears that emperical results show that + * deactivating pages is best. */ + /* - * just in case someone was asking for this - * page we now tell them that it is ok to use + * just in case someone was asking for this page we + * now tell them that it is ok to use */ if (!error) { vm_page_deactivate(m[i]); @@ -927,15 +1060,380 @@ finishup: } } } - if (!error && rw == UIO_WRITE) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); - m[reqpage]->flags |= PG_CLEAN; - m[reqpage]->flags &= ~PG_LAUNDRY; - } if (error) { - printf("vnode pager error: %d\n", error); + printf("vnode pager read error: %d\n", error); } if (errtype) return error; return (error ? VM_PAGER_FAIL : VM_PAGER_OK); } + +/* + * old-style vnode pager output routine + */ +int +vnode_pager_output_old(vnp, m) + register vn_pager_t vnp; + vm_page_t m; +{ + vm_offset_t foff; + vm_offset_t kva; + vm_offset_t size; + struct iovec aiov; + struct uio auio; + struct vnode *vp; + int error; + + vp = vnp->vnp_vp; + foff = m->offset + m->object->paging_offset; + + /* + * Return failure if beyond current EOF + */ + if (foff >= vnp->vnp_size) { + return VM_PAGER_BAD; + } else { + size = NBPG; + if (foff + size > vnp->vnp_size) + size = vnp->vnp_size - foff; +/* + * Allocate a kernel virtual address and initialize so that + * we can use VOP_WRITE routines. + */ + kva = vm_pager_map_page(m); + aiov.iov_base = (caddr_t) kva; + aiov.iov_len = size; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = foff; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_resid = size; + auio.uio_procp = (struct proc *) 0; + + error = VOP_WRITE(vp, &auio, IO_PAGER, curproc->p_ucred); + + if (!error) { + if ((size - auio.uio_resid) == 0) { + error = EINVAL; + } + } + vm_pager_unmap_page(kva); + return error ? VM_PAGER_FAIL : VM_PAGER_OK; + } +} + +/* + * vnode pager output on a small-block file system + */ +int +vnode_pager_output_smlfs(vnp, m) + vn_pager_t vnp; + vm_page_t m; +{ + int i; + int s; + vm_offset_t paging_offset; + struct vnode *dp, *vp; + struct buf *bp; + vm_offset_t mapsize; + vm_offset_t foff; + vm_offset_t kva; + int fileaddr; + int block; + vm_offset_t bsize; + int error = 0; + + paging_offset = m->object->paging_offset; + vp = vnp->vnp_vp; + bsize = vp->v_mount->mnt_stat.f_bsize; + foff = m->offset + paging_offset; + + VOP_BMAP(vp, foff, &dp, 0); + kva = vm_pager_map_page(m); + for (i = 0; !error && i < (NBPG / bsize); i++) { + + /* + * calculate logical block and offset + */ + fileaddr = vnode_pager_addr(vp, foff + i * bsize); + if (fileaddr != -1) { + s = splbio(); + if (bp = incore(vp, (foff / bsize) + i)) { + bp = getblk(vp, (foff / bsize) + i, bp->b_bufsize); + bp->b_flags |= B_INVAL; + brelse(bp); + } + splx(s); + + VHOLD(vp); + bp = getpbuf(); + + /* build a minimal buffer header */ + bp->b_flags = B_BUSY | B_CALL | B_WRITE; + bp->b_iodone = vnode_pager_iodone; + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva + i * bsize; + bp->b_blkno = fileaddr / DEV_BSIZE; + bp->b_vp = dp; + ++dp->v_numoutput; + /* for NFS */ + bp->b_dirtyoff = 0; + bp->b_dirtyend = bsize; + + /* + * Should be a BLOCK or character DEVICE if we get + * here + */ + bp->b_dev = dp->v_rdev; + bp->b_bcount = bsize; + bp->b_bufsize = bsize; + + /* do the input */ + VOP_STRATEGY(bp); + + /* we definitely need to be at splbio here */ + + s = splbio(); + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t) bp, PVM, "vnswrt", 0); + } + splx(s); + if ((bp->b_flags & B_ERROR) != 0) + error = EIO; + + HOLDRELE(vp); + + /* + * free the buffer header back to the swap buffer pool + */ + relpbuf(bp); + } + } + vm_pager_unmap_page(kva); + if (error) + return VM_PAGER_FAIL; + else + return VM_PAGER_OK; +} + +/* + * generic vnode pager output routine + */ +int +vnode_pager_output(vnp, m, count, rtvals) + vn_pager_t vnp; + vm_page_t *m; + int count; + int *rtvals; +{ + int i, j; + vm_offset_t kva, foff; + int size; + struct proc *p = curproc; /* XXX */ + vm_object_t object; + vm_offset_t paging_offset; + struct vnode *dp, *vp; + struct buf *bp; + vm_offset_t mapsize; + vm_offset_t reqaddr; + int bsize; + int s; + + int error = 0; + +retryoutput: + object = m[0]->object; /* all vm_page_t items are in same object */ + paging_offset = object->paging_offset; + + vp = vnp->vnp_vp; + bsize = vp->v_mount->mnt_stat.f_bsize; + + for (i = 0; i < count; i++) + rtvals[i] = VM_PAGER_TRYAGAIN; + + /* + * if the filesystem does not have a bmap, then use the old code + */ + if (VOP_BMAP(vp, m[0]->offset + paging_offset, &dp, 0)) { + + rtvals[0] = vnode_pager_output_old(vnp, m[0]); + + pmap_clear_modify(VM_PAGE_TO_PHYS(m[0])); + m[0]->flags |= PG_CLEAN; + m[0]->flags &= ~PG_LAUNDRY; + return rtvals[0]; + } + + /* + * if the filesystem has a small blocksize, then use the small block + * filesystem output code + */ + if ((bsize < NBPG) && + (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { + + for (i = 0; i < count; i++) { + rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]); + if (rtvals[i] == VM_PAGER_OK) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + } + } + return rtvals[0]; + } + + /* + * get some kva for the output + */ + kva = kmem_alloc_pageable(pager_map, (mapsize = count * NBPG)); + if (!kva) { + kva = kmem_alloc_pageable(pager_map, (mapsize = NBPG)); + count = 1; + if (!kva) + return rtvals[0]; + } + for (i = 0; i < count; i++) { + foff = m[i]->offset + paging_offset; + if (foff >= vnp->vnp_size) { + for (j = i; j < count; j++) + rtvals[j] = VM_PAGER_BAD; + count = i; + break; + } + } + if (count == 0) { + return rtvals[0]; + } + foff = m[0]->offset + paging_offset; + reqaddr = vnode_pager_addr(vp, foff); + + /* + * Scan forward and stop for the first non-contiguous entry or stop + * for a page being in buffer cache. + */ + for (i = 1; i < count; i++) { + if (vnode_pager_addr(vp, m[i]->offset + paging_offset) + != reqaddr + i * NBPG) { + count = i; + break; + } + } + + /* + * calculate the size of the transfer + */ + size = count * NBPG; + if ((foff + size) > vnp->vnp_size) + size = vnp->vnp_size - foff; + + /* + * round up physical size for real devices + */ + if (dp->v_type == VBLK || dp->v_type == VCHR) + size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); + + /* + * and map the pages to be read into the kva + */ + for (i = 0; i < count; i++) + pmap_kenter(kva + NBPG * i, VM_PAGE_TO_PHYS(m[i])); + pmap_update(); +/* + printf("vnode: writing foff: %d, devoff: %d, size: %d\n", + foff, reqaddr, size); +*/ + + /* + * next invalidate the incore vfs_bio data + */ + for (i = 0; i < count; i++) { + int filblock = (foff + i * NBPG) / bsize; + struct buf *fbp; + + s = splbio(); + if (fbp = incore(vp, filblock)) { + fbp = getblk(vp, filblock, fbp->b_bufsize); + if (fbp->b_flags & B_DELWRI) { + if (fbp->b_bufsize <= NBPG) + fbp->b_flags &= ~B_DELWRI; + else { + bwrite(fbp); + fbp = getblk(vp, filblock, + fbp->b_bufsize); + } + } + fbp->b_flags |= B_INVAL; + brelse(fbp); + } + splx(s); + } + + + VHOLD(vp); + bp = getpbuf(); + + /* build a minimal buffer header */ + bp->b_flags = B_BUSY | B_WRITE | B_CALL; + bp->b_iodone = vnode_pager_iodone; + /* B_PHYS is not set, but it is nice to fill this in */ + /* bp->b_proc = &proc0; */ + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = reqaddr / DEV_BSIZE; + bp->b_vp = dp; + ++dp->v_numoutput; + + /* Should be a BLOCK or character DEVICE if we get here */ + bp->b_dev = dp->v_rdev; + /* for NFS */ + bp->b_dirtyoff = 0; + bp->b_dirtyend = size; + + bp->b_bcount = size; + bp->b_bufsize = size; + + /* do the output */ + VOP_STRATEGY(bp); + + s = splbio(); + + /* we definitely need to be at splbio here */ + + while ((bp->b_flags & B_DONE) == 0) { + tsleep((caddr_t) bp, PVM, "vnwrite", 0); + } + splx(s); + + if ((bp->b_flags & B_ERROR) != 0) + error = EIO; + + HOLDRELE(vp); + + pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count); + kmem_free_wakeup(pager_map, kva, mapsize); + + /* + * free the buffer header back to the swap buffer pool + */ + relpbuf(bp); + + if (!error) { + for (i = 0; i < count; i++) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->flags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + rtvals[i] = VM_PAGER_OK; + } + } else if (count != 1) { + error = 0; + count = 1; + goto retryoutput; + } + if (error) { + printf("vnode pager write error: %d\n", error); + } + return (error ? VM_PAGER_FAIL : VM_PAGER_OK); +} |
