Diffstat (limited to 'sys/vm/vnode_pager.c')
-rw-r--r--	sys/vm/vnode_pager.c	1334
1 files changed, 1089 insertions, 245 deletions
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 9c2f8260cfb3..b8e5a192796f 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1,7 +1,8 @@
 /*
  * Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1993,1994 John S. Dyson
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
@@ -35,7 +36,8 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)vnode_pager.c	8.8 (Berkeley) 2/13/94
+ * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
+ *	$Id: vnode_pager.c,v 1.17 1994/04/05 03:23:53 davidg Exp $
 */
 
 /*
@@ -46,6 +48,24 @@
  *	fix credential use (uses current process credentials now)
  */
 
+/*
+ * MODIFICATIONS:
+ *	John S. Dyson	08 Dec 93
+ *
+ * This file, in conjunction with some vm_fault mods, eliminates the performance
+ * advantage of using the buffer cache and minimizes memory copies.
+ *
+ * 1) Supports multiple-block reads
+ * 2) Bypasses buffer cache for reads
+ *
+ * TODO:
+ *
+ * 1) Totally bypass buffer cache for reads
+ *	(Currently will still sometimes use buffer cache for reads)
+ * 2) Bypass buffer cache for writes
+ *	(Code does not support it, but mods are simple)
+ */
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
@@ -58,51 +78,44 @@
 #include <vm/vm_page.h>
 #include <vm/vnode_pager.h>
 
-struct pagerlst	vnode_pager_list;	/* list of managed vnodes */
+#include <sys/buf.h>
+#include <miscfs/specfs/specdev.h>
 
-#ifdef DEBUG
-int	vpagerdebug = 0x00;
-#define	VDB_FOLLOW	0x01
-#define	VDB_INIT	0x02
-#define	VDB_IO		0x04
-#define	VDB_FAIL	0x08
-#define	VDB_ALLOC	0x10
-#define	VDB_SIZE	0x20
-#endif
+int vnode_pager_putmulti();
 
-static vm_pager_t	 vnode_pager_alloc
-			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void		 vnode_pager_cluster
-			    __P((vm_pager_t, vm_offset_t,
-				 vm_offset_t *, vm_offset_t *));
-static void		 vnode_pager_dealloc __P((vm_pager_t));
-static int		 vnode_pager_getpage
-			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t	 vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
-static void		 vnode_pager_init __P((void));
-static int		 vnode_pager_io
-			    __P((vn_pager_t, vm_page_t *, int,
-				boolean_t, enum uio_rw));
-static boolean_t	 vnode_pager_putpage
-			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+void vnode_pager_init();
+vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
+void vnode_pager_dealloc();
+int vnode_pager_getpage();
+int vnode_pager_getmulti();
+int vnode_pager_putpage();
+boolean_t vnode_pager_haspage();
 
 struct pagerops vnodepagerops = {
 	vnode_pager_init,
 	vnode_pager_alloc,
 	vnode_pager_dealloc,
 	vnode_pager_getpage,
+	vnode_pager_getmulti,
 	vnode_pager_putpage,
-	vnode_pager_haspage,
-	vnode_pager_cluster
+	vnode_pager_putmulti,
+	vnode_pager_haspage
 };
 
-static void
+static int vnode_pager_input(vn_pager_t vnp, vm_page_t *m, int count, int reqpage);
+static int vnode_pager_output(vn_pager_t vnp, vm_page_t *m, int count, int *rtvals);
+struct buf * getpbuf();
+void relpbuf(struct buf *bp);
+
+extern vm_map_t pager_map;
+
+struct pagerlst	vnode_pager_list;	/* list of managed vnodes */
+
+#define MAXBP (PAGE_SIZE/DEV_BSIZE);
+
+void
 vnode_pager_init()
 {
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_init()\n");
-#endif
	TAILQ_INIT(&vnode_pager_list);
 }
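The pagerops table above is the entire interface the VM system sees: paging requests are dispatched through these function pointers, and the change simply adds getmulti/putmulti entries beside the single-page ones, with the single-page routines becoming thin wrappers. As a rough illustration of the dispatch pattern only, here is a minimal user-space sketch; the struct layout and pgo_* names are invented for the example and are not the kernel's definitions.

    /*
     * Sketch: dispatch through a pagerops-style table, with the
     * single-page entry wrapping the multi-page one the way
     * vnode_pager_getpage() wraps vnode_pager_input() below.
     * Names are hypothetical stand-ins, not kernel APIs.
     */
    #include <stdio.h>

    static int
    sketch_getmulti(int *pages, int n, int req)
    {
	    printf("read %d page(s), requested index %d\n", n, req);
	    return 0;
    }

    static int
    sketch_getpage(int page)
    {
	    int marray[1];

	    marray[0] = page;	/* wrap the single page in a 1-element array */
	    return sketch_getmulti(marray, 1, 0);
    }

    struct sketch_pagerops {
	    int (*pgo_getpage)(int);
	    int (*pgo_getmulti)(int *, int, int);
    } ops = { sketch_getpage, sketch_getmulti };

    int
    main(void)
    {
	    return (*ops.pgo_getpage)(42);	/* VM-layer-style dispatch */
    }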
@@ -110,12 +123,12 @@
  * Allocate (or lookup) pager for a vnode.
  * Handle is a vnode pointer.
  */
-static vm_pager_t
-vnode_pager_alloc(handle, size, prot, foff)
+vm_pager_t
+vnode_pager_alloc(handle, size, prot, offset)
 	caddr_t handle;
 	vm_size_t size;
 	vm_prot_t prot;
-	vm_offset_t foff;
+	vm_offset_t offset;
 {
 	register vm_pager_t pager;
 	register vn_pager_t vnp;
@@ -124,10 +137,6 @@ vnode_pager_alloc(handle, size, prot, foff)
 	struct vnode *vp;
 	struct proc *p = curproc;	/* XXX */
 
-#ifdef DEBUG
-	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
-		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
-#endif
 	/*
 	 * Pageout to vnode, no can do yet.
 	 */
@@ -171,12 +180,12 @@ vnode_pager_alloc(handle, size, prot, foff)
 	vnp->vnp_flags = 0;
 	vnp->vnp_vp = vp;
 	vnp->vnp_size = vattr.va_size;
+	TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
 	pager->pg_handle = handle;
 	pager->pg_type = PG_VNODE;
-	pager->pg_flags = 0;
 	pager->pg_ops = &vnodepagerops;
-	pager->pg_data = vnp;
+	pager->pg_data = (caddr_t)vnp;
 	vp->v_vmdata = (caddr_t)pager;
 	} else {
 	/*
@@ -184,121 +193,104 @@
 	 * cache if found and also gain a reference to the object.
 	 */
 	object = vm_object_lookup(pager);
-#ifdef DEBUG
-	vnp = (vn_pager_t)pager->pg_data;
-#endif
 	}
-#ifdef DEBUG
-	if (vpagerdebug & VDB_ALLOC)
-		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
-		       vp, vnp->vnp_size, pager, object);
-#endif
 	return(pager);
 }
 
-static void
+void
 vnode_pager_dealloc(pager)
 	vm_pager_t pager;
 {
 	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
 	register struct vnode *vp;
-#ifdef NOTDEF
 	struct proc *p = curproc;		/* XXX */
-#endif
 
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_dealloc(%x)\n", pager);
-#endif
 	if (vp = vnp->vnp_vp) {
 		vp->v_vmdata = NULL;
 		vp->v_flag &= ~VTEXT;
-#if NOTDEF
+#if 0
 		/* can hang if done at reboot on NFS FS */
 		(void) VOP_FSYNC(vp, p->p_ucred, p);
 #endif
 		vrele(vp);
 	}
+	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
 	free((caddr_t)vnp, M_VMPGDATA);
 	free((caddr_t)pager, M_VMPAGER);
 }
 
-static int
-vnode_pager_getpage(pager, mlist, npages, sync)
+int
+vnode_pager_getmulti(pager, m, count, reqpage, sync)
+	vm_pager_t pager;
+	vm_page_t *m;
+	int count;
+	int reqpage;
+	boolean_t sync;
+{
+
+	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
+}
+
+int
+vnode_pager_getpage(pager, m, sync)
 	vm_pager_t pager;
-	vm_page_t *mlist;
-	int npages;
+	vm_page_t m;
 	boolean_t sync;
 {
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
-		       pager, mlist, npages, sync);
-#endif
-	return(vnode_pager_io((vn_pager_t)pager->pg_data,
-			      mlist, npages, sync, UIO_READ));
+	int err;
+	vm_page_t marray[1];
+	if (pager == NULL)
+		return FALSE;
+	marray[0] = m;
+
+	return vnode_pager_input((vn_pager_t)pager->pg_data, marray, 1, 0);
 }
 
-static boolean_t
-vnode_pager_putpage(pager, mlist, npages, sync)
+boolean_t
+vnode_pager_putpage(pager, m, sync)
 	vm_pager_t pager;
-	vm_page_t *mlist;
-	int npages;
+	vm_page_t m;
 	boolean_t sync;
 {
 	int err;
+	vm_page_t marray[1];
+	int rtvals[1];
 
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
-		       pager, mlist, npages, sync);
-#endif
 	if (pager == NULL)
-		return (FALSE);			/* ??? */
-	err = vnode_pager_io((vn_pager_t)pager->pg_data,
-			     mlist, npages, sync, UIO_WRITE);
-	/*
-	 * If the operation was successful, mark the pages clean.
-	 */
-	if (err == VM_PAGER_OK) {
-		while (npages--) {
-			(*mlist)->flags |= PG_CLEAN;
-			pmap_clear_modify(VM_PAGE_TO_PHYS(*mlist));
-			mlist++;
-		}
-	}
-	return(err);
+		return FALSE;
+	marray[0] = m;
+	vnode_pager_output((vn_pager_t)pager->pg_data, marray, 1, rtvals);
+	return rtvals[0];
+}
+
+int
+vnode_pager_putmulti(pager, m, c, sync, rtvals)
+	vm_pager_t pager;
+	vm_page_t *m;
+	int c;
+	boolean_t sync;
+	int *rtvals;
+{
+	return vnode_pager_output((vn_pager_t)pager->pg_data, m, c, rtvals);
 }
 
-static boolean_t
+
+boolean_t
 vnode_pager_haspage(pager, offset)
 	vm_pager_t pager;
 	vm_offset_t offset;
 {
 	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
 	daddr_t bn;
+	int run;
 	int err;
 
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
-#endif
-
 	/*
 	 * Offset beyond end of file, do not have the page
-	 * Lock the vnode first to make sure we have the most recent
-	 * version of the size.
 	 */
-	VOP_LOCK(vnp->vnp_vp);
 	if (offset >= vnp->vnp_size) {
-		VOP_UNLOCK(vnp->vnp_vp);
-#ifdef DEBUG
-		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
-			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
-			       pager, offset, vnp->vnp_size);
-#endif
		return(FALSE);
 	}
 
@@ -311,53 +303,14 @@
 	 */
 	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
-		       (struct vnode **)0, &bn, NULL);
-	VOP_UNLOCK(vnp->vnp_vp);
+		       (struct vnode **)0, &bn, 0);
 	if (err) {
-#ifdef DEBUG
-		if (vpagerdebug & VDB_FAIL)
-			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
-			       err, pager, offset);
-#endif
		return(TRUE);
 	}
 	return((long)bn < 0 ? FALSE : TRUE);
 }
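vnode_pager_haspage() reduces a byte offset to a logical file-system block and asks VOP_BMAP whether that block has backing store; a negative block number means a hole. A worked example of that arithmetic with assumed numbers (8 KB f_iosize, a toy bmap table standing in for VOP_BMAP):

    #include <stdio.h>

    int
    main(void)
    {
	    long f_iosize = 8192;			/* assumed block size */
	    long bmap[4] = { 320, 328, -1, 344 };	/* -1 marks a hole */
	    long offset = 20000;			/* byte offset probed */

	    long lblkno = offset / f_iosize;		/* logical block 2 */
	    /* block 2 maps to -1, so the pager would report FALSE */
	    printf("offset %ld -> block %ld -> %s\n", offset, lblkno,
		bmap[lblkno] < 0 ? "FALSE (hole)" : "TRUE");
	    return 0;
    }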
 
-static void
-vnode_pager_cluster(pager, offset, loffset, hoffset)
-	vm_pager_t	pager;
-	vm_offset_t	offset;
-	vm_offset_t	*loffset;
-	vm_offset_t	*hoffset;
-{
-	vn_pager_t vnp = (vn_pager_t)pager->pg_data;
-	vm_offset_t loff, hoff;
-
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_cluster(%x, %x) ", pager, offset);
-#endif
-	loff = offset;
-	if (loff >= vnp->vnp_size)
-		panic("vnode_pager_cluster: bad offset");
-	/*
-	 * XXX could use VOP_BMAP to get maxcontig value
-	 */
-	hoff = loff + MAXBSIZE;
-	if (hoff > round_page(vnp->vnp_size))
-		hoff = round_page(vnp->vnp_size);
-
-	*loffset = loff;
-	*hoffset = hoff;
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("returns [%x-%x]\n", loff, hoff);
-#endif
-}
-
 /*
- * (XXX)
  * Lets the VM system know about a change in size for a file.
  * If this vnode is mapped into some address space (i.e. we have a pager
  * for it) we adjust our own internal size and flush any cached pages in
@@ -399,19 +352,14 @@
 	if (object == NULL)
		return;
 
-#ifdef DEBUG
-	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
-		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
-		       vp, object, vnp->vnp_size, nsize);
-#endif
 	/*
 	 * File has shrunk.
 	 * Toss any cached pages beyond the new EOF.
 	 */
-	if (nsize < vnp->vnp_size) {
+	if (round_page(nsize) < round_page(vnp->vnp_size)) {
		vm_object_lock(object);
		vm_object_page_remove(object,
-			(vm_offset_t)nsize, vnp->vnp_size);
+			(vm_offset_t)round_page(nsize), round_page(vnp->vnp_size));
		vm_object_unlock(object);
 	}
 	vnp->vnp_size = (vm_offset_t)nsize;
@@ -425,24 +373,67 @@
 	register vm_pager_t pager, npager;
 	struct vnode *vp;
 
-	for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){
+	pager = vnode_pager_list.tqh_first;
+	while( pager) {
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render pager invalid
		 */
-		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
+		npager = pager->pg_list.tqe_next;
-		if (mp == (struct mount *)0 || vp->v_mount == mp) {
-			VOP_LOCK(vp);
+		if (mp == (struct mount *)0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
-			VOP_UNLOCK(vp);
-		}
+		pager = npager;
 	}
 }
 
 /*
  * Remove vnode associated object from the object cache.
  *
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+	register struct vnode *vp;
+{
+	register vm_object_t object;
+	boolean_t uncached, locked;
+	vm_pager_t pager;
+
+	/*
+	 * Not a mapped vnode
+	 */
+	pager = (vm_pager_t)vp->v_vmdata;
+	if (pager == NULL)
+		return (TRUE);
+	/*
+	 * Unlock the vnode if it is currently locked.
+	 * We do this since uncaching the object may result
+	 * in its destruction which may initiate paging
+	 * activity which may necessitate locking the vnode.
+	 */
+	locked = VOP_ISLOCKED(vp);
+	if (locked)
+		VOP_UNLOCK(vp);
+	/*
+	 * Must use vm_object_lookup() as it actually removes
+	 * the object from the cache list.
+	 */
+	object = vm_object_lookup(pager);
+	if (object) {
+		uncached = (object->ref_count <= 1);
+		pager_cache(object, FALSE);
+	} else
+		uncached = TRUE;
+	if (locked)
+		VOP_LOCK(vp);
+	return(uncached);
+}
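The setsize change above compares page-rounded sizes, so a truncation that stays within the file's final page no longer discards that page from the object. A small worked example of the rounding, with an assumed 4 KB page size:

    #include <stdio.h>

    #define PAGE_SIZE 4096
    /* page rounding as a round_page() macro behaves, assuming 4 KB pages */
    #define round_page(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int
    main(void)
    {
	    long oldsize = 10000, nsize = 9000;

	    /* both round to 12288, so no cached pages are removed */
	    printf("%ld -> %ld: remove pages? %s\n", oldsize, nsize,
		round_page(nsize) < round_page(oldsize) ? "yes" : "no");

	    nsize = 5000;	/* rounds to 8192 < 12288: tail pages go */
	    printf("%ld -> %ld: remove pages? %s\n", oldsize, nsize,
		round_page(nsize) < round_page(oldsize) ? "yes" : "no");
	    return 0;
    }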
+#if 0
+/*
+ * Remove vnode associated object from the object cache.
+ *
  * XXX unlock the vnode if it is currently locked.
  * We must do this since uncaching the object may result in its
  * destruction which may initiate paging activity which may necessitate
@@ -462,14 +453,6 @@ vnode_pager_uncache(vp)
 	pager = (vm_pager_t)vp->v_vmdata;
 	if (pager == NULL)
		return (TRUE);
-#ifdef DEBUG
-	if (!VOP_ISLOCKED(vp)) {
-		extern int (**nfsv2_vnodeop_p)();
-
-		if (vp->v_op != nfsv2_vnodeop_p)
-			panic("vnode_pager_uncache: vnode not locked!");
-	}
-#endif
 	/*
 	 * Must use vm_object_lookup() as it actually removes
 	 * the object from the cache list.
@@ -484,97 +467,958 @@ vnode_pager_uncache(vp)
		uncached = TRUE;
 	return(uncached);
 }
+#endif
 
-static int
-vnode_pager_io(vnp, mlist, npages, sync, rw)
-	register vn_pager_t vnp;
-	vm_page_t *mlist;
-	int npages;
-	boolean_t sync;
-	enum uio_rw rw;
+
+void
+vnode_pager_freepage(m)
+	vm_page_t m;
+{
+	PAGE_WAKEUP(m);
+	vm_page_free(m);
+}
+
+/*
+ * calculate the linear (byte) disk address of specified virtual
+ * file address
+ */
+vm_offset_t
+vnode_pager_addr(vp, address)
+	struct vnode *vp;
+	vm_offset_t address;
+{
+	int rtaddress;
+	int bsize;
+	vm_offset_t block;
+	struct vnode *rtvp;
+	int err;
+	int vblock, voffset;
+	int run;
+
+	bsize = vp->v_mount->mnt_stat.f_iosize;
+	vblock = address / bsize;
+	voffset = address % bsize;
+
+	err = VOP_BMAP(vp,vblock,&rtvp,&block,0);
+
+	if( err)
+		rtaddress = -1;
+	else
+		rtaddress = block * DEV_BSIZE + voffset;
+
+	return rtaddress;
+}
+
+/*
+ * interrupt routine for I/O completion
+ */
+void
+vnode_pager_iodone(bp)
+	struct buf *bp;
 {
+	bp->b_flags |= B_DONE;
+	wakeup((caddr_t)bp);
+}
+
+/*
+ * small block file system vnode pager input
+ */
+int
+vnode_pager_input_smlfs(vnp, m)
+	vn_pager_t vnp;
+	vm_page_t m;
+{
+	int i;
+	int s;
+	vm_offset_t paging_offset;
+	struct vnode *dp, *vp;
+	struct buf *bp;
+	vm_offset_t mapsize;
+	vm_offset_t foff;
+	vm_offset_t kva;
+	int fileaddr;
+	int block;
+	vm_offset_t bsize;
+	int error = 0;
+	int run;
+
+	paging_offset = m->object->paging_offset;
+	vp = vnp->vnp_vp;
+	bsize = vp->v_mount->mnt_stat.f_iosize;
+	foff = m->offset + paging_offset;
+
+	VOP_BMAP(vp, foff, &dp, 0, 0);
+
+	kva = vm_pager_map_page(m);
+
+	for(i=0;i<PAGE_SIZE/bsize;i++) {
+		/*
+		 * calculate logical block and offset
+		 */
+		block = foff / bsize + i;
+		s = splbio();
+		while (bp = incore(vp, block)) {
+			int amount;
+
+			/*
+			 * wait until the buffer is avail or gone
+			 */
+			if (bp->b_flags & B_BUSY) {
+				bp->b_flags |= B_WANTED;
+				tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+				continue;
+			}
+
+			amount = bsize;
+			if ((foff + bsize) > vnp->vnp_size)
+				amount = vnp->vnp_size - foff;
+
+			/*
+			 * make sure that this page is in the buffer
+			 */
+			if ((amount > 0) && amount <= bp->b_bcount) {
+				bp->b_flags |= B_BUSY;
+				splx(s);
+
+				/*
+				 * copy the data from the buffer
+				 */
+				bcopy(bp->b_un.b_addr, (caddr_t)kva + i * bsize, amount);
+				if (amount < bsize) {
+					bzero((caddr_t)kva + amount, bsize - amount);
+				}
+				bp->b_flags &= ~B_BUSY;
+				wakeup((caddr_t)bp);
+				goto nextblock;
+			}
+			break;
+		}
+		splx(s);
+		fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+		if( fileaddr != -1) {
+			bp = getpbuf();
+			VHOLD(vp);
+
+			/* build a minimal buffer header */
+			bp->b_flags = B_BUSY | B_READ | B_CALL;
+			bp->b_iodone = vnode_pager_iodone;
+			bp->b_proc = curproc;
+			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+			if( bp->b_rcred != NOCRED)
+				crhold(bp->b_rcred);
+			if( bp->b_wcred != NOCRED)
+				crhold(bp->b_wcred);
+			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+			bp->b_blkno = fileaddr / DEV_BSIZE;
+			bgetvp(dp, bp);
+			bp->b_bcount = bsize;
+			bp->b_bufsize = bsize;
+
+			/* do the input */
+			VOP_STRATEGY(bp);
+
+			/* we definitely need to be at splbio here */
+
+			s = splbio();
+			while ((bp->b_flags & B_DONE) == 0) {
+				tsleep((caddr_t)bp, PVM, "vnsrd", 0);
+			}
+			splx(s);
+			if ((bp->b_flags & B_ERROR) != 0)
+				error = EIO;
+
+			/*
+			 * free the buffer header back to the swap buffer pool
+			 */
+			relpbuf(bp);
+			HOLDRELE(vp);
+			if( error)
+				break;
+		} else {
+			bzero((caddr_t) kva + i * bsize, bsize);
+		}
+nextblock:
+	}
+	vm_pager_unmap_page(kva);
+	if( error) {
+		return VM_PAGER_FAIL;
+	}
+	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+	m->flags |= PG_CLEAN;
+	m->flags &= ~PG_LAUNDRY;
+	return VM_PAGER_OK;
+
+}
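vnode_pager_addr() converts a byte offset in a file into a byte address on the underlying device: divide by the file system block size to get the logical block, let VOP_BMAP translate that to a device block, then scale by DEV_BSIZE and add back the intra-block offset. A standalone sketch of the same arithmetic with assumed constants (f_iosize = 8192, DEV_BSIZE = 512) and a pretend bmap result; this is not kernel code:

    #include <stdio.h>

    #define DEV_BSIZE 512

    int
    main(void)
    {
	    long bsize = 8192;		/* assumed f_iosize */
	    long address = 20000;		/* byte offset into the file */
	    long vblock = address / bsize;	/* logical fs block: 2 */
	    long voffset = address % bsize;	/* offset within block: 3616 */
	    long block = 1000;		/* pretend VOP_BMAP returned this */

	    /* linear disk address in bytes, as in vnode_pager_addr() */
	    long rtaddress = block * DEV_BSIZE + voffset;
	    printf("disk byte address: %ld\n", rtaddress);	/* 515616 */
	    return 0;
    }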
+
+/*
+ * old style vnode pager input routine
+ */
+int
+vnode_pager_input_old(vnp, m)
+	vn_pager_t vnp;
+	vm_page_t m;
+{
+	int i;
 	struct uio auio;
 	struct iovec aiov;
+	int error;
+	int size;
+	vm_offset_t foff;
+	vm_offset_t kva;
+
+	error = 0;
+	foff = m->offset + m->object->paging_offset;
+	/*
+	 * Return failure if beyond current EOF
+	 */
+	if (foff >= vnp->vnp_size) {
+		return VM_PAGER_BAD;
+	} else {
+		size = PAGE_SIZE;
+		if (foff + size > vnp->vnp_size)
+			size = vnp->vnp_size - foff;
+/*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_READ/WRITE routines.
+ */
+		kva = vm_pager_map_page(m);
+		aiov.iov_base = (caddr_t)kva;
+		aiov.iov_len = size;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = foff;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_rw = UIO_READ;
+		auio.uio_resid = size;
+		auio.uio_procp = (struct proc *)0;
+
+		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
+		if (!error) {
+			register int count = size - auio.uio_resid;
+
+			if (count == 0)
+				error = EINVAL;
+			else if (count != PAGE_SIZE)
+				bzero((caddr_t)kva + count, PAGE_SIZE - count);
+		}
+		vm_pager_unmap_page(kva);
+	}
+	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+	m->flags |= PG_CLEAN;
+	m->flags &= ~PG_LAUNDRY;
+	return error?VM_PAGER_FAIL:VM_PAGER_OK;
+}
+
+/*
+ * generic vnode pager input routine
+ */
+int
+vnode_pager_input(vnp, m, count, reqpage)
+	register vn_pager_t vnp;
+	vm_page_t *m;
+	int count, reqpage;
+{
+	int i,j;
 	vm_offset_t kva, foff;
-	int error, size;
+	int size;
 	struct proc *p = curproc;	/* XXX */
+	vm_object_t object;
+	vm_offset_t paging_offset;
+	struct vnode *dp, *vp;
+	vm_offset_t mapsize;
+	int bsize;
+
+	int first, last;
+	int reqaddr, firstaddr;
+	int run;
+	int block, offset;
+
+	int nbp;
+	struct buf *bp;
+	int s;
+	int failflag;
+
+	int errtype=0;	/* 0 is file type otherwise vm type */
+	int error = 0;
+
+	object = m[reqpage]->object;	/* all vm_page_t items are in same object */
+	paging_offset = object->paging_offset;
+
+	vp = vnp->vnp_vp;
+	bsize = vp->v_mount->mnt_stat.f_iosize;
+
+	/* get the UNDERLYING device for the file with VOP_BMAP() */
+	/*
+	 * originally, we did not check for an error return
+	 * value -- assuming an fs always has a bmap entry point
+	 * -- that assumption is wrong!!!
+	 */
+	kva = 0;
+	mapsize = 0;
+	foff = m[reqpage]->offset + paging_offset;
+	if (!VOP_BMAP(vp, foff, &dp, 0, 0)) {
+		/*
+		 * we do not block for a kva, notice we default to a kva
+		 * conservative behavior
+		 */
+		kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+		if( !kva) {
+			for (i = 0; i < count; i++) {
+				if (i != reqpage) {
+					vnode_pager_freepage(m[i]);
+				}
+			}
+			m[0] = m[reqpage];
+			kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE);
+			reqpage = 0;
+			count = 1;
+		}
+	}
-	/* XXX */
-	vm_page_t	m;
-	if (npages != 1)
-		panic("vnode_pager_io: cannot handle multiple pages");
-	m = *mlist;
-	/* XXX */
-
-#ifdef DEBUG
-	if (vpagerdebug & VDB_FOLLOW)
-		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
-		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
-#endif
-	foff = m->offset + m->object->paging_offset;
 
 	/*
-	 * Allocate a kernel virtual address and initialize so that
-	 * we can use VOP_READ/WRITE routines.
+	 * if we can't get a kva or we can't bmap, use old VOP code
 	 */
-	kva = vm_pager_map_pages(mlist, npages, sync);
-	if (kva == NULL)
-		return(VM_PAGER_AGAIN);
+	if (!kva) {
+		for (i = 0; i < count; i++) {
+			if (i != reqpage) {
+				vnode_pager_freepage(m[i]);
+			}
+		}
+		return vnode_pager_input_old(vnp, m[reqpage]);
 
 	/*
-	 * After all of the potentially blocking operations have been
-	 * performed, we can do the size checks:
-	 *	read beyond EOF (returns error)
-	 *	short read
+	 * if the blocksize is smaller than a page size, then use
+	 * special small filesystem code.  NFS sometimes has a small
+	 * blocksize, but it can handle large reads itself.
 	 */
-	VOP_LOCK(vnp->vnp_vp);
-	if (foff >= vnp->vnp_size) {
-		VOP_UNLOCK(vnp->vnp_vp);
-		vm_pager_unmap_pages(kva, npages);
-#ifdef DEBUG
-		if (vpagerdebug & VDB_SIZE)
-			printf("vnode_pager_io: vp %x, off %d size %d\n",
-			       vnp->vnp_vp, foff, vnp->vnp_size);
-#endif
-		return(VM_PAGER_BAD);
+	} else if( (PAGE_SIZE / bsize) > 1 &&
+		(vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+		kmem_free_wakeup(pager_map, kva, mapsize);
+
+		for (i = 0; i < count; i++) {
+			if (i != reqpage) {
+				vnode_pager_freepage(m[i]);
+			}
+		}
+		return vnode_pager_input_smlfs(vnp, m[reqpage]);
+	}
+
+/*
+ * here on direct device I/O
+ */
+
+
+	/*
+	 * This pathetic hack gets data from the buffer cache, if it's there.
+	 * I believe that this is not really necessary, and the ends can
+	 * be gotten by defaulting to the normal vfs read behavior, but this
+	 * might be more efficient, because this will NOT invoke read-aheads
+	 * and one of the purposes of this code is to bypass the buffer
+	 * cache and keep from flushing it by reading in a program.
+	 */
+	/*
+	 * calculate logical block and offset
+	 */
+	block = foff / bsize;
+	offset = foff % bsize;
+	s = splbio();
+
+	/*
+	 * if we have a buffer in core, then try to use it
+	 */
+	while (bp = incore(vp, block)) {
+		int amount;
+
+		/*
+		 * wait until the buffer is avail or gone
+		 */
+		if (bp->b_flags & B_BUSY) {
+			bp->b_flags |= B_WANTED;
+			tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+			continue;
+		}
+
+		amount = PAGE_SIZE;
+		if ((foff + amount) > vnp->vnp_size)
+			amount = vnp->vnp_size - foff;
+
+		/*
+		 * make sure that this page is in the buffer
+		 */
+		if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
+			bp->b_flags |= B_BUSY;
+			splx(s);
+
+			/*
+			 * map the requested page
+			 */
+			pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage]));
+			pmap_update();
+
+			/*
+			 * copy the data from the buffer
+			 */
+			bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
+			if (amount < PAGE_SIZE) {
+				bzero((caddr_t)kva + amount, PAGE_SIZE - amount);
+			}
+			/*
+			 * unmap the page and free the kva
+			 */
+			pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE);
+			kmem_free_wakeup(pager_map, kva, mapsize);
+			/*
+			 * release the buffer back to the block subsystem
+			 */
+			bp->b_flags &= ~B_BUSY;
+			wakeup((caddr_t)bp);
+			/*
+			 * we did not have to do any work to get the requested
+			 * page, the read behind/ahead does not justify a read
+			 */
+			for (i = 0; i < count; i++) {
+				if (i != reqpage) {
+					vnode_pager_freepage(m[i]);
+				}
+			}
+			m[0] = m[reqpage];
+			count = 1;
+			reqpage = 0;
+
+			/*
+			 * sorry for the goto
+			 */
+			goto finishup;
+		}
+		/*
+		 * buffer is nowhere to be found, read from the disk
+		 */
+		break;
+	}
+	splx(s);
+
+	reqaddr = vnode_pager_addr(vp, foff);
+	s = splbio();
+	/*
+	 * Make sure that our I/O request is contiguous.
+	 * Scan backward and stop for the first discontiguous
+	 * entry or stop for a page being in buffer cache.
+	 */
+	failflag = 0;
+	first = reqpage;
+	for (i = reqpage - 1; i >= 0; --i) {
+		if (failflag ||
+		    incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+			!= reqaddr + (i - reqpage) * PAGE_SIZE) {
+			vnode_pager_freepage(m[i]);
+			failflag = 1;
+		} else {
+			first = i;
+		}
+	}
+
+	/*
+	 * Scan forward and stop for the first non-contiguous
+	 * entry or stop for a page being in buffer cache.
+	 */
+	failflag = 0;
+	last = reqpage + 1;
+	for (i = reqpage + 1; i < count; i++) {
+		if (failflag ||
+		    incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+			!= reqaddr + (i - reqpage) * PAGE_SIZE) {
+			vnode_pager_freepage(m[i]);
+			failflag = 1;
+		} else {
+			last = i + 1;
+		}
+	}
+	splx(s);
+
+	/*
+	 * the first and last page have been calculated now, move input
+	 * pages to be zero based...
+	 */
+	count = last;
+	if (first != 0) {
+		for (i = first; i < count; i++) {
+			m[i - first] = m[i];
+		}
+		count -= first;
+		reqpage -= first;
 	}
-
-	if (foff + PAGE_SIZE > vnp->vnp_size)
+
+	/*
+	 * calculate the file virtual address for the transfer
+	 */
+	foff = m[0]->offset + paging_offset;
+	/*
+	 * and get the disk physical address (in bytes)
+	 */
+	firstaddr = vnode_pager_addr(vp, foff);
+
+	/*
+	 * calculate the size of the transfer
+	 */
+	size = count * PAGE_SIZE;
+	if ((foff + size) > vnp->vnp_size)
 		size = vnp->vnp_size - foff;
-	else
+
+	/*
+	 * round up physical size for real devices
+	 */
+	if( dp->v_type == VBLK || dp->v_type == VCHR)
+		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+	/*
+	 * and map the pages to be read into the kva
+	 */
+	for (i = 0; i < count; i++)
+		pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+
+	pmap_update();
+	bp = getpbuf();
+	VHOLD(vp);
+
+	/* build a minimal buffer header */
+	bp->b_flags = B_BUSY | B_READ | B_CALL;
+	bp->b_iodone = vnode_pager_iodone;
+	/* B_PHYS is not set, but it is nice to fill this in */
+	bp->b_proc = curproc;
+	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+	if( bp->b_rcred != NOCRED)
+		crhold(bp->b_rcred);
+	if( bp->b_wcred != NOCRED)
+		crhold(bp->b_wcred);
+	bp->b_un.b_addr = (caddr_t) kva;
+	bp->b_blkno = firstaddr / DEV_BSIZE;
+	bgetvp(dp, bp);
+	bp->b_bcount = size;
+	bp->b_bufsize = size;
+
+	/* do the input */
+	VOP_STRATEGY(bp);
+
+	s = splbio();
+	/* we definitely need to be at splbio here */
+
+	while ((bp->b_flags & B_DONE) == 0) {
+		tsleep((caddr_t)bp, PVM, "vnread", 0);
+	}
+	splx(s);
+	if ((bp->b_flags & B_ERROR) != 0)
+		error = EIO;
+
+	if (!error) {
+		if (size != count * PAGE_SIZE)
+			bzero((caddr_t)kva + size, PAGE_SIZE * count - size);
+	}
+
+	pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+	kmem_free_wakeup(pager_map, kva, mapsize);
+
+	/*
+	 * free the buffer header back to the swap buffer pool
+	 */
+	relpbuf(bp);
+	HOLDRELE(vp);
+
+finishup:
+	for (i = 0; i < count; i++) {
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+		m[i]->flags |= PG_CLEAN;
+		m[i]->flags &= ~PG_LAUNDRY;
+		if (i != reqpage) {
+			/*
+			 * whether or not to leave the page activated
+			 * is up in the air, but we should put the page
+			 * on a page queue somewhere. (it already is in
+			 * the object).
+			 * Result: It appears that empirical results show
+			 * that deactivating pages is best.
+			 */
+			/*
+			 * just in case someone was asking for this
+			 * page we now tell them that it is ok to use
+			 */
+			if (!error) {
+				vm_page_deactivate(m[i]);
+				PAGE_WAKEUP(m[i]);
+				m[i]->flags &= ~PG_FAKE;
+				m[i]->act_count = 2;
+			} else {
+				vnode_pager_freepage(m[i]);
+			}
+		}
+	}
+	if (error) {
+		printf("vnode pager read error: %d\n", error);
+	}
+	if (errtype)
+		return error;
+	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
+}
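The backward/forward contiguity scans above reduce to a simple interval computation: grow a run [first, last) around the requested page until a page is disk-discontiguous or already cached. A minimal user-space sketch of just that scan; the hypothetical is_contig() stands in for the incore()/vnode_pager_addr() tests:

    #include <stdio.h>

    /* toy stand-in for the incore()/vnode_pager_addr() checks:
       pages 2..6 are disk-contiguous in this example */
    static int
    is_contig(int page)
    {
	    return (page >= 2 && page <= 6);
    }

    /* find the largest contiguous run [first, last) around reqpage,
       mirroring the two scans in vnode_pager_input() */
    static void
    find_run(int count, int reqpage, int *firstp, int *lastp)
    {
	    int i, first = reqpage, last = reqpage + 1;

	    for (i = reqpage - 1; i >= 0 && is_contig(i); --i)
		    first = i;		/* extend run backward */
	    for (i = reqpage + 1; i < count && is_contig(i); i++)
		    last = i + 1;	/* extend run forward */
	    *firstp = first;
	    *lastp = last;
    }

    int
    main(void)
    {
	    int first, last;

	    find_run(8, 4, &first, &last);
	    printf("run: [%d, %d)\n", first, last);	/* prints [2, 7) */
	    return 0;
    }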
+
+/*
+ * old-style vnode pager output routine
+ */
+int
+vnode_pager_output_old(vnp, m)
+	register vn_pager_t vnp;
+	vm_page_t m;
+{
+	vm_offset_t foff;
+	vm_offset_t kva;
+	vm_offset_t size;
+	struct iovec aiov;
+	struct uio auio;
+	struct vnode *vp;
+	int error;
+
+	vp = vnp->vnp_vp;
+	foff = m->offset + m->object->paging_offset;
+	/*
+	 * Return failure if beyond current EOF
+	 */
+	if (foff >= vnp->vnp_size) {
+		return VM_PAGER_BAD;
+	} else {
 		size = PAGE_SIZE;
-	aiov.iov_base = (caddr_t)kva;
-	aiov.iov_len = size;
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	auio.uio_offset = foff;
-	auio.uio_segflg = UIO_SYSSPACE;
-	auio.uio_rw = rw;
-	auio.uio_resid = size;
-	auio.uio_procp = (struct proc *)0;
-#ifdef DEBUG
-	if (vpagerdebug & VDB_IO)
-		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
-		       vnp->vnp_vp, kva, foff, size);
-#endif
-	if (rw == UIO_READ)
-		error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
+		if (foff + size > vnp->vnp_size)
+			size = vnp->vnp_size - foff;
+/*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_WRITE routines.
+ */
+		kva = vm_pager_map_page(m);
+		aiov.iov_base = (caddr_t)kva;
+		aiov.iov_len = size;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = foff;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_rw = UIO_WRITE;
+		auio.uio_resid = size;
+		auio.uio_procp = (struct proc *)0;
+
+		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
+
+		if (!error) {
+			if ((size - auio.uio_resid) == 0) {
+				error = EINVAL;
+			}
+		}
+		vm_pager_unmap_page(kva);
+		return error?VM_PAGER_FAIL:VM_PAGER_OK;
+	}
+}
+
+/*
+ * vnode pager output on a small-block file system
+ */
+int
+vnode_pager_output_smlfs(vnp, m)
+	vn_pager_t vnp;
+	vm_page_t m;
+{
+	int i;
+	int s;
+	vm_offset_t paging_offset;
+	struct vnode *dp, *vp;
+	struct buf *bp;
+	vm_offset_t mapsize;
+	vm_offset_t foff;
+	vm_offset_t kva;
+	int fileaddr;
+	int block;
+	vm_offset_t bsize;
+	int run;
+	int error = 0;
+
+	paging_offset = m->object->paging_offset;
+	vp = vnp->vnp_vp;
+	bsize = vp->v_mount->mnt_stat.f_iosize;
+	foff = m->offset + paging_offset;
+
+	VOP_BMAP(vp, foff, &dp, 0, 0);
+	kva = vm_pager_map_page(m);
+	for(i = 0; !error && i < (PAGE_SIZE/bsize); i++) {
+		/*
+		 * calculate logical block and offset
+		 */
+		fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+		if( fileaddr != -1) {
+			s = splbio();
+			if( bp = incore( vp, (foff/bsize) + i)) {
+				bp = getblk(vp, (foff/bsize) + i, bp->b_bufsize,0, 0);
+				bp->b_flags |= B_INVAL;
+				brelse(bp);
+			}
+			splx(s);
+
+			bp = getpbuf();
+			VHOLD(vp);
+
+			/* build a minimal buffer header */
+			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
+			bp->b_iodone = vnode_pager_iodone;
+			bp->b_proc = curproc;
+			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+			if( bp->b_rcred != NOCRED)
+				crhold(bp->b_rcred);
+			if( bp->b_wcred != NOCRED)
+				crhold(bp->b_wcred);
+			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+			bp->b_blkno = fileaddr / DEV_BSIZE;
+			bgetvp(dp, bp);
+			++dp->v_numoutput;
+			/* for NFS */
+			bp->b_dirtyoff = 0;
+			bp->b_dirtyend = bsize;
+			bp->b_bcount = bsize;
+			bp->b_bufsize = bsize;
+
+			/* do the output */
+			VOP_STRATEGY(bp);
+
+			/* we definitely need to be at splbio here */
+
+			s = splbio();
+			while ((bp->b_flags & B_DONE) == 0) {
+				tsleep((caddr_t)bp, PVM, "vnswrt", 0);
+			}
+			splx(s);
+			if ((bp->b_flags & B_ERROR) != 0)
+				error = EIO;
+
+			/*
+			 * free the buffer header back to the swap buffer pool
+			 */
+			relpbuf(bp);
+			HOLDRELE(vp);
+		}
+	}
+	vm_pager_unmap_page(kva);
+	if( error)
+		return VM_PAGER_FAIL;
 	else
-		error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
-	VOP_UNLOCK(vnp->vnp_vp);
-#ifdef DEBUG
-	if (vpagerdebug & VDB_IO) {
-		if (error || auio.uio_resid)
-			printf(" returns error %x, resid %x",
-			       error, auio.uio_resid);
-		printf("\n");
+		return VM_PAGER_OK;
+}
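The invalidate-before-write step in the routine above (incore, then getblk with B_INVAL, then brelse) keeps a stale file-system buffer from shadowing data the pager writes straight to disk: any later read through the buffer cache must miss and refetch. A toy user-space model of that coherency rule; the cache structure here is invented for illustration:

    #include <stdio.h>

    /* simulated buffer cache: block numbers with a valid bit */
    #define NCACHE 4
    struct tbuf { long blkno; int valid; };
    static struct tbuf cache[NCACHE] = {
	    { 10, 1 }, { 11, 1 }, { 12, 1 }, { 13, 1 },
    };

    /* discard any cached copy of blkno, like B_INVAL + brelse() */
    static void
    invalidate_block(long blkno)
    {
	    int i;

	    for (i = 0; i < NCACHE; i++)
		    if (cache[i].valid && cache[i].blkno == blkno)
			    cache[i].valid = 0;
    }

    int
    main(void)
    {
	    invalidate_block(12);	/* pager is about to overwrite block 12 */
	    printf("block 12 cached: %s\n", cache[2].valid ? "yes" : "no");
	    return 0;
    }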
+
+/*
+ * generic vnode pager output routine
+ */
+int
+vnode_pager_output(vnp, m, count, rtvals)
+	vn_pager_t vnp;
+	vm_page_t *m;
+	int count;
+	int *rtvals;
+{
+	int i,j;
+	vm_offset_t kva, foff;
+	int size;
+	struct proc *p = curproc;	/* XXX */
+	vm_object_t object;
+	vm_offset_t paging_offset;
+	struct vnode *dp, *vp;
+	struct buf *bp;
+	vm_offset_t mapsize;
+	vm_offset_t reqaddr;
+	int run;
+	int bsize;
+	int s;
+
+	int error = 0;
+
+retryoutput:
+	object = m[0]->object;	/* all vm_page_t items are in same object */
+	paging_offset = object->paging_offset;
+
+	vp = vnp->vnp_vp;
+	bsize = vp->v_mount->mnt_stat.f_iosize;
+
+	for(i=0;i<count;i++)
+		rtvals[i] = VM_PAGER_AGAIN;
+
+	/*
+	 * if the filesystem does not have a bmap, then use the
+	 * old code
+	 */
+	if (VOP_BMAP(vp, m[0]->offset+paging_offset, &dp, 0, 0)) {
+
+		rtvals[0] = vnode_pager_output_old(vnp, m[0]);
+
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m[0]));
+		m[0]->flags |= PG_CLEAN;
+		m[0]->flags &= ~PG_LAUNDRY;
+		return rtvals[0];
 	}
-#endif
-	if (!error) {
-		register int count = size - auio.uio_resid;
 
-		if (count == 0)
-			error = EINVAL;
-		else if (count != PAGE_SIZE && rw == UIO_READ)
-			bzero((void *)(kva + count), PAGE_SIZE - count);
+	/*
+	 * if the filesystem has a small blocksize, then use
+	 * the small block filesystem output code
+	 */
+	if ((bsize < PAGE_SIZE) &&
+		(vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+		for(i=0;i<count;i++) {
+			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
+			if( rtvals[i] == VM_PAGER_OK) {
+				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+				m[i]->flags |= PG_CLEAN;
+				m[i]->flags &= ~PG_LAUNDRY;
+			}
+		}
+		return rtvals[0];
 	}
-	vm_pager_unmap_pages(kva, npages);
-	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
+
+	/*
+	 * get some kva for the output
+	 */
+	kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+	if( !kva) {
+		kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE));
+		count = 1;
+		if( !kva)
+			return rtvals[0];
+	}
+
+	for(i=0;i<count;i++) {
+		foff = m[i]->offset + paging_offset;
+		if (foff >= vnp->vnp_size) {
+			for(j=i;j<count;j++)
+				rtvals[j] = VM_PAGER_BAD;
+			count = i;
+			break;
+		}
+	}
+	if (count == 0) {
+		return rtvals[0];
+	}
+	foff = m[0]->offset + paging_offset;
+	reqaddr = vnode_pager_addr(vp, foff);
+	/*
+	 * Scan forward and stop for the first non-contiguous
+	 * entry or stop for a page being in buffer cache.
+	 */
+	for (i = 1; i < count; i++) {
+		if ( vnode_pager_addr(vp, m[i]->offset + paging_offset)
+		    != reqaddr + i * PAGE_SIZE) {
+			count = i;
+			break;
+		}
+	}
+
+	/*
+	 * calculate the size of the transfer
+	 */
+	size = count * PAGE_SIZE;
+	if ((foff + size) > vnp->vnp_size)
+		size = vnp->vnp_size - foff;
+
+	/*
+	 * round up physical size for real devices
+	 */
+	if( dp->v_type == VBLK || dp->v_type == VCHR)
+		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+	/*
+	 * and map the pages to be written into the kva
+	 */
+	for (i = 0; i < count; i++)
+		pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+	pmap_update();
+/*
+	printf("vnode: writing foff: %d, devoff: %d, size: %d\n",
+		foff, reqaddr, size);
+*/
+	/*
+	 * next invalidate the incore vfs_bio data
+	 */
+	for (i = 0; i < count; i++) {
+		int filblock = (foff + i * PAGE_SIZE) / bsize;
+		struct buf *fbp;
+
+		s = splbio();
+		if( fbp = incore( vp, filblock)) {
+			/* printf("invalidating: %d\n", filblock); */
+			fbp = getblk(vp, filblock, fbp->b_bufsize,0,0);
+			fbp->b_flags |= B_INVAL;
+			brelse(fbp);
+		}
+		splx(s);
+	}
+
+
+	bp = getpbuf();
+	VHOLD(vp);
+	/* build a minimal buffer header */
+	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
+	bp->b_iodone = vnode_pager_iodone;
+	/* B_PHYS is not set, but it is nice to fill this in */
+	bp->b_proc = curproc;
+	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+
+	if( bp->b_rcred != NOCRED)
+		crhold(bp->b_rcred);
+	if( bp->b_wcred != NOCRED)
+		crhold(bp->b_wcred);
+	bp->b_un.b_addr = (caddr_t) kva;
+	bp->b_blkno = reqaddr / DEV_BSIZE;
+	bgetvp(dp, bp);
+	++dp->v_numoutput;
+
+	/* for NFS */
+	bp->b_dirtyoff = 0;
+	bp->b_dirtyend = size;
+
+	bp->b_bcount = size;
+	bp->b_bufsize = size;
+
+	/* do the output */
+	VOP_STRATEGY(bp);
+
+	s = splbio();
+
+	/* we definitely need to be at splbio here */
+
+	while ((bp->b_flags & B_DONE) == 0) {
+		tsleep((caddr_t)bp, PVM, "vnwrite", 0);
+	}
+	splx(s);
+
+	if ((bp->b_flags & B_ERROR) != 0)
+		error = EIO;
+
+	pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+	kmem_free_wakeup(pager_map, kva, mapsize);
+
+	/*
+	 * free the buffer header back to the swap buffer pool
+	 */
+	relpbuf(bp);
+	HOLDRELE(vp);
+
+	if( !error) {
+		for(i=0;i<count;i++) {
+			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+			m[i]->flags |= PG_CLEAN;
+			m[i]->flags &= ~PG_LAUNDRY;
+			rtvals[i] = VM_PAGER_OK;
+		}
+	} else if( count != 1) {
+		error = 0;
+		count = 1;
+		goto retryoutput;
+	}
+
+	if (error) {
+		printf("vnode pager write error: %d\n", error);
+	}
+	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
 }
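The retryoutput label above implements a simple degradation strategy: if a clustered multi-page write fails, the routine retries with just the first page before reporting failure. A condensed sketch of that control flow; write_pages() is a hypothetical stand-in for the clustered I/O, with a toy failure model:

    #include <stdio.h>

    /* toy stand-in for the clustered write: multi-page writes fail,
       single-page writes succeed, so the retry path is exercised */
    static int
    write_pages(int count)
    {
	    return count > 1 ? -1 : 0;
    }

    /* mirror of the retryoutput logic: on a multi-page failure,
       degrade to a single-page write before giving up */
    static int
    pageout(int count)
    {
    retry:
	    if (write_pages(count) != 0) {
		    if (count != 1) {
			    count = 1;	/* degrade to one page */
			    goto retry;
		    }
		    return -1;		/* the single page also failed */
	    }
	    return 0;
    }

    int
    main(void)
    {
	    printf("pageout(4) -> %d\n", pageout(4));	/* retries, prints 0 */
	    return 0;
    }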