| Field | Value | Date |
|---|---|---|
| author | David Greenman <dg@FreeBSD.org> | 1995-01-09 16:06:02 +0000 |
| committer | David Greenman <dg@FreeBSD.org> | 1995-01-09 16:06:02 +0000 |
| commit | 0d94caffcad13f94e28da90c9e945749e0659463 (patch) | |
| tree | afd2f1c13b93c3fff08b58c44153df0d13479824 /sys | |
| parent | bf8af437897959be977ea469d742006e422c74c5 (diff) | |
| download | src-0d94caffcad13f94e28da90c9e945749e0659463.tar.gz, src-0d94caffcad13f94e28da90c9e945749e0659463.zip | |
Diffstat (limited to 'sys')
90 files changed, 7296 insertions, 6367 deletions
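The machdep.c hunks below drop the old `bufpages` heuristic in favor of a much simpler rule: size the buffer cache at one buffer header per roughly 30 pages of physical memory (capped at 256), let `nswbuf` simply track `nbuf`, and scale bounce-buffer pages with memory above 16 MB. The following is a minimal, standalone sketch of that arithmetic; the constants come from the diff, while the `min_int()` helper, the `main()` driver, and the sample memory sizes are illustrative only.

```c
#include <stdio.h>

#define PAGE_SIZE 4096	/* i386 page size; physmem and Maxmem are counted in pages */

static int
min_int(int a, int b)
{
	return ((a < b) ? a : b);
}

int
main(void)
{
	int sample_mb[] = { 8, 16, 32, 64 };
	int i;

	for (i = 0; i < (int)(sizeof(sample_mb) / sizeof(sample_mb[0])); i++) {
		int physmem = sample_mb[i] * (1024 * 1024 / PAGE_SIZE);	/* pages */
		int Maxmem = physmem;	/* assume no memory hole for this sketch */

		/* New rule from cpu_startup(): one buffer header per ~30 pages, capped at 256. */
		int nbuf = min_int(physmem / 30, 256);
		int nswbuf = nbuf;	/* swap buffer headers now simply track nbuf */

		/* Bounce buffers only matter above 16 MB (4096 pages): 64, plus 32 per extra 8 MB. */
		int bouncepages = 0;
		if (Maxmem > 4096)
			bouncepages = 64 + ((Maxmem - 4096) / 2048) * 32;

		printf("%2d MB: nbuf=%3d nswbuf=%3d bouncepages=%3d\n",
		    sample_mb[i], nbuf, nswbuf, bouncepages);
	}
	return (0);
}
```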
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e4d7443e7b00..eeba17b1f7b1 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.98 1994/12/11 03:33:58 davidg Exp $ + * $Id: machdep.c,v 1.99 1995/01/05 19:51:14 se Exp $ */ #include "npx.h" @@ -114,17 +114,12 @@ char cpu_model[sizeof("Cy486DLC") + 1]; /* * Declare these as initialized data so we can patch them. */ -int nswbuf = 0; +int nswbuf = 128; #ifdef NBUF int nbuf = NBUF; #else int nbuf = 0; #endif -#ifdef BUFPAGES -int bufpages = BUFPAGES; -#else -int bufpages = 0; -#endif #ifdef BOUNCE_BUFFERS extern char *bouncememory; @@ -170,9 +165,7 @@ cpu_startup() vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; -#ifdef BOUNCE_BUFFERS vm_offset_t minaddr; -#endif /* BOUNCE_BUFFERS */ if (boothowto & RB_VERBOSE) bootverbose++; @@ -261,33 +254,11 @@ again: valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif - /* - * Determine how many buffers to allocate. - * Use 20% of memory of memory beyond the first 2MB - * Insure a minimum of 16 fs buffers. - * We allocate 1/2 as many swap buffer headers as file i/o buffers. - */ - if (bufpages == 0) - bufpages = ((physmem << PGSHIFT) - 2048*1024) / NBPG / 6; - if (bufpages < 64) - bufpages = 64; - /* - * We must still limit the maximum number of buffers to be no - * more than 750 because we'll run out of kernel VM otherwise. - */ - bufpages = min(bufpages, 1500); - if (nbuf == 0) { - nbuf = bufpages / 2; - if (nbuf < 32) - nbuf = 32; - } - freebufspace = bufpages * NBPG; - if (nswbuf == 0) { - nswbuf = (nbuf / 2) &~ 1; /* force even */ - if (nswbuf > 64) - nswbuf = 64; /* sanity */ - } + if (nbuf == 0) + nbuf = min(physmem / 30, 256); + nswbuf = nbuf; + valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); @@ -296,8 +267,10 @@ again: * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { - if (bouncepages == 0) - bouncepages = 96; /* largest physio size + extra */ + if (bouncepages == 0) { + bouncepages = 64; + bouncepages += ((Maxmem - 4096) / 2048) * 32; + } v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } @@ -333,6 +306,10 @@ again: (nbuf*MAXBSIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); + exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (16*ARG_MAX), TRUE); + u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * Finally, allocate mbuf pool. 
Since mclrefcnt is an off-size @@ -353,8 +330,6 @@ again: if (boothowto & RB_CONFIG) userconfig(); printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count); - printf("using %d buffers containing %d bytes of memory\n", - nbuf, bufpages * CLBYTES); #ifdef BOUNCE_BUFFERS /* @@ -744,9 +719,11 @@ boot(arghowto) for (iter = 0; iter < 20; iter++) { nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) - if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) + for (bp = &buf[nbuf]; --bp >= buf; ) { + if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) { nbusy++; + } + } if (nbusy == 0) break; printf("%d ", nbusy); @@ -1642,4 +1619,3 @@ disk_externalize(int drive, void *userp, size_t *maxlen) *maxlen -= sizeof drive; return copyout(&drive, userp, sizeof drive); } - diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 22cec02726c1..d8f550153471 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.38 1994/12/18 03:36:27 davidg Exp $ + * $Id: pmap.c,v 1.39 1994/12/18 14:16:22 davidg Exp $ */ /* @@ -142,8 +142,6 @@ int i386pagesperpage; /* PAGE_SIZE / I386_PAGE_SIZE */ boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ vm_offset_t vm_first_phys, vm_last_phys; -static inline boolean_t pmap_testbit(); -static inline void pmap_changebit(); static inline int pmap_is_managed(); static inline void * vm_get_pmap(); static inline void vm_put_pmap(); @@ -152,6 +150,7 @@ static void pmap_alloc_pv_entry(); static inline pv_entry_t get_pv_entry(); static inline void pmap_use_pt(); static inline void pmap_unuse_pt(); +int nkpt; extern vm_offset_t clean_sva, clean_eva; @@ -230,8 +229,7 @@ pmap_extract(pmap, va) } pa = *(int *) avtopte(va); } - pa = (pa & PG_FRAME) | (va & ~PG_FRAME); - return pa; + return ((pa & PG_FRAME) | (va & ~PG_FRAME)); } return 0; @@ -358,6 +356,7 @@ pmap_bootstrap(firstaddr, loadaddr) simple_lock_init(&kernel_pmap->pm_lock); kernel_pmap->pm_count = 1; + nkpt = NKPT; #if BSDVM_COMPAT /* @@ -376,10 +375,8 @@ pmap_bootstrap(firstaddr, loadaddr) virtual_avail = va; #endif /* - * reserve special hunk of memory for use by bus dma as a bounce - * buffer (contiguous virtual *and* physical memory). for now, - * assume vm does not use memory beneath hole, and we know that - * the bootstrap uses top 32k of base memory. -wfj + * Reserve special hunk of memory for use by bus dma as a bounce + * buffer (contiguous virtual *and* physical memory). 
*/ { extern vm_offset_t isaphysmem; @@ -420,8 +417,7 @@ pmap_init(phys_start, phys_end) addr = (vm_offset_t) KERNBASE + IdlePTD; vm_object_reference(kernel_object); (void) vm_map_find(kernel_map, kernel_object, addr, - &addr, (4 + NKPT) * NBPG, FALSE); - + &addr, (4 + NKPDE) * NBPG, FALSE); /* * calculate the number of pv_entries needed @@ -542,7 +538,7 @@ pmap_pinit(pmap) pmap->pm_pdir = (pd_entry_t *) vm_get_pmap(); /* wire in kernel global address entries */ - bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, NKPT*PTESIZE); + bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, nkpt*PTESIZE); /* install self-referential address mapping entry */ *(int *)(pmap->pm_pdir+PTDPTDI) = @@ -553,6 +549,65 @@ pmap_pinit(pmap) } /* + * grow the number of kernel page table entries, if needed + */ + +vm_page_t nkpg; +vm_offset_t kernel_vm_end; + +void +pmap_growkernel(vm_offset_t addr) { + struct proc *p; + struct pmap *pmap; + int s; + + s = splhigh(); + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + while(pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + ++nkpt; + } + } + + addr = (addr + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + while( kernel_vm_end < addr) { + if( pdir_pde( PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + continue; + } + + ++nkpt; + if( !nkpg) { + nkpg = vm_page_alloc(kernel_object, 0, TRUE); + vm_page_remove(nkpg); + pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); + if( !nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + } + pdir_pde( PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW); + nkpg = NULL; + + for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { + if( p->p_vmspace) { + pmap = &p->p_vmspace->vm_pmap; + *pmap_pde( pmap, kernel_vm_end) = pdir_pde( PTD, kernel_vm_end); + } + } + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + } +#if 0 + if( !nkpg) { + nkpg = vm_page_alloc(kernel_object, 0, TRUE); + vm_page_remove(nkpg); + pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); + } +#endif + splx(s); +} + +/* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. @@ -674,7 +729,7 @@ pmap_alloc_pv_entry() /* * allocate a physical page out of the vm system */ - m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map)); + m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map), TRUE); if (m) { int newentries; int i; @@ -738,20 +793,18 @@ static pt_entry_t * get_pt_entry(pmap) pmap_t pmap; { - pt_entry_t *ptp; vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; + /* are we current address space or kernel? 
*/ if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - ptp=PTmap; + return PTmap; + } /* otherwise, we are alternate address space */ - } else { - if ( frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - ptp=APTmap; - } - return ptp; + if ( frame != ((int) APTDpde & PG_FRAME)) { + APTDpde = pmap->pm_pdir[PTDPTDI]; + pmap_update(); + } + return APTmap; } /* @@ -842,17 +895,11 @@ pmap_remove(pmap, sva, eva) *ptq = 0; if (pmap_is_managed(pa)) { - if ((int) oldpte & (PG_M | PG_U)) { + if ((int) oldpte & PG_M) { if ((sva < USRSTACK || sva > UPT_MAX_ADDRESS) || (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) { if (sva < clean_sva || sva >= clean_eva) { - m = PHYS_TO_VM_PAGE(pa); - if ((int) oldpte & PG_M) { - m->flags &= ~PG_CLEAN; - } - if ((int) oldpte & PG_U) { - m->flags |= PG_REFERENCED; - } + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } } } @@ -938,11 +985,12 @@ pmap_remove(pmap, sva, eva) continue; } - if ((((int) oldpte & PG_M) && (va < USRSTACK || va > UPT_MAX_ADDRESS)) - || (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { - if (va < clean_sva || va >= clean_eva ) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if ((int) oldpte & PG_M) { + if ((va < USRSTACK || va > UPT_MAX_ADDRESS) || + (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; + } } } @@ -992,28 +1040,29 @@ pmap_remove_all(pa) while (pv->pv_pmap != NULL) { pmap = pv->pv_pmap; ptp = get_pt_entry(pmap); - va = i386_btop(pv->pv_va); - pte = ptp + va; + va = pv->pv_va; + pte = ptp + i386_btop(va); if (pmap_pte_w(pte)) pmap->pm_stats.wired_count--; - if ( *pte) { + if (*pte) { pmap->pm_stats.resident_count--; anyvalid++; /* - * update the vm_page_t clean bit + * Update the vm_page_t clean and reference bits. */ - if ( (m->flags & PG_CLEAN) && - ((((int) *pte) & PG_M) && (pv->pv_va < USRSTACK || pv->pv_va > UPT_MAX_ADDRESS)) - || (pv->pv_va >= USRSTACK && pv->pv_va < USRSTACK+(UPAGES*NBPG))) { - if (pv->pv_va < clean_sva || pv->pv_va >= clean_eva) { - m->flags &= ~PG_CLEAN; + if ((int) *pte & PG_M) { + if ((va < USRSTACK || va > UPT_MAX_ADDRESS) || + (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; + } } } *pte = 0; } - pmap_unuse_pt(pmap, pv->pv_va); + pmap_unuse_pt(pmap, va); npv = pv->pv_next; if (npv) { @@ -1150,13 +1199,15 @@ pmap_enter(pmap, va, pa, prot, wired) va = i386_trunc_page(va); pa = i386_trunc_page(pa); - if (va > VM_MAX_KERNEL_ADDRESS)panic("pmap_enter: toobig"); + if (va > VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: toobig"); /* * Page Directory table entry not valid, we need a new PT page */ - if ( *pmap_pde(pmap, va) == 0) { - pg("ptdi %x, va %x", pmap->pm_pdir[PTDPTDI], va); + if (*pmap_pde(pmap, va) == 0) { + printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va); + panic("invalid kernel page directory"); } pte = pmap_pte(pmap, va); @@ -1315,7 +1366,7 @@ pmap_qremove(va, count) { int i; register pt_entry_t *pte; - for(i=0;i<count;i++) { + for (i=0;i<count;i++) { pte = vtopte(va + i * NBPG); *pte = 0; } @@ -1371,7 +1422,7 @@ pmap_kremove( va) * but is *MUCH* faster than pmap_enter... 
*/ -static inline int +static inline void pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; @@ -1380,7 +1431,6 @@ pmap_enter_quick(pmap, va, pa) register pt_entry_t *pte; register pv_entry_t pv, npv; int s; - int anyvalid = 0; /* * Enter on the PV list if part of our managed memory @@ -1389,7 +1439,9 @@ pmap_enter_quick(pmap, va, pa) */ pte = vtopte(va); - if (pmap_pte_pa(pte)) { + + /* a fault on the page table might occur here */ + if (*pte) { pmap_remove(pmap, va, va + PAGE_SIZE); } @@ -1399,8 +1451,8 @@ pmap_enter_quick(pmap, va, pa) * No entries yet, use header as the first entry */ if (pv->pv_pmap == NULL) { - pv->pv_va = va; pv->pv_pmap = pmap; + pv->pv_va = va; pv->pv_next = NULL; } /* @@ -1423,14 +1475,12 @@ pmap_enter_quick(pmap, va, pa) */ pmap->pm_stats.resident_count++; - if (*pte) - anyvalid++; /* * Now validate mapping with desired protection/wiring. */ *pte = (pt_entry_t) ( (int) (pa | PG_V | PG_u)); - return (anyvalid); + return; } /* @@ -1446,12 +1496,10 @@ pmap_object_init_pt(pmap, addr, object, offset, size) vm_offset_t offset; vm_offset_t size; { - vm_offset_t tmpoff; vm_page_t p; - vm_offset_t v; vm_offset_t objbytes; - int anyvalid = 0; + int bits; if (!pmap) return; @@ -1460,7 +1508,7 @@ pmap_object_init_pt(pmap, addr, object, offset, size) * if we are processing a major portion of the object, then * scan the entire thing. */ - if( size > object->size / 2) { + if (size > (object->size >> 1)) { objbytes = size; p = object->memq.tqh_first; while ((p != NULL) && (objbytes != 0)) { @@ -1475,12 +1523,12 @@ pmap_object_init_pt(pmap, addr, object, offset, size) continue; } - if ((p->flags & (PG_BUSY|PG_FICTITIOUS|PG_FAKE)) == 0 ) { + if ((p->bmapped == 0) && + (p->busy == 0) && + ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0 ) { vm_page_hold(p); - v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff))); - /* a fault might occur here */ - *(volatile char *)v += 0; - anyvalid += pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); + pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); } p = p->listq.tqe_next; @@ -1490,23 +1538,20 @@ pmap_object_init_pt(pmap, addr, object, offset, size) /* * else lookup the pages one-by-one. */ - for(tmpoff = 0; tmpoff < size; tmpoff += NBPG) { + for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) { p = vm_page_lookup(object, tmpoff + offset); if (p) { - if( (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_FAKE)) == 0) { + if ((p->bmapped == 0) && + (p->busy == 0) && + ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0) { vm_page_hold(p); - v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff))); - /* a fault might occur here */ - *(volatile char *)v += 0; - anyvalid += pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); + pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); } } } } - - if (anyvalid) - pmap_update(); } /* @@ -1685,7 +1730,7 @@ pmap_page_exists(pmap, pa) * note that the testbit/changebit routines are inline, * and a lot of things compile-time evaluate. */ -static inline boolean_t +boolean_t pmap_testbit(pa, bit) register vm_offset_t pa; int bit; @@ -1749,7 +1794,7 @@ pmap_testbit(pa, bit) /* * this routine is used to modify bits in ptes */ -static inline void +void pmap_changebit(pa, bit, setem) vm_offset_t pa; int bit; @@ -1816,70 +1861,6 @@ pmap_page_protect(phys, prot) } } -/* - * Clear the modify bits on the specified physical page. 
- */ -void -pmap_clear_modify(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_M, FALSE); -} - -/* - * pmap_clear_reference: - * - * Clear the reference bit on the specified physical page. - */ -void -pmap_clear_reference(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_U, FALSE); -} - -/* - * pmap_is_referenced: - * - * Return whether or not the specified physical page is referenced - * by any physical maps. - */ - -boolean_t -pmap_is_referenced(pa) - vm_offset_t pa; -{ - return(pmap_testbit(pa, PG_U)); -} - -/* - * pmap_is_modified: - * - * Return whether or not the specified physical page is modified - * by any physical maps. - */ - -boolean_t -pmap_is_modified(pa) - vm_offset_t pa; -{ - return(pmap_testbit(pa, PG_M)); -} - -/* - * Routine: pmap_copy_on_write - * Function: - * Remove write privileges from all - * physical maps for this physical page. - */ -void -pmap_copy_on_write(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_RW, FALSE); -} - - vm_offset_t pmap_phys_address(ppn) int ppn; diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index d0ca150eefbc..0b4946f2d36b 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.41 1994/10/30 20:25:21 bde Exp $ + * $Id: trap.c,v 1.42 1994/12/24 07:22:58 bde Exp $ */ /* @@ -421,8 +421,6 @@ trap_pfault(frame, usermode) *(volatile char *)v += 0; ptepg = (vm_page_t) pmap_pte_vm_page(vm_map_pmap(map), v); - if( ptepg->hold_count == 0) - ptepg->act_count += 3; vm_page_hold(ptepg); /* Fault in the user page: */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 9663065f8686..bb35a55ef99e 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.28 1994/09/02 04:12:07 davidg Exp $ + * $Id: vm_machdep.c,v 1.29 1994/10/08 22:19:51 phk Exp $ */ #include "npx.h" @@ -321,6 +321,8 @@ vm_bounce_alloc(bp) pa = pmap_kextract(va); if (pa >= SIXTEENMEG) ++dobounceflag; + if( pa == 0) + panic("vm_bounce_alloc: Unmapped page"); va += NBPG; } if (dobounceflag == 0) @@ -492,11 +494,14 @@ vm_bounce_init() if (!bounceallocarray) panic("Cannot allocate bounce resource array\n"); - bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned)); bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT); if (!bouncepa) panic("Cannot allocate physical memory array\n"); + for(i=0;i<bounceallocarraysize;i++) { + bounceallocarray[i] = 0xffffffff; + } + for(i=0;i<bouncepages;i++) { vm_offset_t pa; if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * NBPG)) >= SIXTEENMEG) @@ -504,6 +509,7 @@ vm_bounce_init() if( pa == 0) panic("bounce memory not resident"); bouncepa[i] = pa; + bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int)))); } bouncefree = bouncepages; @@ -603,9 +609,9 @@ cpu_wait(p) struct proc *p; { /* extern vm_map_t upages_map; */ /* drop per-process resources */ - pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr, + pmap_remove(vm_map_pmap(u_map), (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + ctob(UPAGES)); - kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); + kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); vmspace_free(p->p_vmspace); } diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index b83628a7e53e..190b8dd80472 100644 --- a/sys/amd64/include/pmap.h +++ 
b/sys/amd64/include/pmap.h @@ -42,7 +42,7 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.18 1994/11/14 14:12:24 bde Exp $ + * $Id: pmap.h,v 1.19 1994/12/18 03:11:46 davidg Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -62,7 +62,11 @@ typedef unsigned int *pt_entry_t; * given to the user (NUPDE) */ #ifndef NKPT +#if 0 #define NKPT 26 /* actual number of kernel page tables */ +#else +#define NKPT 9 /* actual number of kernel page tables */ +#endif #endif #ifndef NKPDE #define NKPDE 63 /* addressable number of page tables/pde's */ @@ -126,6 +130,43 @@ pmap_kextract(vm_offset_t va) pa = (pa & PG_FRAME) | (va & ~PG_FRAME); return pa; } + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * by any physical maps. + */ +#define pmap_is_referenced(pa) pmap_testbit((pa), PG_U) + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +#define pmap_is_modified(pa) pmap_testbit((pa), PG_M) + +/* + * Clear the modify bits on the specified physical page. + */ +#define pmap_clear_modify(pa) pmap_changebit((pa), PG_M, FALSE) + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +#define pmap_clear_reference(pa) pmap_changebit((pa), PG_U, FALSE) + +/* + * Routine: pmap_copy_on_write + * Function: + * Remove write privileges from all + * physical maps for this physical page. + */ +#define pmap_copy_on_write(pa) pmap_changebit((pa), PG_RW, FALSE) + #endif /* @@ -196,6 +237,7 @@ pv_entry_t pv_table; /* array of entries, one per page */ struct pcb; void pmap_activate __P((pmap_t, struct pcb *)); +void pmap_changebit __P((vm_offset_t, int, boolean_t)); pmap_t pmap_kernel __P((void)); boolean_t pmap_page_exists __P((pmap_t, vm_offset_t)); pt_entry_t *pmap_pte(pmap_t, vm_offset_t); diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 482ee5b60874..b38e914dd7c9 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 - * $Id: vmparam.h,v 1.15 1994/09/12 11:38:20 davidg Exp $ + * $Id: vmparam.h,v 1.16 1994/09/23 07:00:12 davidg Exp $ */ @@ -124,11 +124,11 @@ #define UPDT VM_MIN_KERNEL_ADDRESS #define KPT_MIN_ADDRESS ((vm_offset_t)KERNBASE - NBPG*(NKPDE+1)) #define KPT_MAX_ADDRESS ((vm_offset_t)KERNBASE - NBPG) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)KERNBASE + NKPT*NBPG*NPTEPG) +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)KERNBASE + NKPDE*NBPG*NPTEPG) /* virtual sizes (bytes) for various kernel submaps */ #define VM_MBUF_SIZE (NMBCLUSTERS*MCLBYTES) -#define VM_KMEM_SIZE (16 * 1024 * 1024) +#define VM_KMEM_SIZE (32 * 1024 * 1024) #define VM_PHYS_SIZE (USRIOSIZE*CLBYTES) #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/conf/param.c b/sys/conf/param.c index f714cab0a653..bc6ccb35278f 100644 --- a/sys/conf/param.c +++ b/sys/conf/param.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)param.c 8.2 (Berkeley) 1/21/94 - * $Id: param.c,v 1.3 1994/08/02 07:38:30 davidg Exp $ + * $Id: param.c,v 1.4 1994/09/13 14:46:24 dfr Exp $ */ #include <sys/param.h> @@ -81,14 +81,9 @@ int tick = 1000000 / HZ; int tickadj = 30000 / (60 * HZ); /* can adjust 30ms in 60s */ struct timezone tz = { TIMEZONE, DST }; #define NPROC (20 + 16 * MAXUSERS) -int maxproc = NPROC; -#define NTEXT NPROC -int vm_cache_max = NTEXT/2 + 16; -#define NVNODE (NPROC + NTEXT + 100) -int desiredvnodes = NVNODE; -int maxfiles = 3 * (NPROC + MAXUSERS) + 80; -int ncallout = 16 + NPROC; -int nclist = 60 + 12 * MAXUSERS; +int maxproc = NPROC; /* maximum # of processes */ +int maxfiles = 256; /* open files per process limit */ +int ncallout = 16 + NPROC; /* maximum # of timer events */ int nmbclusters = NMBCLUSTERS; int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */ diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index c20b7e5f1081..b811c111ddc3 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_denode.c,v 1.5 1994/12/12 12:35:43 bde Exp $ */ +/* $Id: msdosfs_denode.c,v 1.6 1994/12/27 12:37:35 bde Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.9 1994/08/21 18:44:00 ws Exp $ */ /*- @@ -477,7 +477,7 @@ detrunc(dep, length, flags, cred, p) #endif return error; } - vnode_pager_uncache(DETOV(dep)); /* what's this for? */ + /* vnode_pager_uncache(DETOV(dep)); /* what's this for? */ /* * is this the right place for it? */ diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 1948062c64bd..b309fcf97e50 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_vnops.c,v 1.10 1994/12/12 12:35:50 bde Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.11 1994/12/27 12:37:36 bde Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.20 1994/08/21 18:44:13 ws Exp $ */ /*- @@ -704,7 +704,6 @@ msdosfs_write(ap) dep->de_FileSize = uio->uio_offset + n; vnode_pager_setsize(vp, dep->de_FileSize); /* why? */ } - (void) vnode_pager_uncache(vp); /* why not? */ /* * Should these vnode_pager_* functions be done on dir * files? @@ -725,7 +724,6 @@ msdosfs_write(ap) if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) { - bp->b_flags |= B_AGE; bawrite(bp); } else bdwrite(bp); diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c index f3009bd30584..108a5aa58f2e 100644 --- a/sys/gnu/ext2fs/ext2_bmap.c +++ b/sys/gnu/ext2fs/ext2_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.3 1994/08/02 07:54:52 davidg Exp $ + * $Id: ufs_bmap.c,v 1.4 1994/10/08 06:57:21 phk Exp $ */ #include <sys/param.h> @@ -128,12 +128,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) if (runp) { /* * XXX - * If MAXBSIZE is the largest transfer the disks can handle, + * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; - maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; + maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? 
a : ap; @@ -179,7 +179,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_flags & B_CACHE) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC @@ -190,6 +190,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) trace(TR_BREADMISS, pack(vp, size), metalbn); bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c index f3009bd30584..108a5aa58f2e 100644 --- a/sys/gnu/fs/ext2fs/ext2_bmap.c +++ b/sys/gnu/fs/ext2fs/ext2_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.3 1994/08/02 07:54:52 davidg Exp $ + * $Id: ufs_bmap.c,v 1.4 1994/10/08 06:57:21 phk Exp $ */ #include <sys/param.h> @@ -128,12 +128,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) if (runp) { /* * XXX - * If MAXBSIZE is the largest transfer the disks can handle, + * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; - maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; + maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; @@ -179,7 +179,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_flags & B_CACHE) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC @@ -190,6 +190,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) trace(TR_BREADMISS, pack(vp, size), metalbn); bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index e4d7443e7b00..eeba17b1f7b1 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.98 1994/12/11 03:33:58 davidg Exp $ + * $Id: machdep.c,v 1.99 1995/01/05 19:51:14 se Exp $ */ #include "npx.h" @@ -114,17 +114,12 @@ char cpu_model[sizeof("Cy486DLC") + 1]; /* * Declare these as initialized data so we can patch them. */ -int nswbuf = 0; +int nswbuf = 128; #ifdef NBUF int nbuf = NBUF; #else int nbuf = 0; #endif -#ifdef BUFPAGES -int bufpages = BUFPAGES; -#else -int bufpages = 0; -#endif #ifdef BOUNCE_BUFFERS extern char *bouncememory; @@ -170,9 +165,7 @@ cpu_startup() vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; -#ifdef BOUNCE_BUFFERS vm_offset_t minaddr; -#endif /* BOUNCE_BUFFERS */ if (boothowto & RB_VERBOSE) bootverbose++; @@ -261,33 +254,11 @@ again: valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif - /* - * Determine how many buffers to allocate. - * Use 20% of memory of memory beyond the first 2MB - * Insure a minimum of 16 fs buffers. - * We allocate 1/2 as many swap buffer headers as file i/o buffers. - */ - if (bufpages == 0) - bufpages = ((physmem << PGSHIFT) - 2048*1024) / NBPG / 6; - if (bufpages < 64) - bufpages = 64; - /* - * We must still limit the maximum number of buffers to be no - * more than 750 because we'll run out of kernel VM otherwise. 
- */ - bufpages = min(bufpages, 1500); - if (nbuf == 0) { - nbuf = bufpages / 2; - if (nbuf < 32) - nbuf = 32; - } - freebufspace = bufpages * NBPG; - if (nswbuf == 0) { - nswbuf = (nbuf / 2) &~ 1; /* force even */ - if (nswbuf > 64) - nswbuf = 64; /* sanity */ - } + if (nbuf == 0) + nbuf = min(physmem / 30, 256); + nswbuf = nbuf; + valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); @@ -296,8 +267,10 @@ again: * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { - if (bouncepages == 0) - bouncepages = 96; /* largest physio size + extra */ + if (bouncepages == 0) { + bouncepages = 64; + bouncepages += ((Maxmem - 4096) / 2048) * 32; + } v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } @@ -333,6 +306,10 @@ again: (nbuf*MAXBSIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); + exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (16*ARG_MAX), TRUE); + u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size @@ -353,8 +330,6 @@ again: if (boothowto & RB_CONFIG) userconfig(); printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count); - printf("using %d buffers containing %d bytes of memory\n", - nbuf, bufpages * CLBYTES); #ifdef BOUNCE_BUFFERS /* @@ -744,9 +719,11 @@ boot(arghowto) for (iter = 0; iter < 20; iter++) { nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) - if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) + for (bp = &buf[nbuf]; --bp >= buf; ) { + if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) { nbusy++; + } + } if (nbusy == 0) break; printf("%d ", nbusy); @@ -1642,4 +1619,3 @@ disk_externalize(int drive, void *userp, size_t *maxlen) *maxlen -= sizeof drive; return copyout(&drive, userp, sizeof drive); } - diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 22cec02726c1..d8f550153471 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.38 1994/12/18 03:36:27 davidg Exp $ + * $Id: pmap.c,v 1.39 1994/12/18 14:16:22 davidg Exp $ */ /* @@ -142,8 +142,6 @@ int i386pagesperpage; /* PAGE_SIZE / I386_PAGE_SIZE */ boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ vm_offset_t vm_first_phys, vm_last_phys; -static inline boolean_t pmap_testbit(); -static inline void pmap_changebit(); static inline int pmap_is_managed(); static inline void * vm_get_pmap(); static inline void vm_put_pmap(); @@ -152,6 +150,7 @@ static void pmap_alloc_pv_entry(); static inline pv_entry_t get_pv_entry(); static inline void pmap_use_pt(); static inline void pmap_unuse_pt(); +int nkpt; extern vm_offset_t clean_sva, clean_eva; @@ -230,8 +229,7 @@ pmap_extract(pmap, va) } pa = *(int *) avtopte(va); } - pa = (pa & PG_FRAME) | (va & ~PG_FRAME); - return pa; + return ((pa & PG_FRAME) | (va & ~PG_FRAME)); } return 0; @@ -358,6 +356,7 @@ pmap_bootstrap(firstaddr, loadaddr) simple_lock_init(&kernel_pmap->pm_lock); kernel_pmap->pm_count = 1; + nkpt = NKPT; #if BSDVM_COMPAT /* @@ -376,10 +375,8 @@ pmap_bootstrap(firstaddr, loadaddr) virtual_avail = va; #endif /* - * reserve special hunk of memory for use by bus dma as a bounce - * buffer (contiguous virtual *and* physical memory). 
for now, - * assume vm does not use memory beneath hole, and we know that - * the bootstrap uses top 32k of base memory. -wfj + * Reserve special hunk of memory for use by bus dma as a bounce + * buffer (contiguous virtual *and* physical memory). */ { extern vm_offset_t isaphysmem; @@ -420,8 +417,7 @@ pmap_init(phys_start, phys_end) addr = (vm_offset_t) KERNBASE + IdlePTD; vm_object_reference(kernel_object); (void) vm_map_find(kernel_map, kernel_object, addr, - &addr, (4 + NKPT) * NBPG, FALSE); - + &addr, (4 + NKPDE) * NBPG, FALSE); /* * calculate the number of pv_entries needed @@ -542,7 +538,7 @@ pmap_pinit(pmap) pmap->pm_pdir = (pd_entry_t *) vm_get_pmap(); /* wire in kernel global address entries */ - bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, NKPT*PTESIZE); + bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, nkpt*PTESIZE); /* install self-referential address mapping entry */ *(int *)(pmap->pm_pdir+PTDPTDI) = @@ -553,6 +549,65 @@ pmap_pinit(pmap) } /* + * grow the number of kernel page table entries, if needed + */ + +vm_page_t nkpg; +vm_offset_t kernel_vm_end; + +void +pmap_growkernel(vm_offset_t addr) { + struct proc *p; + struct pmap *pmap; + int s; + + s = splhigh(); + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + while(pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + ++nkpt; + } + } + + addr = (addr + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + while( kernel_vm_end < addr) { + if( pdir_pde( PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + continue; + } + + ++nkpt; + if( !nkpg) { + nkpg = vm_page_alloc(kernel_object, 0, TRUE); + vm_page_remove(nkpg); + pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); + if( !nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + } + pdir_pde( PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW); + nkpg = NULL; + + for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { + if( p->p_vmspace) { + pmap = &p->p_vmspace->vm_pmap; + *pmap_pde( pmap, kernel_vm_end) = pdir_pde( PTD, kernel_vm_end); + } + } + kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1); + } +#if 0 + if( !nkpg) { + nkpg = vm_page_alloc(kernel_object, 0, TRUE); + vm_page_remove(nkpg); + pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); + } +#endif + splx(s); +} + +/* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. @@ -674,7 +729,7 @@ pmap_alloc_pv_entry() /* * allocate a physical page out of the vm system */ - m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map)); + m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map), TRUE); if (m) { int newentries; int i; @@ -738,20 +793,18 @@ static pt_entry_t * get_pt_entry(pmap) pmap_t pmap; { - pt_entry_t *ptp; vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; + /* are we current address space or kernel? 
*/ if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - ptp=PTmap; + return PTmap; + } /* otherwise, we are alternate address space */ - } else { - if ( frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - ptp=APTmap; - } - return ptp; + if ( frame != ((int) APTDpde & PG_FRAME)) { + APTDpde = pmap->pm_pdir[PTDPTDI]; + pmap_update(); + } + return APTmap; } /* @@ -842,17 +895,11 @@ pmap_remove(pmap, sva, eva) *ptq = 0; if (pmap_is_managed(pa)) { - if ((int) oldpte & (PG_M | PG_U)) { + if ((int) oldpte & PG_M) { if ((sva < USRSTACK || sva > UPT_MAX_ADDRESS) || (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) { if (sva < clean_sva || sva >= clean_eva) { - m = PHYS_TO_VM_PAGE(pa); - if ((int) oldpte & PG_M) { - m->flags &= ~PG_CLEAN; - } - if ((int) oldpte & PG_U) { - m->flags |= PG_REFERENCED; - } + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } } } @@ -938,11 +985,12 @@ pmap_remove(pmap, sva, eva) continue; } - if ((((int) oldpte & PG_M) && (va < USRSTACK || va > UPT_MAX_ADDRESS)) - || (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { - if (va < clean_sva || va >= clean_eva ) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if ((int) oldpte & PG_M) { + if ((va < USRSTACK || va > UPT_MAX_ADDRESS) || + (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; + } } } @@ -992,28 +1040,29 @@ pmap_remove_all(pa) while (pv->pv_pmap != NULL) { pmap = pv->pv_pmap; ptp = get_pt_entry(pmap); - va = i386_btop(pv->pv_va); - pte = ptp + va; + va = pv->pv_va; + pte = ptp + i386_btop(va); if (pmap_pte_w(pte)) pmap->pm_stats.wired_count--; - if ( *pte) { + if (*pte) { pmap->pm_stats.resident_count--; anyvalid++; /* - * update the vm_page_t clean bit + * Update the vm_page_t clean and reference bits. */ - if ( (m->flags & PG_CLEAN) && - ((((int) *pte) & PG_M) && (pv->pv_va < USRSTACK || pv->pv_va > UPT_MAX_ADDRESS)) - || (pv->pv_va >= USRSTACK && pv->pv_va < USRSTACK+(UPAGES*NBPG))) { - if (pv->pv_va < clean_sva || pv->pv_va >= clean_eva) { - m->flags &= ~PG_CLEAN; + if ((int) *pte & PG_M) { + if ((va < USRSTACK || va > UPT_MAX_ADDRESS) || + (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; + } } } *pte = 0; } - pmap_unuse_pt(pmap, pv->pv_va); + pmap_unuse_pt(pmap, va); npv = pv->pv_next; if (npv) { @@ -1150,13 +1199,15 @@ pmap_enter(pmap, va, pa, prot, wired) va = i386_trunc_page(va); pa = i386_trunc_page(pa); - if (va > VM_MAX_KERNEL_ADDRESS)panic("pmap_enter: toobig"); + if (va > VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: toobig"); /* * Page Directory table entry not valid, we need a new PT page */ - if ( *pmap_pde(pmap, va) == 0) { - pg("ptdi %x, va %x", pmap->pm_pdir[PTDPTDI], va); + if (*pmap_pde(pmap, va) == 0) { + printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va); + panic("invalid kernel page directory"); } pte = pmap_pte(pmap, va); @@ -1315,7 +1366,7 @@ pmap_qremove(va, count) { int i; register pt_entry_t *pte; - for(i=0;i<count;i++) { + for (i=0;i<count;i++) { pte = vtopte(va + i * NBPG); *pte = 0; } @@ -1371,7 +1422,7 @@ pmap_kremove( va) * but is *MUCH* faster than pmap_enter... 
*/ -static inline int +static inline void pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; @@ -1380,7 +1431,6 @@ pmap_enter_quick(pmap, va, pa) register pt_entry_t *pte; register pv_entry_t pv, npv; int s; - int anyvalid = 0; /* * Enter on the PV list if part of our managed memory @@ -1389,7 +1439,9 @@ pmap_enter_quick(pmap, va, pa) */ pte = vtopte(va); - if (pmap_pte_pa(pte)) { + + /* a fault on the page table might occur here */ + if (*pte) { pmap_remove(pmap, va, va + PAGE_SIZE); } @@ -1399,8 +1451,8 @@ pmap_enter_quick(pmap, va, pa) * No entries yet, use header as the first entry */ if (pv->pv_pmap == NULL) { - pv->pv_va = va; pv->pv_pmap = pmap; + pv->pv_va = va; pv->pv_next = NULL; } /* @@ -1423,14 +1475,12 @@ pmap_enter_quick(pmap, va, pa) */ pmap->pm_stats.resident_count++; - if (*pte) - anyvalid++; /* * Now validate mapping with desired protection/wiring. */ *pte = (pt_entry_t) ( (int) (pa | PG_V | PG_u)); - return (anyvalid); + return; } /* @@ -1446,12 +1496,10 @@ pmap_object_init_pt(pmap, addr, object, offset, size) vm_offset_t offset; vm_offset_t size; { - vm_offset_t tmpoff; vm_page_t p; - vm_offset_t v; vm_offset_t objbytes; - int anyvalid = 0; + int bits; if (!pmap) return; @@ -1460,7 +1508,7 @@ pmap_object_init_pt(pmap, addr, object, offset, size) * if we are processing a major portion of the object, then * scan the entire thing. */ - if( size > object->size / 2) { + if (size > (object->size >> 1)) { objbytes = size; p = object->memq.tqh_first; while ((p != NULL) && (objbytes != 0)) { @@ -1475,12 +1523,12 @@ pmap_object_init_pt(pmap, addr, object, offset, size) continue; } - if ((p->flags & (PG_BUSY|PG_FICTITIOUS|PG_FAKE)) == 0 ) { + if ((p->bmapped == 0) && + (p->busy == 0) && + ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0 ) { vm_page_hold(p); - v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff))); - /* a fault might occur here */ - *(volatile char *)v += 0; - anyvalid += pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); + pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); } p = p->listq.tqe_next; @@ -1490,23 +1538,20 @@ pmap_object_init_pt(pmap, addr, object, offset, size) /* * else lookup the pages one-by-one. */ - for(tmpoff = 0; tmpoff < size; tmpoff += NBPG) { + for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) { p = vm_page_lookup(object, tmpoff + offset); if (p) { - if( (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_FAKE)) == 0) { + if ((p->bmapped == 0) && + (p->busy == 0) && + ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0) { vm_page_hold(p); - v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff))); - /* a fault might occur here */ - *(volatile char *)v += 0; - anyvalid += pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); + pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); } } } } - - if (anyvalid) - pmap_update(); } /* @@ -1685,7 +1730,7 @@ pmap_page_exists(pmap, pa) * note that the testbit/changebit routines are inline, * and a lot of things compile-time evaluate. */ -static inline boolean_t +boolean_t pmap_testbit(pa, bit) register vm_offset_t pa; int bit; @@ -1749,7 +1794,7 @@ pmap_testbit(pa, bit) /* * this routine is used to modify bits in ptes */ -static inline void +void pmap_changebit(pa, bit, setem) vm_offset_t pa; int bit; @@ -1816,70 +1861,6 @@ pmap_page_protect(phys, prot) } } -/* - * Clear the modify bits on the specified physical page. 
- */ -void -pmap_clear_modify(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_M, FALSE); -} - -/* - * pmap_clear_reference: - * - * Clear the reference bit on the specified physical page. - */ -void -pmap_clear_reference(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_U, FALSE); -} - -/* - * pmap_is_referenced: - * - * Return whether or not the specified physical page is referenced - * by any physical maps. - */ - -boolean_t -pmap_is_referenced(pa) - vm_offset_t pa; -{ - return(pmap_testbit(pa, PG_U)); -} - -/* - * pmap_is_modified: - * - * Return whether or not the specified physical page is modified - * by any physical maps. - */ - -boolean_t -pmap_is_modified(pa) - vm_offset_t pa; -{ - return(pmap_testbit(pa, PG_M)); -} - -/* - * Routine: pmap_copy_on_write - * Function: - * Remove write privileges from all - * physical maps for this physical page. - */ -void -pmap_copy_on_write(pa) - vm_offset_t pa; -{ - pmap_changebit(pa, PG_RW, FALSE); -} - - vm_offset_t pmap_phys_address(ppn) int ppn; diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index d0ca150eefbc..0b4946f2d36b 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.41 1994/10/30 20:25:21 bde Exp $ + * $Id: trap.c,v 1.42 1994/12/24 07:22:58 bde Exp $ */ /* @@ -421,8 +421,6 @@ trap_pfault(frame, usermode) *(volatile char *)v += 0; ptepg = (vm_page_t) pmap_pte_vm_page(vm_map_pmap(map), v); - if( ptepg->hold_count == 0) - ptepg->act_count += 3; vm_page_hold(ptepg); /* Fault in the user page: */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 9663065f8686..bb35a55ef99e 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.28 1994/09/02 04:12:07 davidg Exp $ + * $Id: vm_machdep.c,v 1.29 1994/10/08 22:19:51 phk Exp $ */ #include "npx.h" @@ -321,6 +321,8 @@ vm_bounce_alloc(bp) pa = pmap_kextract(va); if (pa >= SIXTEENMEG) ++dobounceflag; + if( pa == 0) + panic("vm_bounce_alloc: Unmapped page"); va += NBPG; } if (dobounceflag == 0) @@ -492,11 +494,14 @@ vm_bounce_init() if (!bounceallocarray) panic("Cannot allocate bounce resource array\n"); - bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned)); bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT); if (!bouncepa) panic("Cannot allocate physical memory array\n"); + for(i=0;i<bounceallocarraysize;i++) { + bounceallocarray[i] = 0xffffffff; + } + for(i=0;i<bouncepages;i++) { vm_offset_t pa; if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * NBPG)) >= SIXTEENMEG) @@ -504,6 +509,7 @@ vm_bounce_init() if( pa == 0) panic("bounce memory not resident"); bouncepa[i] = pa; + bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int)))); } bouncefree = bouncepages; @@ -603,9 +609,9 @@ cpu_wait(p) struct proc *p; { /* extern vm_map_t upages_map; */ /* drop per-process resources */ - pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr, + pmap_remove(vm_map_pmap(u_map), (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + ctob(UPAGES)); - kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); + kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); vmspace_free(p->p_vmspace); } diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index b83628a7e53e..190b8dd80472 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -42,7 +42,7 @@ 
* * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.18 1994/11/14 14:12:24 bde Exp $ + * $Id: pmap.h,v 1.19 1994/12/18 03:11:46 davidg Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -62,7 +62,11 @@ typedef unsigned int *pt_entry_t; * given to the user (NUPDE) */ #ifndef NKPT +#if 0 #define NKPT 26 /* actual number of kernel page tables */ +#else +#define NKPT 9 /* actual number of kernel page tables */ +#endif #endif #ifndef NKPDE #define NKPDE 63 /* addressable number of page tables/pde's */ @@ -126,6 +130,43 @@ pmap_kextract(vm_offset_t va) pa = (pa & PG_FRAME) | (va & ~PG_FRAME); return pa; } + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * by any physical maps. + */ +#define pmap_is_referenced(pa) pmap_testbit((pa), PG_U) + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +#define pmap_is_modified(pa) pmap_testbit((pa), PG_M) + +/* + * Clear the modify bits on the specified physical page. + */ +#define pmap_clear_modify(pa) pmap_changebit((pa), PG_M, FALSE) + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +#define pmap_clear_reference(pa) pmap_changebit((pa), PG_U, FALSE) + +/* + * Routine: pmap_copy_on_write + * Function: + * Remove write privileges from all + * physical maps for this physical page. + */ +#define pmap_copy_on_write(pa) pmap_changebit((pa), PG_RW, FALSE) + #endif /* @@ -196,6 +237,7 @@ pv_entry_t pv_table; /* array of entries, one per page */ struct pcb; void pmap_activate __P((pmap_t, struct pcb *)); +void pmap_changebit __P((vm_offset_t, int, boolean_t)); pmap_t pmap_kernel __P((void)); boolean_t pmap_page_exists __P((pmap_t, vm_offset_t)); pt_entry_t *pmap_pte(pmap_t, vm_offset_t); diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index 482ee5b60874..b38e914dd7c9 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 - * $Id: vmparam.h,v 1.15 1994/09/12 11:38:20 davidg Exp $ + * $Id: vmparam.h,v 1.16 1994/09/23 07:00:12 davidg Exp $ */ @@ -124,11 +124,11 @@ #define UPDT VM_MIN_KERNEL_ADDRESS #define KPT_MIN_ADDRESS ((vm_offset_t)KERNBASE - NBPG*(NKPDE+1)) #define KPT_MAX_ADDRESS ((vm_offset_t)KERNBASE - NBPG) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)KERNBASE + NKPT*NBPG*NPTEPG) +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)KERNBASE + NKPDE*NBPG*NPTEPG) /* virtual sizes (bytes) for various kernel submaps */ #define VM_MBUF_SIZE (NMBCLUSTERS*MCLBYTES) -#define VM_KMEM_SIZE (16 * 1024 * 1024) +#define VM_KMEM_SIZE (32 * 1024 * 1024) #define VM_PHYS_SIZE (USRIOSIZE*CLBYTES) #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index ffd78da0a84b..b35343be093c 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -28,7 +28,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: kern_exec.c,v 1.9 1994/09/25 19:33:36 phk Exp $ + * $Id: kern_exec.c,v 1.10 1994/10/02 17:35:13 phk Exp $ */ #include <sys/param.h> @@ -97,7 +97,7 @@ execve(p, uap, retval) * Allocate temporary demand zeroed space for argument and * environment strings */ - error = vm_allocate(kernel_map, (vm_offset_t *)&iparams->stringbase, + error = vm_allocate(exec_map, (vm_offset_t *)&iparams->stringbase, ARG_MAX, TRUE); if (error) { log(LOG_WARNING, "execve: failed to allocate string space\n"); @@ -127,7 +127,7 @@ interpret: error = namei(ndp); if (error) { - vm_deallocate(kernel_map, (vm_offset_t)iparams->stringbase, + vm_deallocate(exec_map, (vm_offset_t)iparams->stringbase, ARG_MAX); goto exec_fail; } @@ -296,7 +296,7 @@ interpret: /* * free various allocated resources */ - if (vm_deallocate(kernel_map, (vm_offset_t)iparams->stringbase, ARG_MAX)) + if (vm_deallocate(exec_map, (vm_offset_t)iparams->stringbase, ARG_MAX)) panic("execve: string buffer dealloc failed (1)"); if (vm_deallocate(kernel_map, (vm_offset_t)image_header, PAGE_SIZE)) panic("execve: header dealloc failed (2)"); @@ -307,7 +307,7 @@ interpret: exec_fail_dealloc: if (iparams->stringbase && iparams->stringbase != (char *)-1) - if (vm_deallocate(kernel_map, (vm_offset_t)iparams->stringbase, + if (vm_deallocate(exec_map, (vm_offset_t)iparams->stringbase, ARG_MAX)) panic("execve: string buffer dealloc failed (2)"); if (iparams->image_header && iparams->image_header != (char *)-1) diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 86e35d7401da..24eeb8875981 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 - * $Id: kern_malloc.c,v 1.5 1994/10/09 07:34:56 davidg Exp $ + * $Id: kern_malloc.c,v 1.6 1994/12/17 04:04:42 davidg Exp $ */ #include <sys/param.h> @@ -365,6 +365,9 @@ kmeminit() ERROR!_kmeminit:_MAXALLOCSAVE_too_small #endif npg = VM_KMEM_SIZE/ NBPG; + if( npg > cnt.v_page_count) + npg = cnt.v_page_count; + kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, (vm_size_t)(npg * sizeof(struct kmemusage))); kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 4a33c462cbc7..ad88ab6785bd 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -16,7 +16,7 @@ * 4. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: kern_physio.c,v 1.7 1994/08/18 22:35:02 wollman Exp $ + * $Id: kern_physio.c,v 1.8 1994/09/25 19:33:40 phk Exp $ */ #include <sys/param.h> @@ -158,8 +158,8 @@ u_int minphys(struct buf *bp) { - if( bp->b_bcount > MAXBSIZE) { - bp->b_bcount = MAXBSIZE; + if( bp->b_bcount > MAXPHYS) { + bp->b_bcount = MAXPHYS; } return bp->b_bcount; } diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index f714cab0a653..bc6ccb35278f 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)param.c 8.2 (Berkeley) 1/21/94 - * $Id: param.c,v 1.3 1994/08/02 07:38:30 davidg Exp $ + * $Id: param.c,v 1.4 1994/09/13 14:46:24 dfr Exp $ */ #include <sys/param.h> @@ -81,14 +81,9 @@ int tick = 1000000 / HZ; int tickadj = 30000 / (60 * HZ); /* can adjust 30ms in 60s */ struct timezone tz = { TIMEZONE, DST }; #define NPROC (20 + 16 * MAXUSERS) -int maxproc = NPROC; -#define NTEXT NPROC -int vm_cache_max = NTEXT/2 + 16; -#define NVNODE (NPROC + NTEXT + 100) -int desiredvnodes = NVNODE; -int maxfiles = 3 * (NPROC + MAXUSERS) + 80; -int ncallout = 16 + NPROC; -int nclist = 60 + 12 * MAXUSERS; +int maxproc = NPROC; /* maximum # of processes */ +int maxfiles = 256; /* open files per process limit */ +int ncallout = 16 + NPROC; /* maximum # of timer events */ int nmbclusters = NMBCLUSTERS; int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index d0ca150eefbc..0b4946f2d36b 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.41 1994/10/30 20:25:21 bde Exp $ + * $Id: trap.c,v 1.42 1994/12/24 07:22:58 bde Exp $ */ /* @@ -421,8 +421,6 @@ trap_pfault(frame, usermode) *(volatile char *)v += 0; ptepg = (vm_page_t) pmap_pte_vm_page(vm_map_pmap(map), v); - if( ptepg->hold_count == 0) - ptepg->act_count += 3; vm_page_hold(ptepg); /* Fault in the user page: */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index c705741bec42..464ba83ed3c4 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -13,35 +13,55 @@ * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. - * 4. Modifications may be freely made to this file if the above conditions + * 4. This work was done expressly for inclusion into FreeBSD. Other use + * is allowed if this notation is included. + * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: vfs_bio.c,v 1.15 1994/10/18 06:55:40 davidg Exp $ + * $Id: vfs_bio.c,v 1.16 1994/10/23 20:52:11 davidg Exp $ */ +/* + * this file contains a new buffer I/O scheme implementing a coherent + * VM object and buffer cache scheme. Pains have been taken to make + * sure that the performance degradation associated with schemes such + * as this is not realized. + * + * Author: John S. Dyson + * Significant help during the development and debugging phases + * had been provided by David Greenman, also of the FreeBSD core team. 
+ */ + +#define VMIO #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/vnode.h> +#include <vm/vm.h> +#include <vm/vm_pageout.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> #include <sys/buf.h> #include <sys/mount.h> #include <sys/malloc.h> #include <sys/resourcevar.h> #include <sys/proc.h> -#include <vm/vm.h> -#include <vm/vm_pageout.h> #include <miscfs/specfs/specdev.h> -struct buf *buf; /* buffer header pool */ -int nbuf; /* number of buffer headers calculated elsewhere */ +struct buf *buf; /* buffer header pool */ +int nbuf; /* number of buffer headers calculated + * elsewhere */ struct swqueue bswlist; +int nvmio, nlru; -extern vm_map_t buffer_map, io_map; +extern vm_map_t buffer_map, io_map, kernel_map, pager_map; -void vm_hold_free_pages(vm_offset_t from, vm_offset_t to); -void vm_hold_load_pages(vm_offset_t from, vm_offset_t to); +void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); +void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); +void vfs_dirty_pages(struct buf * bp); +void vfs_busy_pages(struct buf *, int clear_modify); int needsbuffer; @@ -51,6 +71,18 @@ int needsbuffer; */ int vfs_update_wakeup; + +/* + * buffers base kva + */ +caddr_t buffers_kva; + +/* + * bogus page -- for I/O to/from partially complete buffers + */ +vm_page_t bogus_page; +vm_offset_t bogus_offset; + /* * Initialize buffer headers and related structures. */ @@ -59,22 +91,21 @@ bufinit() { struct buf *bp; int i; - caddr_t baddr; TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); /* first, make a null hash table */ - for(i=0;i<BUFHSZ;i++) + for (i = 0; i < BUFHSZ; i++) LIST_INIT(&bufhashtbl[i]); /* next, make a null set of free lists */ - for(i=0;i<BUFFER_QUEUES;i++) + for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); - baddr = (caddr_t)kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf); + buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf); /* finally, initialize each buffer header and stick on empty q */ - for(i=0;i<nbuf;i++) { + for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ @@ -84,20 +115,27 @@ bufinit() bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_vnbufs.le_next = NOLIST; - bp->b_data = baddr + i * MAXBSIZE; + bp->b_data = buffers_kva + i * MAXBSIZE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } + + bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); + bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0); + } /* * remove the buffer from the appropriate free list */ void -bremfree(struct buf *bp) +bremfree(struct buf * bp) { int s = splbio(); - if( bp->b_qindex != QUEUE_NONE) { + + if (bp->b_qindex != QUEUE_NONE) { + if (bp->b_qindex == QUEUE_LRU) + --nlru; TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; } else { @@ -110,12 +148,12 @@ bremfree(struct buf *bp) * Get a buffer with the specified data. Look in the cache first. 
*/ int -bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred, - struct buf **bpp) +bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, + struct buf ** bpp) { struct buf *bp; - bp = getblk (vp, blkno, size, 0, 0); + bp = getblk(vp, blkno, size, 0, 0); *bpp = bp; /* if not found in cache, do some I/O */ @@ -123,16 +161,19 @@ bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred, if (curproc && curproc->p_stats) /* count block I/O */ curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; - bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); - if( bp->b_rcred == NOCRED) { + bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); + if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); - return( biowait (bp)); + return (biowait(bp)); + } else if (bp->b_lblkno == bp->b_blkno) { + VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0, + &bp->b_blkno, (int *) 0); } - return (0); } @@ -141,103 +182,270 @@ bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred, * read-ahead blocks. */ int -breadn(struct vnode *vp, daddr_t blkno, int size, - daddr_t *rablkno, int *rabsize, - int cnt, struct ucred *cred, struct buf **bpp) +breadn(struct vnode * vp, daddr_t blkno, int size, + daddr_t * rablkno, int *rabsize, + int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; - *bpp = bp = getblk (vp, blkno, size, 0, 0); + *bpp = bp = getblk(vp, blkno, size, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc && curproc->p_stats) /* count block I/O */ curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; - bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); - if( bp->b_rcred == NOCRED) { + bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); + if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); ++readwait; + } else if (bp->b_lblkno == bp->b_blkno) { + VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0, + &bp->b_blkno, (int *) 0); } - - for(i=0;i<cnt;i++, rablkno++, rabsize++) { - if( incore(vp, *rablkno)) { + for (i = 0; i < cnt; i++, rablkno++, rabsize++) { + if (inmem(vp, *rablkno)) continue; - } - rabp = getblk (vp, *rablkno, *rabsize, 0, 0); + rabp = getblk(vp, *rablkno, *rabsize, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curproc && curproc->p_stats) curproc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_READ | B_ASYNC; - rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); - if( rabp->b_rcred == NOCRED) { + rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); + if (rabp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); rabp->b_rcred = cred; } + vfs_busy_pages(rabp, 0); VOP_STRATEGY(rabp); } else { brelse(rabp); } } - if( readwait) { - rv = biowait (bp); + if (readwait) { + rv = biowait(bp); } - return (rv); } /* + * this routine is used by filesystems to get at pages in the PG_CACHE + * queue. also, it is used to read pages that are currently being + * written out by the file i/o routines. 
+ */ +int +vfs_read_bypass(struct vnode * vp, struct uio * uio, int maxread, daddr_t lbn) +{ + vm_page_t m; + vm_offset_t kv; + int nread; + int error; + struct buf *bp, *bpa; + vm_object_t obj; + int off; + int nrest; + int flags; + int s; + + return 0; + /* + * don't use the bypass mechanism for non-vmio vnodes + */ + if ((vp->v_flag & VVMIO) == 0) + return 0; + /* + * get the VM object (it has the pages) + */ + obj = (vm_object_t) vp->v_vmdata; + if (obj == NULL) + return 0; + + /* + * if there is a buffer that is not busy, it is faster to use it. + * This like read-ahead, etc work better + */ + + s = splbio(); + if ((bp = incore(vp, lbn)) && + (((bp->b_flags & B_READ) && (bp->b_flags & B_BUSY)) + || (bp->b_flags & B_BUSY) == 0)) { + splx(s); + return 0; + } + splx(s); + + /* + * get a pbuf --> we just use the kva + */ + kv = kmem_alloc_wait(pager_map, PAGE_SIZE); + nread = 0; + error = 0; + + while (!error && uio->uio_resid && maxread > 0) { + int po; + int count; + int s; + +relookup: + /* + * lookup the page + */ + m = vm_page_lookup(obj, trunc_page(uio->uio_offset)); + if (!m) + break; + /* + * get the offset into the page, and the amount to read in the + * page + */ + nrest = round_page(uio->uio_offset) - uio->uio_offset; + if (nrest > uio->uio_resid) + nrest = uio->uio_resid; + + /* + * check the valid bits for the page (DEV_BSIZE chunks) + */ + if (!vm_page_is_valid(m, uio->uio_offset, nrest)) + break; + + /* + * if the page is busy, wait for it + */ + s = splhigh(); + if (!m->valid || (m->flags & PG_BUSY)) { + m->flags |= PG_WANTED; + tsleep((caddr_t) m, PVM, "vnibyp", 0); + splx(s); + goto relookup; + } + /* + * if the page is on the cache queue, remove it -- cache queue + * pages should be freeable by vm_page_alloc anytime. + */ + if (m->flags & PG_CACHE) { + if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_reserved) { + VM_WAIT; + goto relookup; + } + vm_page_unqueue(m); + } + /* + * add a buffer mapping (essentially wires the page too). + */ + m->bmapped++; + splx(s); + + /* + * enter it into the kva + */ + pmap_qenter(kv, &m, 1); + + /* + * do the copy + */ + po = uio->uio_offset & (PAGE_SIZE - 1); + count = PAGE_SIZE - po; + if (count > maxread) + count = maxread; + if (count > uio->uio_resid) + count = uio->uio_resid; + + error = uiomove((caddr_t) kv + po, count, uio); + if (!error) { + nread += count; + maxread -= count; + } + /* + * remove from kva + */ + pmap_qremove(kv, 1); + PAGE_WAKEUP(m); /* XXX probably unnecessary */ + /* + * If the page was on the cache queue, then by definition + * bmapped was 0. Thus the following case will also take care + * of the page being removed from the cache queue above. + * Also, it is possible that the page was already entered onto + * another queue (or was already there), so we don't put it + * onto the cache queue... + */ + m->bmapped--; + if (m->bmapped == 0 && + (m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) == 0 && + m->wire_count == 0) { + vm_page_test_dirty(m); + + /* + * make sure that the darned page is on a queue + * somewhere... + */ + if ((m->dirty & m->valid) == 0) { + vm_page_cache(m); + } else if (m->hold_count == 0) { + vm_page_deactivate(m); + } else { + vm_page_activate(m); + } + } + } + /* + * release our buffer(kva). + */ + kmem_free_wakeup(pager_map, kv, PAGE_SIZE); + return nread; +} + + +/* * Write, release buffer on completion. (Done by iodone * if async.) 
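/*
 * Editor's aside (not part of the commit): the copy loop in
 * vfs_read_bypass() above clamps each transfer to the remainder of the
 * current page, then to the caller's maxread, then to the uio residual.
 * This is a stand-alone sketch of just that clamping arithmetic with
 * made-up sample numbers; PAGE_SIZE is assumed to be 4096 here.
 */
#include <stdio.h>

#define PAGE_SIZE 4096

static int
chunk(long uio_offset, int maxread, int uio_resid)
{
        int po = uio_offset & (PAGE_SIZE - 1);  /* offset within the page */
        int count = PAGE_SIZE - po;             /* bytes left in this page */

        if (count > maxread)
                count = maxread;
        if (count > uio_resid)
                count = uio_resid;
        return (count);
}

int
main(void)
{
        /* read starting 100 bytes into a page, 10000 wanted, 6000 allowed */
        printf("copy %d bytes this pass\n", chunk(100, 6000, 10000));
        return (0);
}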
*/ int -bwrite(struct buf *bp) +bwrite(struct buf * bp) { int oldflags = bp->b_flags; - if(bp->b_flags & B_INVAL) { + if (bp->b_flags & B_INVAL) { brelse(bp); return (0); - } - - if(!(bp->b_flags & B_BUSY)) + } + if (!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); - bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); bp->b_flags |= B_WRITEINPROG; if (oldflags & B_ASYNC) { if (oldflags & B_DELWRI) { reassignbuf(bp, bp->b_vp); - } else if( curproc) { + } else if (curproc) { ++curproc->p_stats->p_ru.ru_oublock; } } - bp->b_vp->v_numoutput++; + vfs_busy_pages(bp, 1); VOP_STRATEGY(bp); - if( (oldflags & B_ASYNC) == 0) { + if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); + if (oldflags & B_DELWRI) { reassignbuf(bp, bp->b_vp); - } else if( curproc) { + } else if (curproc) { ++curproc->p_stats->p_ru.ru_oublock; } brelse(bp); return (rtval); - } - - return(0); + } + return (0); } int @@ -251,28 +459,26 @@ vn_bwrite(ap) * Delayed write. (Buffer is marked dirty). */ void -bdwrite(struct buf *bp) +bdwrite(struct buf * bp) { - if((bp->b_flags & B_BUSY) == 0) { + if ((bp->b_flags & B_BUSY) == 0) { panic("bdwrite: buffer is not busy"); } - - if(bp->b_flags & B_INVAL) { + if (bp->b_flags & B_INVAL) { brelse(bp); return; } - - if(bp->b_flags & B_TAPE) { + if (bp->b_flags & B_TAPE) { bawrite(bp); return; } - bp->b_flags &= ~B_READ; - if( (bp->b_flags & B_DELWRI) == 0) { - if( curproc) + vfs_dirty_pages(bp); + if ((bp->b_flags & B_DELWRI) == 0) { + if (curproc) ++curproc->p_stats->p_ru.ru_oublock; - bp->b_flags |= B_DONE|B_DELWRI; + bp->b_flags |= B_DONE | B_DELWRI; reassignbuf(bp, bp->b_vp); } brelse(bp); @@ -285,8 +491,17 @@ bdwrite(struct buf *bp) * The buffer is released when the output completes. */ void -bawrite(struct buf *bp) +bawrite(struct buf * bp) { + if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) { + int s = splbio(); + + while (bp->b_vp->v_numoutput > 16) { + bp->b_vp->v_flag |= VBWAIT; + tsleep((caddr_t) & bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0); + } + splx(s); + } bp->b_flags |= B_ASYNC; (void) bwrite(bp); } @@ -295,69 +510,187 @@ bawrite(struct buf *bp) * Release a buffer. */ void -brelse(struct buf *bp) +brelse(struct buf * bp) { - int x; + int s; + if (bp->b_flags & B_CLUSTER) { + relpbuf(bp); + return; + } /* anyone need a "free" block? */ - x=splbio(); + s = splbio(); + if (needsbuffer) { needsbuffer = 0; - wakeup((caddr_t)&needsbuffer); + wakeup((caddr_t) & needsbuffer); } - /* anyone need this block? 
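/*
 * Editor's aside (not part of the commit): bawrite() above throttles a
 * vnode that already has many writes in flight -- it only starts waiting
 * once v_numoutput exceeds 24, then sleeps until the backlog drains to 16.
 * A tiny stand-alone model of that high/low watermark behaviour, with the
 * watermarks copied from the hunk above:
 */
#include <stdio.h>

#define HIWAT 24        /* start throttling above this many pending writes */
#define LOWAT 16        /* resume once the backlog drains to this level */

int
main(void)
{
        int numoutput = 30;     /* pretend 30 writes are already queued */

        if (numoutput > HIWAT) {
                while (numoutput > LOWAT) {
                        /* in the kernel this is a tsleep() on v_numoutput */
                        numoutput--;    /* model a completion waking us up */
                }
        }
        printf("resume issuing writes at numoutput = %d\n", numoutput);
        return (0);
}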
*/ if (bp->b_flags & B_WANTED) { - bp->b_flags &= ~(B_WANTED|B_AGE); - wakeup((caddr_t)bp); + bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE); + wakeup((caddr_t) bp); + } else if (bp->b_flags & B_VMIO) { + bp->b_flags &= ~(B_WANTED | B_PDWANTED); + wakeup((caddr_t) bp); } - if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_ERROR; - if ((bp->b_flags & (B_NOCACHE|B_INVAL|B_ERROR)) || - (bp->b_bufsize <= 0)) { + if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || + (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; - bp->b_flags &= ~(B_DELWRI|B_CACHE); - if(bp->b_vp) + bp->b_flags &= ~(B_DELWRI | B_CACHE); + if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) brelvp(bp); } + if (bp->b_flags & B_VMIO) { + vm_offset_t foff; + vm_object_t obj; + int i, resid; + vm_page_t m; + int iototal = bp->b_bufsize; + + foff = 0; + obj = 0; + if (bp->b_npages) { + if (bp->b_vp && bp->b_vp->v_mount) { + foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + } else { + /* + * vnode pointer has been ripped away -- + * probably file gone... + */ + foff = bp->b_pages[0]->offset; + } + } + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + if (m == bogus_page) { + panic("brelse: bogus page found"); + } + resid = (m->offset + PAGE_SIZE) - foff; + if (resid > iototal) + resid = iototal; + if (resid > 0) { + if (bp->b_flags & (B_ERROR | B_NOCACHE)) { + vm_page_set_invalid(m, foff, resid); + } else if ((bp->b_flags & B_DELWRI) == 0) { + vm_page_set_clean(m, foff, resid); + vm_page_set_valid(m, foff, resid); + } + } else { + vm_page_test_dirty(m); + } + if (bp->b_flags & B_INVAL) { + if (m->bmapped == 0) { + panic("brelse: bmapped is zero for page\n"); + } + --m->bmapped; + if (m->bmapped == 0) { + PAGE_WAKEUP(m); + if ((m->dirty & m->valid) == 0) + vm_page_cache(m); + } + } + foff += resid; + iototal -= resid; + } - if( bp->b_qindex != QUEUE_NONE) + if (bp->b_flags & B_INVAL) { + pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); + bp->b_npages = 0; + bp->b_bufsize = 0; + bp->b_flags &= ~B_VMIO; + if (bp->b_vp) + brelvp(bp); + --nvmio; + } + } + if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); /* enqueue */ /* buffers with no memory */ - if(bp->b_bufsize == 0) { + if (bp->b_bufsize == 0) { bp->b_qindex = QUEUE_EMPTY; - TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; - /* buffers with junk contents */ - } else if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE)) { + /* buffers with junk contents */ + } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; - /* buffers that are locked */ - } else if(bp->b_flags & B_LOCKED) { + /* buffers that are locked */ + } else if (bp->b_flags & B_LOCKED) { bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); - /* buffers with stale but valid contents */ - } else if(bp->b_flags & B_AGE) { + /* buffers with stale but valid contents */ + } else if (bp->b_flags & B_AGE) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); - /* buffers with valid and quite potentially reuseable contents */ + /* buffers with valid and quite potentially reuseable contents */ } else { - bp->b_qindex = QUEUE_LRU; - TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, 
b_freelist); + if (bp->b_flags & B_VMIO) + bp->b_qindex = QUEUE_VMIO; + else { + bp->b_qindex = QUEUE_LRU; + ++nlru; + } + TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); } /* unlock */ - bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_NOCACHE|B_AGE); - splx(x); + bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE); + splx(s); +} + +/* + * this routine implements clustered async writes for + * clearing out B_DELWRI buffers... + */ +void +vfs_bio_awrite(struct buf * bp) +{ + int i; + daddr_t lblkno = bp->b_lblkno; + struct vnode *vp = bp->b_vp; + int size = vp->v_mount->mnt_stat.f_iosize; + int s; + int ncl; + struct buf *bpa; + + s = splbio(); + ncl = 1; + if (vp->v_flag & VVMIO) { + for (i = 1; i < MAXPHYS / size; i++) { + if ((bpa = incore(vp, lblkno + i)) && + ((bpa->b_flags & (B_BUSY | B_DELWRI | B_BUSY | B_CLUSTEROK | B_INVAL)) == B_DELWRI | B_CLUSTEROK) && + (bpa->b_bufsize == size)) { + if ((bpa->b_blkno == bpa->b_lblkno) || + (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE)) + break; + } else { + break; + } + } + ncl = i; + } + /* + * we don't attempt to cluster meta-data or INVALID??? buffers + */ + if ((ncl != 1) && + (bp->b_flags & (B_INVAL | B_CLUSTEROK)) == B_CLUSTEROK) { + cluster_wbuild(vp, NULL, size, lblkno, ncl, -1); + } else { + bremfree(bp); + bp->b_flags |= B_BUSY | B_ASYNC; + bwrite(bp); + } + splx(s); } int freebufspace; @@ -367,45 +700,79 @@ int allocbufspace; * Find a buffer header which is available for use. */ struct buf * -getnewbuf(int slpflag, int slptimeo) +getnewbuf(int slpflag, int slptimeo, int doingvmio) { struct buf *bp; int s; + int firstbp = 1; + s = splbio(); start: /* can we constitute a new buffer? */ if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { - if( bp->b_qindex != QUEUE_EMPTY) + if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue"); bremfree(bp); goto fillbuf; } - - if ((bp = bufqueues[QUEUE_AGE].tqh_first)) { - if( bp->b_qindex != QUEUE_AGE) + /* + * we keep the file I/O from hogging metadata I/O + */ + if (bp = bufqueues[QUEUE_AGE].tqh_first) { + if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue"); - bremfree(bp); - } else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) { - if( bp->b_qindex != QUEUE_LRU) + } else if ((nvmio > (2 * nbuf / 3)) + && (bp = bufqueues[QUEUE_VMIO].tqh_first)) { + if (bp->b_qindex != QUEUE_VMIO) + panic("getnewbuf: inconsistent VMIO queue"); + } else if ((!doingvmio || (nlru > (2 * nbuf / 3))) && + (bp = bufqueues[QUEUE_LRU].tqh_first)) { + if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue"); - bremfree(bp); - } else { + } + if (!bp) { + if (doingvmio) { + if (bp = bufqueues[QUEUE_VMIO].tqh_first) { + if (bp->b_qindex != QUEUE_VMIO) + panic("getnewbuf: inconsistent VMIO queue"); + } else if (bp = bufqueues[QUEUE_LRU].tqh_first) { + if (bp->b_qindex != QUEUE_LRU) + panic("getnewbuf: inconsistent LRU queue"); + } + } else { + if (bp = bufqueues[QUEUE_LRU].tqh_first) { + if (bp->b_qindex != QUEUE_LRU) + panic("getnewbuf: inconsistent LRU queue"); + } else if (bp = bufqueues[QUEUE_VMIO].tqh_first) { + if (bp->b_qindex != QUEUE_VMIO) + panic("getnewbuf: inconsistent VMIO queue"); + } + } + } + if (!bp) { /* wait for a free buffer of any kind */ needsbuffer = 1; - tsleep((caddr_t)&needsbuffer, PRIBIO, "newbuf", 0); + tsleep((caddr_t) & needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo); splx(s); return (0); } - - /* if we are a delayed write, convert to an async write */ - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= 
B_BUSY; - bawrite (bp); + if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { + vfs_bio_awrite(bp); + if (!slpflag && !slptimeo) { + splx(s); + return (0); + } goto start; } + bremfree(bp); - if(bp->b_vp) + if (bp->b_flags & B_VMIO) { + bp->b_flags |= B_INVAL | B_BUSY; + brelse(bp); + bremfree(bp); + } + if (bp->b_vp) brelvp(bp); /* we are not free, nor do we contain interesting data */ @@ -418,6 +785,9 @@ fillbuf: LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); splx(s); + if (bp->b_bufsize) { + allocbuf(bp, 0, 0); + } bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; @@ -425,7 +795,9 @@ fillbuf: bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; + bp->b_npages = 0; bp->b_wcred = bp->b_rcred = NOCRED; + bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_validoff = bp->b_validend = 0; return (bp); @@ -435,7 +807,7 @@ fillbuf: * Check to see if a block is currently memory resident. */ struct buf * -incore(struct vnode *vp, daddr_t blkno) +incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; struct bufhashhdr *bh; @@ -447,16 +819,9 @@ incore(struct vnode *vp, daddr_t blkno) /* Search hash chain */ while (bp) { -#ifdef DEBUG - if( (bp < buf) || (bp >= buf + nbuf)) { - printf("incore: buf out of range: %p, hash: %d\n", - bp, bh - bufhashtbl); - panic("incore: buf fault"); - } -#endif /* hit */ if (bp->b_lblkno == blkno && bp->b_vp == vp - && (bp->b_flags & B_INVAL) == 0) { + && (bp->b_flags & B_INVAL) == 0) { splx(s); return (bp); } @@ -464,26 +829,79 @@ incore(struct vnode *vp, daddr_t blkno) } splx(s); - return(0); + return (0); +} + +/* + * returns true if no I/O is needed to access the + * associated VM object. + */ + +int +inmem(struct vnode * vp, daddr_t blkno) +{ + vm_object_t obj; + vm_offset_t off, toff, tinc; + vm_page_t m; + + if (incore(vp, blkno)) + return 1; + if (vp->v_mount == 0) + return 0; + if (vp->v_vmdata == 0) + return 0; + + obj = (vm_object_t) vp->v_vmdata; + tinc = PAGE_SIZE; + if (tinc > vp->v_mount->mnt_stat.f_iosize) + tinc = vp->v_mount->mnt_stat.f_iosize; + off = blkno * vp->v_mount->mnt_stat.f_iosize; + + for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { + int mask; + + m = vm_page_lookup(obj, trunc_page(toff + off)); + if (!m) + return 0; + if (vm_page_is_valid(m, toff + off, tinc) == 0) + return 0; + } + return 1; } /* * Get a block given a specified block and offset into a file/device. 
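/*
 * Editor's aside (not part of the commit): in the vfs_bio_awrite() hunk
 * above, the test "(flags & mask) == B_DELWRI | B_CLUSTEROK" parses as
 * "((flags & mask) == B_DELWRI) | B_CLUSTEROK" because == binds tighter
 * than | in C, so as rendered here the expression is always non-zero.
 * The intended test is presumably "== (B_DELWRI | B_CLUSTEROK)".  A small
 * demonstration with stand-in flag values (not the kernel's):
 */
#include <stdio.h>

#define B_DELWRI    0x01        /* stand-in values */
#define B_CLUSTEROK 0x02
#define B_BUSY      0x04

int
main(void)
{
        int flags = B_BUSY;     /* clearly should not qualify for clustering */
        int mask = B_BUSY | B_DELWRI | B_CLUSTEROK;

        printf("as written:  %d\n", (flags & mask) == B_DELWRI | B_CLUSTEROK);
        printf("as intended: %d\n", (flags & mask) == (B_DELWRI | B_CLUSTEROK));
        return (0);
}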
*/ struct buf * -getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo) +getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; int s; struct bufhashhdr *bh; + vm_offset_t off; + int bsize; + int nleft; + bsize = DEV_BSIZE; + if (vp->v_mount) { + bsize = vp->v_mount->mnt_stat.f_iosize; + } s = splbio(); loop: - if ((bp = incore(vp, blkno))) { + if ((cnt.v_free_count + cnt.v_cache_count) < + cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE) + wakeup((caddr_t) & vm_pages_needed); + if (bp = incore(vp, blkno)) { if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; - tsleep ((caddr_t)bp, PRIBIO, "getblk", 0); - goto loop; + if (curproc == pageproc) { + bp->b_flags |= B_PDWANTED; + wakeup((caddr_t) & cnt.v_free_count); + } + if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo)) + goto loop; + splx(s); + return (struct buf *) NULL; } bp->b_flags |= B_BUSY | B_CACHE; bremfree(bp); @@ -491,23 +909,59 @@ loop: * check for size inconsistancies */ if (bp->b_bcount != size) { +#if defined(VFS_BIO_DEBUG) printf("getblk: invalid buffer size: %ld\n", bp->b_bcount); +#endif bp->b_flags |= B_INVAL; bwrite(bp); goto loop; } + splx(s); + return (bp); } else { - if ((bp = getnewbuf(0, 0)) == 0) + vm_object_t obj; + int doingvmio; + + if ((obj = (vm_object_t) vp->v_vmdata) && + (vp->v_flag & VVMIO) /* && (blkno >= 0) */ ) { + doingvmio = 1; + } else { + doingvmio = 0; + } + if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) { + if (slpflag || slptimeo) + return NULL; + goto loop; + } + if (incore(vp, blkno)) { + bp->b_flags |= B_INVAL; + brelse(bp); goto loop; + } bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); LIST_REMOVE(bp, b_hash); bh = BUFHASH(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); - allocbuf(bp, size); + if (doingvmio) { + bp->b_flags |= (B_VMIO | B_CACHE); +#if defined(VFS_BIO_DEBUG) + if (vp->v_type != VREG) + printf("getblk: vmioing file type %d???\n", vp->v_type); +#endif + ++nvmio; + } else { + if (bp->b_flags & B_VMIO) + --nvmio; + bp->b_flags &= ~B_VMIO; + } + splx(s); + if (!allocbuf(bp, size, 1)) { + s = splbio(); + goto loop; + } + return (bp); } - splx(s); - return (bp); } /* @@ -517,9 +971,9 @@ struct buf * geteblk(int size) { struct buf *bp; - while ((bp = getnewbuf(0, 0)) == 0) - ; - allocbuf(bp, size); + + while ((bp = getnewbuf(0, 0, 0)) == 0); + allocbuf(bp, size, 0); bp->b_flags |= B_INVAL; return (bp); } @@ -528,50 +982,275 @@ geteblk(int size) * Modify the length of a buffer's underlying buffer storage without * destroying information (unless, of course the buffer is shrinking). 
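/*
 * Editor's aside (not part of the commit): for VMIO buffers, allocbuf()
 * below rounds the requested size up to a DEV_BSIZE multiple and then
 * works out how many whole pages must back it.  A stand-alone sketch of
 * that arithmetic, assuming DEV_BSIZE = 512 and PAGE_SIZE = 4096 (typical
 * i386 values of the era):
 */
#include <stdio.h>

#define DEV_BSIZE 512
#define PAGE_SIZE 4096
#define round_page(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
        int size = 6144;        /* e.g. a 6K filesystem block */
        int newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
        int desiredpages = round_page(newbsize) / PAGE_SIZE;

        printf("newbsize = %d, desiredpages = %d\n", newbsize, desiredpages);
        return (0);
}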
*/ -void -allocbuf(struct buf *bp, int size) +int +allocbuf(struct buf * bp, int size, int vmio) { - int newbsize = round_page(size); + int s; + int newbsize; + int i; - if( newbsize == bp->b_bufsize) { - bp->b_bcount = size; - return; - } else if( newbsize < bp->b_bufsize) { - vm_hold_free_pages( - (vm_offset_t) bp->b_data + newbsize, - (vm_offset_t) bp->b_data + bp->b_bufsize); - } else if( newbsize > bp->b_bufsize) { - vm_hold_load_pages( - (vm_offset_t) bp->b_data + bp->b_bufsize, - (vm_offset_t) bp->b_data + newbsize); + if ((bp->b_flags & B_VMIO) == 0) { + newbsize = round_page(size); + if (newbsize == bp->b_bufsize) { + bp->b_bcount = size; + return 1; + } else if (newbsize < bp->b_bufsize) { + if (bp->b_flags & B_MALLOC) { + bp->b_bcount = size; + return 1; + } + vm_hold_free_pages( + bp, + (vm_offset_t) bp->b_data + newbsize, + (vm_offset_t) bp->b_data + bp->b_bufsize); + } else if (newbsize > bp->b_bufsize) { + if (bp->b_flags & B_MALLOC) { + vm_offset_t bufaddr; + + bufaddr = (vm_offset_t) bp->b_data; + bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; + vm_hold_load_pages( + bp, + (vm_offset_t) bp->b_data, + (vm_offset_t) bp->b_data + newbsize); + bcopy((caddr_t) bufaddr, bp->b_data, bp->b_bcount); + free((caddr_t) bufaddr, M_TEMP); + } else if ((newbsize <= PAGE_SIZE / 2) && (bp->b_bufsize == 0)) { + bp->b_flags |= B_MALLOC; + bp->b_data = malloc(newbsize, M_TEMP, M_WAITOK); + bp->b_npages = 0; + } else { + vm_hold_load_pages( + bp, + (vm_offset_t) bp->b_data + bp->b_bufsize, + (vm_offset_t) bp->b_data + newbsize); + } + } + /* + * adjust buffer cache's idea of memory allocated to buffer + * contents + */ + freebufspace -= newbsize - bp->b_bufsize; + allocbufspace += newbsize - bp->b_bufsize; + } else { + vm_page_t m; + int desiredpages; + + newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE; + desiredpages = round_page(newbsize) / PAGE_SIZE; + + if (newbsize == bp->b_bufsize) { + bp->b_bcount = size; + return 1; + } else if (newbsize < bp->b_bufsize) { + if (desiredpages < bp->b_npages) { + pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + + desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages)); + for (i = desiredpages; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + s = splhigh(); + if ((m->flags & PG_BUSY) || (m->busy != 0)) { + m->flags |= PG_WANTED; + tsleep(m, PVM, "biodep", 0); + } + splx(s); + + if (m->bmapped == 0) { + printf("allocbuf: bmapped is zero for page %d\n", i); + panic("allocbuf: error"); + } + --m->bmapped; + if (m->bmapped == 0) { + PAGE_WAKEUP(m); + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + vm_page_free(m); + } + bp->b_pages[i] = NULL; + } + bp->b_npages = desiredpages; + } + } else { + vm_object_t obj; + vm_offset_t tinc, off, toff, objoff; + int pageindex, curbpnpages; + struct vnode *vp; + int bsize; + + vp = bp->b_vp; + bsize = vp->v_mount->mnt_stat.f_iosize; + + if (bp->b_npages < desiredpages) { + obj = (vm_object_t) vp->v_vmdata; + tinc = PAGE_SIZE; + if (tinc > bsize) + tinc = bsize; + off = bp->b_lblkno * bsize; + curbpnpages = bp->b_npages; + doretry: + for (toff = 0; toff < newbsize; toff += tinc) { + int mask; + int bytesinpage; + + pageindex = toff / PAGE_SIZE; + objoff = trunc_page(toff + off); + if (pageindex < curbpnpages) { + int pb; + + m = bp->b_pages[pageindex]; + if (m->offset != objoff) + panic("allocbuf: page changed offset??!!!?"); + bytesinpage = tinc; + if (tinc > (newbsize - toff)) + bytesinpage = newbsize - toff; + if (!vm_page_is_valid(m, toff + off, bytesinpage)) { + bp->b_flags &= ~B_CACHE; + } + 
if ((m->flags & PG_ACTIVE) == 0) + vm_page_activate(m); + continue; + } + m = vm_page_lookup(obj, objoff); + if (!m) { + m = vm_page_alloc(obj, objoff, 0); + if (!m) { + int j; + + for (j = bp->b_npages; j < pageindex; j++) { + vm_page_t mt = bp->b_pages[j]; + + PAGE_WAKEUP(mt); + if (!mt->valid) { + vm_page_free(mt); + } + } + VM_WAIT; + if (vmio && (bp->b_flags & B_PDWANTED)) { + --nvmio; + bp->b_flags &= ~B_VMIO; + bp->b_flags |= B_INVAL; + brelse(bp); + return 0; + } + curbpnpages = bp->b_npages; + goto doretry; + } + m->valid = 0; + vm_page_activate(m); + } else if ((m->valid == 0) || (m->flags & PG_BUSY)) { + int j; + int bufferdestroyed = 0; + + for (j = bp->b_npages; j < pageindex; j++) { + vm_page_t mt = bp->b_pages[j]; + + PAGE_WAKEUP(mt); + if (mt->valid == 0) { + vm_page_free(mt); + } + } + if (vmio && (bp->b_flags & B_PDWANTED)) { + --nvmio; + bp->b_flags &= ~B_VMIO; + bp->b_flags |= B_INVAL; + brelse(bp); + VM_WAIT; + bufferdestroyed = 1; + } + s = splbio(); + if (m) { + m->flags |= PG_WANTED; + tsleep(m, PRIBIO, "pgtblk", 0); + } + splx(s); + if (bufferdestroyed) + return 0; + curbpnpages = bp->b_npages; + goto doretry; + } else { + int pb; + + if ((m->flags & PG_CACHE) && + (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { + int j; + + for (j = bp->b_npages; j < pageindex; j++) { + vm_page_t mt = bp->b_pages[j]; + + PAGE_WAKEUP(mt); + if (mt->valid == 0) { + vm_page_free(mt); + } + } + VM_WAIT; + if (vmio && (bp->b_flags & B_PDWANTED)) { + --nvmio; + bp->b_flags &= ~B_VMIO; + bp->b_flags |= B_INVAL; + brelse(bp); + return 0; + } + curbpnpages = bp->b_npages; + goto doretry; + } + bytesinpage = tinc; + if (tinc > (newbsize - toff)) + bytesinpage = newbsize - toff; + if (!vm_page_is_valid(m, toff + off, bytesinpage)) { + bp->b_flags &= ~B_CACHE; + } + if ((m->flags & PG_ACTIVE) == 0) + vm_page_activate(m); + m->flags |= PG_BUSY; + } + bp->b_pages[pageindex] = m; + curbpnpages = pageindex + 1; + } + if (bsize >= PAGE_SIZE) { + for (i = bp->b_npages; i < curbpnpages; i++) { + m = bp->b_pages[i]; + if (m->valid == 0) { + bp->b_flags &= ~B_CACHE; + } + m->bmapped++; + PAGE_WAKEUP(m); + } + } else { + if (!vm_page_is_valid(bp->b_pages[0], off, bsize)) + bp->b_flags &= ~B_CACHE; + bp->b_pages[0]->bmapped++; + PAGE_WAKEUP(bp->b_pages[0]); + } + bp->b_npages = curbpnpages; + bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; + pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); + bp->b_data += off % PAGE_SIZE; + } + } } - - /* adjust buffer cache's idea of memory allocated to buffer contents */ - freebufspace -= newbsize - bp->b_bufsize; - allocbufspace += newbsize - bp->b_bufsize; - bp->b_bufsize = newbsize; bp->b_bcount = size; + return 1; } /* * Wait for buffer I/O completion, returning error status. */ int -biowait(register struct buf *bp) +biowait(register struct buf * bp) { int s; s = splbio(); while ((bp->b_flags & B_DONE) == 0) - tsleep((caddr_t)bp, PRIBIO, "biowait", 0); - if((bp->b_flags & B_ERROR) || bp->b_error) { + tsleep((caddr_t) bp, PRIBIO, "biowait", 0); + if ((bp->b_flags & B_ERROR) || bp->b_error) { if ((bp->b_flags & B_INVAL) == 0) { bp->b_flags |= B_INVAL; bp->b_dev = NODEV; LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); + wakeup((caddr_t) bp); } if (!bp->b_error) bp->b_error = EIO; @@ -591,16 +1270,18 @@ biowait(register struct buf *bp) * is not *a good idea*. 
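/*
 * Editor's aside (not part of the commit): biodone() just below, like
 * brelse() and vfs_busy_pages() elsewhere in this diff, walks the buffer's
 * pages computing how much of the transfer falls into each page as
 * "resid = (m->offset + PAGE_SIZE) - foff", clamped to what is left of the
 * I/O.  A stand-alone model of that walk for an 8K buffer starting at file
 * offset 0, assuming PAGE_SIZE = 4096:
 */
#include <stdio.h>

#define PAGE_SIZE 4096

int
main(void)
{
        long foff = 0;                  /* file offset covered so far */
        int iosize = 8192;              /* bytes in the transfer */
        long pageoff[2] = { 0, 4096 };  /* m->offset of each backing page */
        int i;

        for (i = 0; i < 2; i++) {
                int resid = (pageoff[i] + PAGE_SIZE) - foff;

                if (resid > iosize)
                        resid = iosize;
                printf("page %d: %d bytes completed\n", i, resid);
                foff += resid;
                iosize -= resid;
        }
        return (0);
}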
*/ void -biodone(register struct buf *bp) +biodone(register struct buf * bp) { int s; + s = splbio(); + if (bp->b_flags & B_DONE) + printf("biodone: buffer already done\n"); bp->b_flags |= B_DONE; - if ((bp->b_flags & B_READ) == 0) { + if ((bp->b_flags & B_READ) == 0) { vwakeup(bp); } - #ifdef BOUNCE_BUFFERS if (bp->b_flags & B_BOUNCE) vm_bounce_free(bp); @@ -609,21 +1290,85 @@ biodone(register struct buf *bp) /* call optional completion function if requested */ if (bp->b_flags & B_CALL) { bp->b_flags &= ~B_CALL; - (*bp->b_iodone)(bp); + (*bp->b_iodone) (bp); splx(s); return; } - -/* - * For asynchronous completions, release the buffer now. The brelse - * checks for B_WANTED and will do the wakeup there if necessary - - * so no need to do a wakeup here in the async case. - */ + if (bp->b_flags & B_VMIO) { + int i, resid; + vm_offset_t foff; + vm_page_t m; + vm_object_t obj; + int iosize; + struct vnode *vp = bp->b_vp; + + foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + obj = (vm_object_t) vp->v_vmdata; + if (!obj) { + return; + } +#if defined(VFS_BIO_DEBUG) + if (obj->paging_in_progress < bp->b_npages) { + printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", + obj->paging_in_progress, bp->b_npages); + } +#endif + iosize = bp->b_bufsize; + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + if (m == bogus_page) { + m = vm_page_lookup(obj, foff); + if (!m) { +#if defined(VFS_BIO_DEBUG) + printf("biodone: page disappeared\n"); +#endif + --obj->paging_in_progress; + continue; + } + bp->b_pages[i] = m; + pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); + } +#if defined(VFS_BIO_DEBUG) + if (trunc_page(foff) != m->offset) { + printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset); + } +#endif + resid = (m->offset + PAGE_SIZE) - foff; + if (resid > iosize) + resid = iosize; + if (resid > 0) { + vm_page_set_valid(m, foff, resid); + vm_page_set_clean(m, foff, resid); + } + if (m->busy == 0) { + printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n", + m->offset, foff, resid, i); + printf(" iosize: %d, lblkno: %d\n", + bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno); + printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n", + m->valid, m->dirty, m->bmapped); + panic("biodone: page busy < 0\n"); + } + m->flags &= ~PG_FAKE; + --m->busy; + PAGE_WAKEUP(m); + --obj->paging_in_progress; + foff += resid; + iosize -= resid; + } + if (obj && obj->paging_in_progress == 0) + wakeup((caddr_t) obj); + } + /* + * For asynchronous completions, release the buffer now. The brelse + * checks for B_WANTED and will do the wakeup there if necessary - so + * no need to do a wakeup here in the async case. 
+ */ if (bp->b_flags & B_ASYNC) { brelse(bp); } else { - bp->b_flags &= ~B_WANTED; + bp->b_flags &= ~(B_WANTED | B_PDWANTED); wakeup((caddr_t) bp); } splx(s); @@ -634,107 +1379,168 @@ count_lock_queue() { int count; struct buf *bp; - + count = 0; - for(bp = bufqueues[QUEUE_LOCKED].tqh_first; + for (bp = bufqueues[QUEUE_LOCKED].tqh_first; bp != NULL; bp = bp->b_freelist.tqe_next) count++; - return(count); + return (count); } int vfs_update_interval = 30; void -vfs_update() { +vfs_update() +{ (void) spl0(); - while(1) { - tsleep((caddr_t)&vfs_update_wakeup, PRIBIO, "update", - hz * vfs_update_interval); + while (1) { + tsleep((caddr_t) & vfs_update_wakeup, PRIBIO, "update", + hz * vfs_update_interval); vfs_update_wakeup = 0; sync(curproc, NULL, NULL); } } -#if 0 -#define MAXFREEBP 128 -#define LDFREE_BUSY 1 -#define LDFREE_WANT 2 -int loadfreeing; -struct buf *freebp[MAXFREEBP]; -#endif +void +vfs_unbusy_pages(struct buf * bp) +{ + int i; + + if (bp->b_flags & B_VMIO) { + struct vnode *vp = bp->b_vp; + vm_object_t obj = (vm_object_t) vp->v_vmdata; + vm_offset_t foff; + + foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + + for (i = 0; i < bp->b_npages; i++) { + vm_page_t m = bp->b_pages[i]; + + if (m == bogus_page) { + m = vm_page_lookup(obj, foff); + if (!m) { + panic("vfs_unbusy_pages: page missing\n"); + } + bp->b_pages[i] = m; + pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); + } + --obj->paging_in_progress; + --m->busy; + PAGE_WAKEUP(m); + } + if (obj->paging_in_progress == 0) + wakeup((caddr_t) obj); + } +} + +void +vfs_busy_pages(struct buf * bp, int clear_modify) +{ + int i; + + if (bp->b_flags & B_VMIO) { + vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata; + vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + int iocount = bp->b_bufsize; + + for (i = 0; i < bp->b_npages; i++) { + vm_page_t m = bp->b_pages[i]; + int resid = (m->offset + PAGE_SIZE) - foff; + + if (resid > iocount) + resid = iocount; + obj->paging_in_progress++; + m->busy++; + if (clear_modify) { + vm_page_test_dirty(m); + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ); + } else if (bp->b_bcount >= PAGE_SIZE) { + if (m->valid && (bp->b_flags & B_CACHE) == 0) { + bp->b_pages[i] = bogus_page; + pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); + } + } + foff += resid; + iocount -= resid; + } + } +} + +void +vfs_dirty_pages(struct buf * bp) +{ + int i; + + if (bp->b_flags & B_VMIO) { + vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + int iocount = bp->b_bufsize; + + for (i = 0; i < bp->b_npages; i++) { + vm_page_t m = bp->b_pages[i]; + int resid = (m->offset + PAGE_SIZE) - foff; + + if (resid > iocount) + resid = iocount; + if (resid > 0) { + vm_page_set_valid(m, foff, resid); + vm_page_set_dirty(m, foff, resid); + } + PAGE_WAKEUP(m); + foff += resid; + iocount -= resid; + } + } +} /* * these routines are not in the correct place (yet) * also they work *ONLY* for kernel_pmap!!! 
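/*
 * Editor's aside (not part of the commit): vm_hold_load_pages() below
 * records each newly allocated page in bp->b_pages[], indexing by how far
 * the page's kernel VA lies into the buffer's mapping:
 * ((caddr_t)pg - bp->b_data) / PAGE_SIZE.  A stand-alone check of that
 * index calculation, assuming PAGE_SIZE = 4096 and MAXBSIZE = 65536:
 */
#include <stdio.h>

#define PAGE_SIZE 4096
#define MAXBSIZE  65536

int
main(void)
{
        static char fake_kva[4 * MAXBSIZE];     /* stand-in for buffer kva */
        char *b_data = fake_kva + 3 * MAXBSIZE; /* buffer #3's mapping */
        char *pg = b_data + 2 * PAGE_SIZE;      /* third page of that buffer */

        printf("b_pages index = %ld\n", (long)(pg - b_data) / PAGE_SIZE);
        return (0);
}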
*/ void -vm_hold_load_pages(vm_offset_t froma, vm_offset_t toa) { +vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) +{ vm_offset_t pg; vm_page_t p; vm_offset_t from = round_page(froma); vm_offset_t to = round_page(toa); - for(pg = from ; pg < to ; pg += PAGE_SIZE) { +tryagain0: + if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <= + cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) { + VM_WAIT; + goto tryagain0; + } + for (pg = from; pg < to; pg += PAGE_SIZE) { - tryagain: -#if 0 -/* - * don't allow buffer cache to cause VM paging - */ - if ( cnt.v_free_count < cnt.v_free_min) { - if( !loadfreeing ) { - int n=0; - struct buf *bp; - loadfreeing = LDFREE_BUSY; - while( (cnt.v_free_count <= cnt.v_free_min) && - (n < MAXFREEBP)) { - bp = geteblk(0); - if( bp) - freebp[n++] = bp; - else - break; - } - while(--n >= 0) { - brelse(freebp[n]); - } - if( loadfreeing & LDFREE_WANT) - wakeup((caddr_t) &loadfreeing); - loadfreeing = 0; - } else { - loadfreeing |= LDFREE_WANT; - tsleep(&loadfreeing, PRIBIO, "biofree", 0); - } - } -#endif - if ((curproc != pageproc) && (cnt.v_free_count <= - cnt.v_free_reserved + (toa-froma) / PAGE_SIZE)) { - VM_WAIT; - goto tryagain; - } +tryagain: - p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS); - if( !p) { + p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0); + if (!p) { VM_WAIT; goto tryagain; } - vm_page_wire(p); - pmap_kenter( pg, VM_PAGE_TO_PHYS(p)); + pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); + bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p; + PAGE_WAKEUP(p); + bp->b_npages++; } } void -vm_hold_free_pages(vm_offset_t froma, vm_offset_t toa) +vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) { vm_offset_t pg; vm_page_t p; vm_offset_t from = round_page(froma); vm_offset_t to = round_page(toa); - - for(pg = from ; pg < to ; pg += PAGE_SIZE) { - p = PHYS_TO_VM_PAGE( pmap_kextract( pg)); - pmap_kremove( pg); + + for (pg = from; pg < to; pg += PAGE_SIZE) { + p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE]; + bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0; + pmap_kremove(pg); vm_page_free(p); + --bp->b_npages; } } @@ -742,4 +1548,3 @@ void bufstats() { } - diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index e58dfc178d29..bca74820a785 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -1,6 +1,8 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. + * Modifications/enhancements: + * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,7 +33,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.6 1994/10/08 22:33:41 phk Exp $ + * $Id: vfs_cluster.c,v 1.7 1994/12/18 03:05:49 davidg Exp $ */ #include <sys/param.h> @@ -43,12 +45,15 @@ #include <sys/trace.h> #include <sys/malloc.h> #include <sys/resourcevar.h> +#include <sys/vmmeter.h> +#include <miscfs/specfs/specdev.h> #ifdef DEBUG #include <vm/vm.h> #include <sys/sysctl.h> int doreallocblks = 0; -struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; +struct ctldebug debug13 = {"doreallocblks", &doreallocblks}; + #else /* XXX for cluster_write */ #define doreallocblks 0 @@ -57,14 +62,14 @@ struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; /* * Local declarations */ -struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t, - daddr_t, long, int)); struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *, - daddr_t, daddr_t, long, int, long)); -void cluster_wbuild __P((struct vnode *, struct buf *, long, - daddr_t, int, daddr_t)); + daddr_t, daddr_t, long, int, long)); +void cluster_wbuild __P((struct vnode *, struct buf *, long, daddr_t, int, daddr_t)); struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *)); +int totreads; +int totreadblocks; + #ifdef DIAGNOSTIC /* * Set to 1 if reads of block zero should cause readahead to be done. @@ -78,7 +83,8 @@ struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *)); * blocks from the cache. The former seems intuitive, but some quick tests * showed that the latter performed better from a system-wide point of view. */ -int doclusterraz = 0; + int doclusterraz = 0; + #define ISSEQREAD(vp, blk) \ (((blk) != 0 || doclusterraz) && \ ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) @@ -92,17 +98,6 @@ int doclusterraz = 0; * lastr is 0, we assume this is the first read and we'll read up to two * blocks if they are sequential. After that, we'll do regular read ahead * in clustered chunks. - * - * There are 4 or 5 cases depending on how you count: - * Desired block is in the cache: - * 1 Not sequential access (0 I/Os). - * 2 Access is sequential, do read-ahead (1 ASYNC). - * Desired block is not in cache: - * 3 Not sequential access (1 SYNC). - * 4 Sequential access, next block is contiguous (1 SYNC). - * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC) - * - * There are potentially two buffers that require I/O. * bp is the block requested. * rbp is the read-ahead block. * If either is NULL, then you don't have to do the I/O. @@ -117,156 +112,136 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp) struct buf **bpp; { struct buf *bp, *rbp; - daddr_t blkno, ioblkno; + daddr_t blkno, rablkno, origlblkno; long flags; int error, num_ra, alreadyincore; -#ifdef DIAGNOSTIC - if (size == 0) - panic("cluster_read: size = 0"); -#endif - + origlblkno = lblkno; error = 0; - flags = B_READ; + /* + * get the requested block + */ *bpp = bp = getblk(vp, lblkno, size, 0, 0); + /* + * if it is in the cache, then check to see if the reads have been + * sequential. If they have, then try some read-ahead, otherwise + * back-off on prospective read-aheads. + */ if (bp->b_flags & B_CACHE) { - /* - * Desired block is in cache; do any readahead ASYNC. - * Case 1, 2. - */ - trace(TR_BREADHIT, pack(vp, size), lblkno); - flags |= B_ASYNC; - ioblkno = lblkno + (vp->v_ralen ? 
vp->v_ralen : 1); - alreadyincore = (int)incore(vp, ioblkno); + int i; + + if (!ISSEQREAD(vp, origlblkno)) { + vp->v_ralen >>= 1; + return 0; + } bp = NULL; } else { - /* Block wasn't in cache, case 3, 4, 5. */ - trace(TR_BREADMISS, pack(vp, size), lblkno); + /* + * if it isn't in the cache, then get a chunk from disk if + * sequential, otherwise just get the block. + */ bp->b_flags |= B_READ; - ioblkno = lblkno; - alreadyincore = 0; - curproc->p_stats->p_ru.ru_inblock++; /* XXX */ + lblkno += 1; + curproc->p_stats->p_ru.ru_inblock++; /* XXX */ } /* - * XXX - * Replace 1 with a window size based on some permutation of - * maxcontig and rot_delay. This will let you figure out how - * many blocks you should read-ahead (case 2, 4, 5). - * - * If the access isn't sequential, reset the window to 1. - * Note that a read to the same block is considered sequential. - * This catches the case where the file is being read sequentially, - * but at smaller than the filesystem block size. + * if ralen is "none", then try a little */ - rbp = NULL; - if (!ISSEQREAD(vp, lblkno)) { - vp->v_ralen = 0; - vp->v_maxra = lblkno; - } else if ((ioblkno + 1) * size <= filesize && !alreadyincore && - !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) && - blkno != -1) { + if (vp->v_ralen == 0) + vp->v_ralen = 1; + /* + * assume no read-ahead + */ + alreadyincore = 1; + rablkno = lblkno; + + /* + * if we have been doing sequential I/O, then do some read-ahead + */ + if (ISSEQREAD(vp, origlblkno)) { + int i; + /* - * Reading sequentially, and the next block is not in the - * cache. We are going to try reading ahead. + * this code makes sure that the stuff that we have read-ahead + * is still in the cache. If it isn't, we have been reading + * ahead too much, and we need to back-off, otherwise we might + * try to read more. */ - if (num_ra) { - /* - * If our desired readahead block had been read - * in a previous readahead but is no longer in - * core, then we may be reading ahead too far - * or are not using our readahead very rapidly. - * In this case we scale back the window. - */ - if (!alreadyincore && ioblkno <= vp->v_maxra) - vp->v_ralen = max(vp->v_ralen >> 1, 1); - /* - * There are more sequential blocks than our current - * window allows, scale up. Ideally we want to get - * in sync with the filesystem maxcontig value. - */ - else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr) - vp->v_ralen = vp->v_ralen ? - min(num_ra, vp->v_ralen << 1) : 1; - - if (num_ra > vp->v_ralen) - num_ra = vp->v_ralen; + for (i = 0; i < vp->v_ralen; i++) { + rablkno = lblkno + i; + alreadyincore = (int) incore(vp, rablkno); + if (!alreadyincore) { + if (rablkno < vp->v_maxra) { + vp->v_maxra = rablkno; + vp->v_ralen >>= 1; + alreadyincore = 1; + } else { + if (inmem(vp, rablkno)) + continue; + if ((vp->v_ralen + 1) < MAXPHYS / size) + vp->v_ralen++; + } + break; + } } + } + /* + * we now build the read-ahead buffer if it is desirable. 
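/*
 * Editor's aside (not part of the commit): the loop above shrinks the
 * read-ahead window (v_ralen >>= 1) when a block it expected to keep from
 * earlier read-ahead has already been reclaimed, and grows it by one block
 * at a time while it stays under MAXPHYS / size.  A simplified stand-alone
 * model of that adjustment, assuming MAXPHYS = 64K and an 8K block size:
 */
#include <stdio.h>

#define MAXPHYS 65536

int
main(void)
{
        int size = 8192;        /* filesystem block size */
        int ralen = 4;          /* current read-ahead window, in blocks */
        int still_cached = 0;   /* pretend a read-ahead block was lost */

        if (!still_cached)
                ralen >>= 1;                    /* back off: read ahead too far */
        else if (ralen + 1 < MAXPHYS / size)
                ralen++;                        /* keep growing the window */

        printf("new read-ahead window: %d blocks\n", ralen);
        return (0);
}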
+ */ + rbp = NULL; + if (!alreadyincore && + (rablkno + 1) * size <= filesize && + !(error = VOP_BMAP(vp, rablkno, NULL, &blkno, &num_ra)) && + blkno != -1) { + if (num_ra > vp->v_ralen) + num_ra = vp->v_ralen; - if (num_ra) /* case 2, 4 */ + if (num_ra && + ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)) { rbp = cluster_rbuild(vp, filesize, - bp, ioblkno, blkno, size, num_ra, flags); - else if (ioblkno == lblkno) { - bp->b_blkno = blkno; - /* Case 5: check how many blocks to read ahead */ - ++ioblkno; - if ((ioblkno + 1) * size > filesize || - incore(vp, ioblkno) || (error = VOP_BMAP(vp, - ioblkno, NULL, &blkno, &num_ra)) || blkno == -1) - goto skip_readahead; - /* - * Adjust readahead as above - */ - if (num_ra) { - if (!alreadyincore && ioblkno <= vp->v_maxra) - vp->v_ralen = max(vp->v_ralen >> 1, 1); - else if (num_ra > vp->v_ralen && - lblkno != vp->v_lastr) - vp->v_ralen = vp->v_ralen ? - min(num_ra,vp->v_ralen<<1) : 1; - if (num_ra > vp->v_ralen) - num_ra = vp->v_ralen; - } - flags |= B_ASYNC; - if (num_ra) - rbp = cluster_rbuild(vp, filesize, - NULL, ioblkno, blkno, size, num_ra, flags); - else { - rbp = getblk(vp, ioblkno, size, 0, 0); - rbp->b_flags |= flags; - rbp->b_blkno = blkno; - } + NULL, rablkno, blkno, size, num_ra, B_READ | B_ASYNC); } else { - /* case 2; read ahead single block */ - rbp = getblk(vp, ioblkno, size, 0, 0); - rbp->b_flags |= flags; + rbp = getblk(vp, rablkno, size, 0, 0); + rbp->b_flags |= B_READ | B_ASYNC; rbp->b_blkno = blkno; } - - if (rbp == bp) /* case 4 */ - rbp = NULL; - else if (rbp) { /* case 2, 5 */ - trace(TR_BREADMISSRA, - pack(vp, (num_ra + 1) * size), ioblkno); - curproc->p_stats->p_ru.ru_inblock++; /* XXX */ - } } - - /* XXX Kirk, do we need to make sure the bp has creds? */ skip_readahead: - if (bp) + /* + * if the synchronous read is a cluster, handle it, otherwise do a + * simple, non-clustered read. 
+ */ + if (bp) { if (bp->b_flags & (B_DONE | B_DELWRI)) panic("cluster_read: DONE bp"); - else + else { + vfs_busy_pages(bp, 0); error = VOP_STRATEGY(bp); - - if (rbp) - if (error || rbp->b_flags & (B_DONE | B_DELWRI)) { + vp->v_maxra = bp->b_lblkno + bp->b_bcount / size; + totreads++; + totreadblocks += bp->b_bcount / size; + curproc->p_stats->p_ru.ru_inblock++; + } + } + /* + * and if we have read-aheads, do them too + */ + if (rbp) { + if (error || (rbp->b_flags & B_CACHE)) { rbp->b_flags &= ~(B_ASYNC | B_READ); brelse(rbp); - } else + } else { + vfs_busy_pages(rbp, 0); (void) VOP_STRATEGY(rbp); - - /* - * Recalculate our maximum readahead - */ - if (rbp == NULL) - rbp = bp; - if (rbp) - vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1; - + vp->v_maxra = rbp->b_lblkno + rbp->b_bcount / size; + totreads++; + totreadblocks += rbp->b_bcount / size; + curproc->p_stats->p_ru.ru_inblock++; + } + } if (bp) - return(biowait(bp)); - return(error); + return (biowait(bp)); + return (error); } /* @@ -288,12 +263,12 @@ cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags) struct cluster_save *b_save; struct buf *tbp; daddr_t bn; - int i, inc; + int i, inc, j; #ifdef DIAGNOSTIC if (size != vp->v_mount->mnt_stat.f_iosize) panic("cluster_rbuild: size %d != filesize %d\n", - size, vp->v_mount->mnt_stat.f_iosize); + size, vp->v_mount->mnt_stat.f_iosize); #endif if (size * (lbn + run + 1) > filesize) --run; @@ -303,97 +278,65 @@ cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags) bp->b_blkno = blkno; bp->b_flags |= flags; } - return(bp); - } - - bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1); - if (bp->b_flags & (B_DONE | B_DELWRI)) return (bp); + } + tbp = bp; + if (!tbp) { + tbp = getblk(vp, lbn, size, 0, 0); + } + if (tbp->b_flags & B_CACHE) { + return (tbp); + } else if (bp == NULL) { + tbp->b_flags |= B_ASYNC; + } + bp = getpbuf(); + bp->b_flags = flags | B_CALL | B_BUSY | B_CLUSTER; + bp->b_iodone = cluster_callback; + bp->b_blkno = blkno; + bp->b_lblkno = lbn; + pbgetvp(vp, bp); b_save = malloc(sizeof(struct buf *) * (run + 1) + sizeof(struct cluster_save), M_SEGMENT, M_WAITOK); - b_save->bs_bufsize = b_save->bs_bcount = size; b_save->bs_nchildren = 0; - b_save->bs_children = (struct buf **)(b_save + 1); - b_save->bs_saveaddr = bp->b_saveaddr; - bp->b_saveaddr = (caddr_t) b_save; + b_save->bs_children = (struct buf **) (b_save + 1); + bp->b_saveaddr = b_save; + + bp->b_bcount = 0; + bp->b_bufsize = 0; + bp->b_npages = 0; + + if (tbp->b_flags & B_VMIO) + bp->b_flags |= B_VMIO; inc = btodb(size); - for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) { - if (incore(vp, lbn + i)) { - if (i == 1) { - bp->b_saveaddr = b_save->bs_saveaddr; - bp->b_flags &= ~B_CALL; - bp->b_iodone = NULL; - allocbuf(bp, size); - free(b_save, M_SEGMENT); - } else - allocbuf(bp, size * i); - break; - } - tbp = getblk(vp, lbn + i, 0, 0, 0); - /* - * getblk may return some memory in the buffer if there were - * no empty buffers to shed it to. If there is currently - * memory in the buffer, we move it down size bytes to make - * room for the valid pages that cluster_callback will insert. - * We do this now so we don't have to do it at interrupt time - * in the callback routine. 
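/*
 * Editor's aside (not part of the commit): cluster_rbuild() above steps the
 * device block number by "inc = btodb(size)" for each child buffer, so with
 * 8K filesystem blocks consecutive children sit 16 DEV_BSIZE sectors apart.
 * A stand-alone check of that stepping, assuming DEV_BSIZE = 512 and a
 * locally defined btodb():
 */
#include <stdio.h>

#define DEV_BSIZE 512
#define btodb(bytes) ((bytes) / DEV_BSIZE)

int
main(void)
{
        long blkno = 1000;      /* starting device block of the cluster */
        int size = 8192;        /* filesystem block size */
        int inc = btodb(size);
        int i;

        for (i = 0; i <= 3; i++)
                printf("child %d at device block %ld\n", i, blkno + i * inc);
        return (0);
}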
- */ - if (tbp->b_bufsize != 0) { - caddr_t bdata = (char *)tbp->b_data; - - if (tbp->b_bufsize + size > MAXBSIZE) - panic("cluster_rbuild: too much memory"); - if (tbp->b_bufsize > size) { - /* - * XXX if the source and destination regions - * overlap we have to copy backward to avoid - * clobbering any valid pages (i.e. pagemove - * implementations typically can't handle - * overlap). - */ - bdata += tbp->b_bufsize; - while (bdata > (char *)tbp->b_data) { - bdata -= CLBYTES; - pagemove(bdata, bdata + size, CLBYTES); - } - } else - pagemove(bdata, bdata + size, tbp->b_bufsize); + for (bn = blkno, i = 0; i <= run; ++i, bn += inc) { + if (i != 0) { + if (inmem(vp, lbn + i)) { + break; + } + tbp = getblk(vp, lbn + i, size, 0, 0); + if ((tbp->b_flags & B_CACHE) || + (tbp->b_flags & B_VMIO) != (bp->b_flags & B_VMIO)) { + brelse(tbp); + break; + } + tbp->b_blkno = bn; + tbp->b_flags |= flags | B_READ | B_ASYNC; + } else { + tbp->b_flags |= flags | B_READ; } - tbp->b_blkno = bn; - tbp->b_flags |= flags | B_READ | B_ASYNC; ++b_save->bs_nchildren; - b_save->bs_children[i - 1] = tbp; - } - return(bp); -} - -/* - * Either get a new buffer or grow the existing one. - */ -struct buf * -cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run) - struct vnode *vp; - struct buf *bp; - long flags; - daddr_t blkno; - daddr_t lblkno; - long size; - int run; -{ - if (!bp) { - bp = getblk(vp, lblkno, size, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { - bp->b_blkno = blkno; - return(bp); + b_save->bs_children[i] = tbp; + for (j = 0; j < tbp->b_npages; j += 1) { + bp->b_pages[j + bp->b_npages] = tbp->b_pages[j]; } + bp->b_npages += tbp->b_npages; + bp->b_bcount += size; + bp->b_bufsize += size; } - allocbuf(bp, run * size); - bp->b_blkno = blkno; - bp->b_iodone = cluster_callback; - bp->b_flags |= flags | B_CALL; - return(bp); + pmap_qenter(bp->b_data, bp->b_pages, bp->b_npages); + return (bp); } /* @@ -408,7 +351,6 @@ cluster_callback(bp) { struct cluster_save *b_save; struct buf **bpp, *tbp; - long bsize; caddr_t cp; int error = 0; @@ -418,46 +360,22 @@ cluster_callback(bp) if (bp->b_flags & B_ERROR) error = bp->b_error; - b_save = (struct cluster_save *)(bp->b_saveaddr); - bp->b_saveaddr = b_save->bs_saveaddr; - - bsize = b_save->bs_bufsize; - cp = (char *)bp->b_data + bsize; + b_save = (struct cluster_save *) (bp->b_saveaddr); + pmap_qremove(bp->b_data, bp->b_npages); /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) { tbp = *bpp; - pagemove(cp, tbp->b_data, bsize); - tbp->b_bufsize += bsize; - tbp->b_bcount = bsize; if (error) { tbp->b_flags |= B_ERROR; tbp->b_error = error; } biodone(tbp); - bp->b_bufsize -= bsize; - cp += bsize; - } - /* - * If there was excess memory in the cluster buffer, - * slide it up adjacent to the remaining valid data. 
- */ - if (bp->b_bufsize != bsize) { - if (bp->b_bufsize < bsize) - panic("cluster_callback: too little memory"); - pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize); } - bp->b_bcount = bsize; - bp->b_iodone = NULL; free(b_save, M_SEGMENT); - if (bp->b_flags & B_ASYNC) - brelse(bp); - else { - bp->b_flags &= ~B_WANTED; - wakeup((caddr_t)bp); - } + relpbuf(bp); } /* @@ -472,78 +390,47 @@ cluster_callback(bp) */ void cluster_write(bp, filesize) - struct buf *bp; + struct buf *bp; u_quad_t filesize; { - struct vnode *vp; - daddr_t lbn; - int maxclen, cursize; + struct vnode *vp; + daddr_t lbn; + int maxclen, cursize; + int lblocksize; - vp = bp->b_vp; - lbn = bp->b_lblkno; + vp = bp->b_vp; + lblocksize = vp->v_mount->mnt_stat.f_iosize; + lbn = bp->b_lblkno; /* Initialize vnode to beginning of file. */ if (lbn == 0) vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; - if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || - (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) { - maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1; + if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || + (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { + maxclen = MAXPHYS / lblocksize; if (vp->v_clen != 0) { /* * Next block is not sequential. - * + * * If we are not writing at end of file, the process - * seeked to another point in the file since its - * last write, or we have reached our maximum - * cluster size, then push the previous cluster. - * Otherwise try reallocating to make it sequential. + * seeked to another point in the file since its last + * write, or we have reached our maximum cluster size, + * then push the previous cluster. Otherwise try + * reallocating to make it sequential. */ cursize = vp->v_lastw - vp->v_cstart + 1; - if (!doreallocblks || - (lbn + 1) * bp->b_bcount != filesize || - lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { - cluster_wbuild(vp, NULL, bp->b_bcount, - vp->v_cstart, cursize, lbn); - } else { - struct buf **bpp, **endbp; - struct cluster_save *buflist; - - buflist = cluster_collectbufs(vp, bp); - endbp = &buflist->bs_children - [buflist->bs_nchildren - 1]; - if (VOP_REALLOCBLKS(vp, buflist)) { - /* - * Failed, push the previous cluster. - */ - for (bpp = buflist->bs_children; - bpp < endbp; bpp++) - brelse(*bpp); - free(buflist, M_SEGMENT); - cluster_wbuild(vp, NULL, bp->b_bcount, - vp->v_cstart, cursize, lbn); - } else { - /* - * Succeeded, keep building cluster. - */ - for (bpp = buflist->bs_children; - bpp <= endbp; bpp++) - bdwrite(*bpp); - free(buflist, M_SEGMENT); - vp->v_lastw = lbn; - vp->v_lasta = bp->b_blkno; - return; - } - } + cluster_wbuild(vp, NULL, lblocksize, + vp->v_cstart, cursize, lbn); } /* - * Consider beginning a cluster. - * If at end of file, make cluster as large as possible, - * otherwise find size of existing cluster. + * Consider beginning a cluster. If at end of file, make + * cluster as large as possible, otherwise find size of + * existing cluster. 
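/*
 * Editor's aside (not part of the commit): cluster_write() above treats a
 * new write as extending the current cluster only when it is the next
 * logical block *and* it lands physically right after the previous one
 * (v_lasta + btodb(lblocksize)).  A stand-alone version of that test,
 * assuming DEV_BSIZE = 512 and an 8K block size:
 */
#include <stdio.h>

#define DEV_BSIZE 512
#define btodb(bytes) ((bytes) / DEV_BSIZE)

int
main(void)
{
        int lblocksize = 8192;
        long lastw = 9, lasta = 1000;   /* previous logical/physical blocks */
        long lbn = 10, blkno = 1016;    /* the write being considered */

        if (lbn == lastw + 1 && blkno == lasta + btodb(lblocksize))
                printf("block %ld continues the cluster\n", lbn);
        else
                printf("block %ld starts a new cluster\n", lbn);
        return (0);
}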
*/ - if ((lbn + 1) * bp->b_bcount != filesize && + if ((lbn + 1) * lblocksize != filesize && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) || - bp->b_blkno == -1)) { + bp->b_blkno == -1)) { bawrite(bp); vp->v_clen = 0; vp->v_lasta = bp->b_blkno; @@ -551,13 +438,13 @@ cluster_write(bp, filesize) vp->v_lastw = lbn; return; } - vp->v_clen = maxclen; - if (maxclen == 0) { /* I/O not contiguous */ + vp->v_clen = maxclen; + if (maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; - bawrite(bp); - } else { /* Wait for rest of cluster */ + bawrite(bp); + } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; - bdwrite(bp); + bdwrite(bp); } } else if (lbn == vp->v_cstart + vp->v_clen) { /* @@ -569,8 +456,7 @@ cluster_write(bp, filesize) vp->v_cstart = lbn + 1; } else /* - * In the middle of a cluster, so just delay the - * I/O for now. + * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); vp->v_lastw = lbn; @@ -591,17 +477,17 @@ cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn) long size; daddr_t start_lbn; int len; - daddr_t lbn; + daddr_t lbn; { struct cluster_save *b_save; struct buf *bp, *tbp; - caddr_t cp; - int i, s; + caddr_t cp; + int i, j, s; #ifdef DIAGNOSTIC if (size != vp->v_mount->mnt_stat.f_iosize) panic("cluster_wbuild: size %d != filesize %d\n", - size, vp->v_mount->mnt_stat.f_iosize); + size, vp->v_mount->mnt_stat.f_iosize); #endif redo: while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) { @@ -619,104 +505,95 @@ redo: } return; } - - bp = getblk(vp, start_lbn, size, 0, 0); - if (!(bp->b_flags & B_DELWRI)) { + tbp = getblk(vp, start_lbn, size, 0, 0); + if (!(tbp->b_flags & B_DELWRI)) { ++start_lbn; --len; - brelse(bp); + brelse(tbp); goto redo; } - /* - * Extra memory in the buffer, punt on this buffer. - * XXX we could handle this in most cases, but we would have to - * push the extra memory down to after our max possible cluster - * size and then potentially pull it back up if the cluster was - * terminated prematurely--too much hassle. + * Extra memory in the buffer, punt on this buffer. XXX we could + * handle this in most cases, but we would have to push the extra + * memory down to after our max possible cluster size and then + * potentially pull it back up if the cluster was terminated + * prematurely--too much hassle. */ - if (bp->b_bcount != bp->b_bufsize) { + if (tbp->b_bcount != tbp->b_bufsize) { ++start_lbn; --len; - bawrite(bp); + bawrite(tbp); goto redo; } - - --len; - b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save), + bp = getpbuf(); + b_save = malloc(sizeof(struct buf *) * (len + 1) + sizeof(struct cluster_save), M_SEGMENT, M_WAITOK); - b_save->bs_bcount = bp->b_bcount; - b_save->bs_bufsize = bp->b_bufsize; b_save->bs_nchildren = 0; - b_save->bs_children = (struct buf **)(b_save + 1); - b_save->bs_saveaddr = bp->b_saveaddr; - bp->b_saveaddr = (caddr_t) b_save; - - bp->b_flags |= B_CALL; + b_save->bs_children = (struct buf **) (b_save + 1); + bp->b_saveaddr = b_save; + bp->b_bcount = 0; + bp->b_bufsize = 0; + bp->b_npages = 0; + + if (tbp->b_flags & B_VMIO) + bp->b_flags |= B_VMIO; + + bp->b_blkno = tbp->b_blkno; + bp->b_lblkno = tbp->b_lblkno; + bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER; bp->b_iodone = cluster_callback; - cp = (char *)bp->b_data + size; - for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) { - /* - * Block is not in core or the non-sequential block - * ending our cluster was part of the cluster (in which - * case we don't want to write it twice). 
- */ - if (!incore(vp, start_lbn) || - (last_bp == NULL && start_lbn == lbn)) - break; + pbgetvp(vp, bp); - /* - * Get the desired block buffer (unless it is the final - * sequential block whose buffer was passed in explictly - * as last_bp). - */ - if (last_bp == NULL || start_lbn != lbn) { - tbp = getblk(vp, start_lbn, size, 0, 0); - if (!(tbp->b_flags & B_DELWRI)) { - brelse(tbp); + for (i = 0; i < len; ++i, ++start_lbn) { + if (i != 0) { + /* + * Block is not in core or the non-sequential block + * ending our cluster was part of the cluster (in + * which case we don't want to write it twice). + */ + if (!(tbp = incore(vp, start_lbn)) || + (last_bp == NULL && start_lbn == lbn)) break; - } - } else - tbp = last_bp; - ++b_save->bs_nchildren; + if ((tbp->b_flags & (B_INVAL | B_BUSY | B_CLUSTEROK)) != B_CLUSTEROK) + break; - /* Move memory from children to parent */ - if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) { - printf("Clustered Block: %lu addr %lx bufsize: %ld\n", - (u_long)bp->b_lblkno, bp->b_blkno, bp->b_bufsize); - printf("Child Block: %lu addr: %lx\n", - (u_long)tbp->b_lblkno, tbp->b_blkno); - panic("Clustered write to wrong blocks"); + /* + * Get the desired block buffer (unless it is the + * final sequential block whose buffer was passed in + * explictly as last_bp). + */ + if (last_bp == NULL || start_lbn != lbn) { + tbp = getblk(vp, start_lbn, size, 0, 0); + if (!(tbp->b_flags & B_DELWRI) || + ((tbp->b_flags & B_VMIO) != (bp->b_flags & B_VMIO))) { + brelse(tbp); + break; + } + } else + tbp = last_bp; } - - pagemove(tbp->b_data, cp, size); + for (j = 0; j < tbp->b_npages; j += 1) { + bp->b_pages[j + bp->b_npages] = tbp->b_pages[j]; + } + bp->b_npages += tbp->b_npages; bp->b_bcount += size; bp->b_bufsize += size; - tbp->b_bufsize -= size; tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); tbp->b_flags |= B_ASYNC; s = splbio(); - reassignbuf(tbp, tbp->b_vp); /* put on clean list */ + reassignbuf(tbp, tbp->b_vp); /* put on clean list */ ++tbp->b_vp->v_numoutput; splx(s); b_save->bs_children[i] = tbp; - - cp += size; - } - - if (i == 0) { - /* None to cluster */ - bp->b_saveaddr = b_save->bs_saveaddr; - bp->b_flags &= ~B_CALL; - bp->b_iodone = NULL; - free(b_save, M_SEGMENT); } + b_save->bs_nchildren = i; + pmap_qenter(bp->b_data, bp->b_pages, bp->b_npages); bawrite(bp); + if (i < len) { - len -= i + 1; - start_lbn += 1; + len -= i; goto redo; } } @@ -731,17 +608,17 @@ cluster_collectbufs(vp, last_bp) struct buf *last_bp; { struct cluster_save *buflist; - daddr_t lbn; + daddr_t lbn; int i, len; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; - buflist->bs_children = (struct buf **)(buflist + 1); + buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) - (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, - &buflist->bs_children[i]); + (void) bread(vp, lbn, last_bp->b_bcount, NOCRED, + &buflist->bs_children[i]); buflist->bs_children[i] = last_bp; buflist->bs_nchildren = i + 1; return (buflist); diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index b749976c9249..59f6232bfea5 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.12 1994/10/06 21:06:37 davidg Exp $ + * $Id: vfs_subr.c,v 1.13 1994/12/23 04:52:55 davidg Exp $ */ /* @@ -63,13 +63,13 @@ #include <miscfs/specfs/specdev.h> -void insmntque __P((struct vnode *, struct mount *)); +void insmntque __P((struct vnode *, struct mount *)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; -int vttoif_tab[9] = { +int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; @@ -84,7 +84,9 @@ int vttoif_tab[9] = { } TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ -struct mntlist mountlist; /* mounted filesystem list */ +struct mntlist mountlist; /* mounted filesystem list */ + +int desiredvnodes; /* * Initialize the vnode management data structures. @@ -92,6 +94,9 @@ struct mntlist mountlist; /* mounted filesystem list */ void vntblinit() { + extern int vm_object_cache_max; + + desiredvnodes = maxproc + vm_object_cache_max; TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&mountlist); @@ -106,9 +111,9 @@ vfs_lock(mp) register struct mount *mp; { - while(mp->mnt_flag & MNT_MLOCK) { + while (mp->mnt_flag & MNT_MLOCK) { mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t)mp, PVFS, "vfslck", 0); + (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); } mp->mnt_flag |= MNT_MLOCK; return (0); @@ -128,7 +133,7 @@ vfs_unlock(mp) mp->mnt_flag &= ~MNT_MLOCK; if (mp->mnt_flag & MNT_MWAIT) { mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t)mp); + wakeup((caddr_t) mp); } } @@ -141,9 +146,9 @@ vfs_busy(mp) register struct mount *mp; { - while(mp->mnt_flag & MNT_MPBUSY) { + while (mp->mnt_flag & MNT_MPBUSY) { mp->mnt_flag |= MNT_MPWANT; - (void) tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbsy", 0); + (void) tsleep((caddr_t) & mp->mnt_flag, PVFS, "vfsbsy", 0); } if (mp->mnt_flag & MNT_UNMOUNT) return (1); @@ -165,7 +170,7 @@ vfs_unbusy(mp) mp->mnt_flag &= ~MNT_MPBUSY; if (mp->mnt_flag & MNT_MPWANT) { mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t)&mp->mnt_flag); + wakeup((caddr_t) & mp->mnt_flag); } } @@ -173,20 +178,18 @@ void vfs_unmountroot(rootfs) struct mount *rootfs; { - struct mount *mp = rootfs; - int error; + struct mount *mp = rootfs; + int error; if (vfs_busy(mp)) { printf("failed to unmount root\n"); return; } - mp->mnt_flag |= MNT_UNMOUNT; if ((error = vfs_lock(mp))) { printf("lock of root filesystem failed (%d)\n", error); return; } - vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ @@ -200,7 +203,6 @@ vfs_unmountroot(rootfs) else printf("%d)\n", error); } - mp->mnt_flag &= ~MNT_UNMOUNT; vfs_unbusy(mp); } @@ -222,7 +224,6 @@ vfs_unmountall() rootfs = mp; continue; } - error = dounmount(mp, MNT_FORCE, initproc); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); @@ -255,7 +256,7 @@ getvfs(fsid) mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); } - return ((struct mount *)0); + return ((struct mount *) 0); } /* @@ -266,7 +267,7 @@ getnewfsid(mp, mtype) struct mount *mp; int mtype; { -static u_short xxxfs_mntid; + static u_short xxxfs_mntid; fsid_t tfsid; @@ -297,19 +298,19 @@ vattr_null(vap) vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = - vap->va_fsid = vap->va_fileid = - vap->va_blocksize = vap->va_rdev = - vap->va_atime.ts_sec = vap->va_atime.ts_nsec = - vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = - vap->va_ctime.ts_sec = 
vap->va_ctime.ts_nsec = - vap->va_flags = vap->va_gen = VNOVAL; + vap->va_fsid = vap->va_fileid = + vap->va_blocksize = vap->va_rdev = + vap->va_atime.ts_sec = vap->va_atime.ts_nsec = + vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = + vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = + vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ -extern int (**dead_vnodeop_p)(); +extern int (**dead_vnodeop_p) (); extern void vclean(); long numvnodes; @@ -320,17 +321,16 @@ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; - int (**vops)(); + int (**vops) (); struct vnode **vpp; { register struct vnode *vp; - if ((vnode_free_list.tqh_first == NULL && - numvnodes < 2 * desiredvnodes) || + if (vnode_free_list.tqh_first == NULL || numvnodes < desiredvnodes) { - vp = (struct vnode *)malloc((u_long)sizeof *vp, + vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); - bzero((char *)vp, sizeof *vp); + bzero((char *) vp, sizeof *vp); numvnodes++; } else { if ((vp = vnode_free_list.tqh_first) == NULL) { @@ -340,21 +340,23 @@ getnewvnode(tag, mp, vops, vpp) } if (vp->v_usecount) panic("free vnode isn't"); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* see comment on why 0xdeadb is set at end of vgone (below) */ - vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; + vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); #ifdef DIAGNOSTIC { - int s; - if (vp->v_data) - panic("cleaned vnode isn't"); - s = splbio(); - if (vp->v_numoutput) - panic("Clean vnode has pending I/O's"); - splx(s); + int s; + + if (vp->v_data) + panic("cleaned vnode isn't"); + s = splbio(); + if (vp->v_numoutput) + panic("Clean vnode has pending I/O's"); + splx(s); } #endif vp->v_flag = 0; @@ -366,7 +368,7 @@ getnewvnode(tag, mp, vops, vpp) vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; - vp->v_writecount = 0; /* XXX */ + vp->v_writecount = 0; /* XXX */ } vp->v_type = VNON; cache_purge(vp); @@ -415,11 +417,9 @@ vwakeup(bp) vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - if (vp->v_numoutput < 0) - panic("vwakeup: neg numoutput"); + if (vp->v_flag & VBWAIT) { vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); + wakeup((caddr_t) & vp->v_numoutput); } } } @@ -452,7 +452,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; - if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && + if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; @@ -466,9 +466,9 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) s = splbio(); if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; - error = tsleep((caddr_t)bp, - slpflag | (PRIBIO + 1), "vinvalbuf", - slptimeo); + error = tsleep((caddr_t) bp, + slpflag | (PRIBIO + 1), "vinvalbuf", + slptimeo); splx(s); if (error) return (error); @@ -478,9 +478,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) bp->b_flags |= B_BUSY; splx(s); /* - * XXX Since there are no node locks for NFS, I believe - * there is a slight chance that a delayed write will - * occur while sleeping just above, so check for it. 
+ * XXX Since there are no node locks for NFS, I + * believe there is a slight chance that a delayed + * write will occur while sleeping just above, so + * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { (void) VOP_BWRITE(bp); @@ -491,9 +492,17 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) } } + + s = splbio(); + while (vp->v_numoutput > 0) { + vp->v_flag |= VBWAIT; + tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); + } + splx(s); + pager = NULL; - object = (vm_object_t)vp->v_vmdata; - if( object != NULL) + object = (vm_object_t) vp->v_vmdata; + if (object != NULL) pager = object->pager; if (pager != NULL) { object = vm_object_lookup(pager); @@ -506,7 +515,6 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) vm_object_deallocate(object); } } - if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); @@ -565,6 +573,41 @@ brelvp(bp) } /* + * Associate a p-buffer with a vnode. + */ +void +pbgetvp(vp, bp) + register struct vnode *vp; + register struct buf *bp; +{ + if (bp->b_vp) + panic("pbgetvp: not free"); + VHOLD(vp); + bp->b_vp = vp; + if (vp->v_type == VBLK || vp->v_type == VCHR) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; +} + +/* + * Disassociate a p-buffer from a vnode. + */ +void +pbrelvp(bp) + register struct buf *bp; +{ + struct vnode *vp; + + if (bp->b_vp == (struct vnode *) 0) + panic("brelvp: NULL"); + + vp = bp->b_vp; + bp->b_vp = (struct vnode *) 0; + HOLDRELE(vp); +} + +/* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. @@ -586,14 +629,25 @@ reassignbuf(bp, newvp) if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* - * If dirty, put on list of dirty buffers; - * otherwise insert onto list of clean buffers. + * If dirty, put on list of dirty buffers; otherwise insert onto list + * of clean buffers. */ - if (bp->b_flags & B_DELWRI) - listheadp = &newvp->v_dirtyblkhd; - else + if (bp->b_flags & B_DELWRI) { + struct buf *tbp; + + tbp = newvp->v_dirtyblkhd.lh_first; + if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { + bufinsvn(bp, &newvp->v_dirtyblkhd); + } else { + while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + tbp = tbp->b_vnbufs.le_next; + } + LIST_INSERT_AFTER(tbp, bp, b_vnbufs); + } + } else { listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); + bufinsvn(bp, listheadp); + } } /* @@ -612,14 +666,14 @@ bdevvp(dev, vpp) if (dev == NODEV) return (0); - error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); + error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; - if ((nvp = checkalias(vp, dev, (struct mount *)0))) { + if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } @@ -665,7 +719,7 @@ loop: } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, - sizeof(struct specinfo), M_VNODE, M_WAITOK); + sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; @@ -702,20 +756,19 @@ vget(vp, lockflag) { /* - * If the vnode is in the process of being cleaned out for - * another use, we wait for the cleaning to finish and then - * return failure. 
Cleaning is determined either by checking - * that the VXLOCK flag is set, or that the use count is - * zero with the back pointer set to show that it has been - * removed from the free list by getnewvnode. The VXLOCK - * flag may not have been set yet because vclean is blocked in - * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * If the vnode is in the process of being cleaned out for another + * use, we wait for the cleaning to finish and then return failure. + * Cleaning is determined either by checking that the VXLOCK flag is + * set, or that the use count is zero with the back pointer set to + * show that it has been removed from the free list by getnewvnode. + * The VXLOCK flag may not have been set yet because vclean is blocked + * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. */ if ((vp->v_flag & VXLOCK) || (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { + vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vget", 0); + (void) tsleep((caddr_t) vp, PINOD, "vget", 0); return (1); } if (vp->v_usecount == 0) @@ -768,7 +821,7 @@ vrele(vp) if (vp->v_usecount > 0) return; #ifdef DIAGNOSTIC - if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) { + if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */ ) { vprint("vrele: bad ref count", vp); panic("vrele: ref cnt"); } @@ -813,8 +866,9 @@ holdrele(vp) * that are found. */ #ifdef DIAGNOSTIC -int busyprt = 0; /* print out busy vnodes */ -struct ctldebug debug1 = { "busyprt", &busyprt }; +int busyprt = 0; /* print out busy vnodes */ +struct ctldebug debug1 = {"busyprt", &busyprt}; + #endif int @@ -844,24 +898,24 @@ loop: if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) continue; /* - * If WRITECLOSE is set, only flush out regular file - * vnodes open for writing. + * If WRITECLOSE is set, only flush out regular file vnodes + * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) continue; /* - * With v_usecount == 0, all we need to do is clear - * out the vnode data structures and we are done. + * With v_usecount == 0, all we need to do is clear out the + * vnode data structures and we are done. */ if (vp->v_usecount == 0) { vgone(vp); continue; } /* - * If FORCECLOSE is set, forcibly close the vnode. - * For block or character devices, revert to an - * anonymous device. For all other files, just kill them. + * If FORCECLOSE is set, forcibly close the vnode. For block + * or character devices, revert to an anonymous device. For + * all other files, just kill them. */ if (flags & FORCECLOSE) { if (vp->v_type != VBLK && vp->v_type != VCHR) { @@ -869,7 +923,7 @@ loop: } else { vclean(vp, 0); vp->v_op = spec_vnodeop_p; - insmntque(vp, (struct mount *)0); + insmntque(vp, (struct mount *) 0); } continue; } @@ -895,24 +949,23 @@ vclean(vp, flags) int active; /* - * Check to see if the vnode is in use. - * If so we have to reference it before we clean it out - * so that its count cannot fall to zero and generate a - * race against ourselves to recycle it. + * Check to see if the vnode is in use. If so we have to reference it + * before we clean it out so that its count cannot fall to zero and + * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) VREF(vp); /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. 
The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. + * Even if the count is zero, the VOP_INACTIVE routine may still have + * the object locked while it cleans it out. The VOP_LOCK ensures that + * the VOP_INACTIVE routine is done with its work. For active vnodes, + * it ensures that no other activity can occur while the underlying + * object is being cleaned out. */ VOP_LOCK(vp); /* - * Prevent the vnode from being recycled or - * brought into use while we clean it out. + * Prevent the vnode from being recycled or brought into use while we + * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); @@ -923,13 +976,13 @@ vclean(vp, flags) if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. + * Any other processes trying to obtain this lock must first wait for + * VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp); /* - * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. + * If purging an active vnode, it must be closed and deactivated + * before being reclaimed. */ if (active) { if (flags & DOCLOSE) @@ -952,7 +1005,7 @@ vclean(vp, flags) vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; - wakeup((caddr_t)vp); + wakeup((caddr_t) vp); } } @@ -968,17 +1021,17 @@ vgoneall(vp) if (vp->v_flag & VALIASED) { /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. + * If a vgone (or vclean) is already in progress, wait until + * it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vgall", 0); + (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); return; } /* - * Ensure that vp will not be vgone'd while we - * are eliminating its aliases. + * Ensure that vp will not be vgone'd while we are eliminating + * its aliases. */ vp->v_flag |= VXLOCK; while (vp->v_flag & VALIASED) { @@ -991,9 +1044,8 @@ vgoneall(vp) } } /* - * Remove the lock so that vgone below will - * really eliminate the vnode after which time - * vgone will awaken any sleepers. + * Remove the lock so that vgone below will really eliminate + * the vnode after which time vgone will awaken any sleepers. */ vp->v_flag &= ~VXLOCK; } @@ -1012,12 +1064,12 @@ vgone(vp) struct vnode *vx; /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. + * If a vgone (or vclean) is already in progress, wait until it is + * done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vgone", 0); + (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); return; } /* @@ -1067,20 +1119,18 @@ vgone(vp) vp->v_specinfo = NULL; } /* - * If it is on the freelist and not already at the head, - * move it to the head of the list. The test of the back - * pointer and the reference count of zero is because - * it will be removed from the free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. 
- * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. + * If it is on the freelist and not already at the head, move it to + * the head of the list. The test of the back pointer and the + * reference count of zero is because it will be removed from the free + * list by getnewvnode, but will not have its reference count + * incremented until after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to close the + * previous instance of the underlying object. So, the back pointer is + * explicitly set to `0xdeadb' in getnewvnode after removing it from + * the freelist to ensure that we do not try to move it here. */ if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && + vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -1141,7 +1191,7 @@ loop: * Print out a description of a vnode. */ static char *typename[] = - { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; +{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) @@ -1153,8 +1203,8 @@ vprint(label, vp) if (label != NULL) printf("%s: ", label); printf("type %s, usecount %d, writecount %d, refcount %ld,", - typename[vp->v_type], vp->v_usecount, vp->v_writecount, - vp->v_holdcnt); + typename[vp->v_type], vp->v_usecount, vp->v_writecount, + vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); @@ -1194,16 +1244,17 @@ printlockedvnodes() printf("Locked vnodes\n"); for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) { for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) + vp != NULL; + vp = vp->v_mntvnodes.le_next) if (VOP_ISLOCKED(vp)) - vprint((char *)0, vp); + vprint((char *) 0, vp); } } #endif int kinfo_vdebug = 1; int kinfo_vgetfailed; + #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). @@ -1228,7 +1279,7 @@ sysctl_vnode(where, sizep) return (0); } ewhere = where + *sizep; - + for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) { nmp = mp->mnt_list.tqe_next; if (vfs_busy(mp)) @@ -1236,12 +1287,12 @@ sysctl_vnode(where, sizep) savebp = bp; again: for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp != NULL; + vp = vp->v_mntvnodes.le_next) { /* - * Check that the vp is still associated with - * this filesystem. RACE: could have been - * recycled onto the same filesystem. + * Check that the vp is still associated with this + * filesystem. RACE: could have been recycled onto + * the same filesystem. 
*/ if (vp->v_mount != mp) { if (kinfo_vdebug) @@ -1253,8 +1304,8 @@ again: *sizep = bp - where; return (ENOMEM); } - if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || - (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) + if ((error = copyout((caddr_t) & vp, bp, VPTRSZ)) || + (error = copyout((caddr_t) vp, bp + VPTRSZ, VNODESZ))) return (error); bp += VPTRSZ + VNODESZ; } @@ -1317,16 +1368,16 @@ vfs_hang_addrlist(mp, nep, argp) return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; - np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); - bzero((caddr_t)np, i); - saddr = (struct sockaddr *)(np + 1); - if ((error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))) + np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); + bzero((caddr_t) np, i); + saddr = (struct sockaddr *) (np + 1); + if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { - smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); - error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); + smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); + error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) @@ -1335,13 +1386,13 @@ vfs_hang_addrlist(mp, nep, argp) i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* - * Seems silly to initialize every AF when most are not - * used, do so on demand here + * Seems silly to initialize every AF when most are not used, + * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { - dom->dom_rtattach((void **)&nep->ne_rtable[i], - dom->dom_rtoffset); + dom->dom_rtattach((void **) &nep->ne_rtable[i], + dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { @@ -1349,9 +1400,9 @@ vfs_hang_addrlist(mp, nep, argp) goto out; } } - rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, - np->netc_rnodes); - if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ + rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, + np->netc_rnodes); + if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } @@ -1370,13 +1421,13 @@ vfs_free_netcred(rn, w) struct radix_node *rn; caddr_t w; { - register struct radix_node_head *rnh = (struct radix_node_head *)w; + register struct radix_node_head *rnh = (struct radix_node_head *) w; - (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); - free((caddr_t)rn, M_NETADDR); + (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); + free((caddr_t) rn, M_NETADDR); return (0); } - + /* * Free the net address hash lists that are hanging off the mount points. 
*/ @@ -1389,9 +1440,9 @@ vfs_free_addrlist(nep) for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { - (*rnh->rnh_walktree)(rnh, vfs_free_netcred, - (caddr_t)rnh); - free((caddr_t)rnh, M_RTABLE); + (*rnh->rnh_walktree) (rnh, vfs_free_netcred, + (caddr_t) rnh); + free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } @@ -1436,8 +1487,8 @@ vfs_export_lookup(mp, nep, nam) rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) - (*rnh->rnh_matchaddr)((caddr_t)saddr, - rnh); + (*rnh->rnh_matchaddr) ((caddr_t) saddr, + rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index b749976c9249..59f6232bfea5 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.12 1994/10/06 21:06:37 davidg Exp $ + * $Id: vfs_subr.c,v 1.13 1994/12/23 04:52:55 davidg Exp $ */ /* @@ -63,13 +63,13 @@ #include <miscfs/specfs/specdev.h> -void insmntque __P((struct vnode *, struct mount *)); +void insmntque __P((struct vnode *, struct mount *)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; -int vttoif_tab[9] = { +int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; @@ -84,7 +84,9 @@ int vttoif_tab[9] = { } TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ -struct mntlist mountlist; /* mounted filesystem list */ +struct mntlist mountlist; /* mounted filesystem list */ + +int desiredvnodes; /* * Initialize the vnode management data structures. @@ -92,6 +94,9 @@ struct mntlist mountlist; /* mounted filesystem list */ void vntblinit() { + extern int vm_object_cache_max; + + desiredvnodes = maxproc + vm_object_cache_max; TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&mountlist); @@ -106,9 +111,9 @@ vfs_lock(mp) register struct mount *mp; { - while(mp->mnt_flag & MNT_MLOCK) { + while (mp->mnt_flag & MNT_MLOCK) { mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t)mp, PVFS, "vfslck", 0); + (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); } mp->mnt_flag |= MNT_MLOCK; return (0); @@ -128,7 +133,7 @@ vfs_unlock(mp) mp->mnt_flag &= ~MNT_MLOCK; if (mp->mnt_flag & MNT_MWAIT) { mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t)mp); + wakeup((caddr_t) mp); } } @@ -141,9 +146,9 @@ vfs_busy(mp) register struct mount *mp; { - while(mp->mnt_flag & MNT_MPBUSY) { + while (mp->mnt_flag & MNT_MPBUSY) { mp->mnt_flag |= MNT_MPWANT; - (void) tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbsy", 0); + (void) tsleep((caddr_t) & mp->mnt_flag, PVFS, "vfsbsy", 0); } if (mp->mnt_flag & MNT_UNMOUNT) return (1); @@ -165,7 +170,7 @@ vfs_unbusy(mp) mp->mnt_flag &= ~MNT_MPBUSY; if (mp->mnt_flag & MNT_MPWANT) { mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t)&mp->mnt_flag); + wakeup((caddr_t) & mp->mnt_flag); } } @@ -173,20 +178,18 @@ void vfs_unmountroot(rootfs) struct mount *rootfs; { - struct mount *mp = rootfs; - int error; + struct mount *mp = rootfs; + int error; if (vfs_busy(mp)) { printf("failed to unmount root\n"); return; } - mp->mnt_flag |= MNT_UNMOUNT; if ((error = vfs_lock(mp))) { printf("lock of root filesystem failed (%d)\n", error); return; } - vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ @@ -200,7 +203,6 @@ vfs_unmountroot(rootfs) else printf("%d)\n", error); } - mp->mnt_flag &= ~MNT_UNMOUNT; vfs_unbusy(mp); } @@ -222,7 +224,6 @@ 
vfs_unmountall() rootfs = mp; continue; } - error = dounmount(mp, MNT_FORCE, initproc); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); @@ -255,7 +256,7 @@ getvfs(fsid) mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); } - return ((struct mount *)0); + return ((struct mount *) 0); } /* @@ -266,7 +267,7 @@ getnewfsid(mp, mtype) struct mount *mp; int mtype; { -static u_short xxxfs_mntid; + static u_short xxxfs_mntid; fsid_t tfsid; @@ -297,19 +298,19 @@ vattr_null(vap) vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = - vap->va_fsid = vap->va_fileid = - vap->va_blocksize = vap->va_rdev = - vap->va_atime.ts_sec = vap->va_atime.ts_nsec = - vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = - vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = - vap->va_flags = vap->va_gen = VNOVAL; + vap->va_fsid = vap->va_fileid = + vap->va_blocksize = vap->va_rdev = + vap->va_atime.ts_sec = vap->va_atime.ts_nsec = + vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = + vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = + vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ -extern int (**dead_vnodeop_p)(); +extern int (**dead_vnodeop_p) (); extern void vclean(); long numvnodes; @@ -320,17 +321,16 @@ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; - int (**vops)(); + int (**vops) (); struct vnode **vpp; { register struct vnode *vp; - if ((vnode_free_list.tqh_first == NULL && - numvnodes < 2 * desiredvnodes) || + if (vnode_free_list.tqh_first == NULL || numvnodes < desiredvnodes) { - vp = (struct vnode *)malloc((u_long)sizeof *vp, + vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); - bzero((char *)vp, sizeof *vp); + bzero((char *) vp, sizeof *vp); numvnodes++; } else { if ((vp = vnode_free_list.tqh_first) == NULL) { @@ -340,21 +340,23 @@ getnewvnode(tag, mp, vops, vpp) } if (vp->v_usecount) panic("free vnode isn't"); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* see comment on why 0xdeadb is set at end of vgone (below) */ - vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; + vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); #ifdef DIAGNOSTIC { - int s; - if (vp->v_data) - panic("cleaned vnode isn't"); - s = splbio(); - if (vp->v_numoutput) - panic("Clean vnode has pending I/O's"); - splx(s); + int s; + + if (vp->v_data) + panic("cleaned vnode isn't"); + s = splbio(); + if (vp->v_numoutput) + panic("Clean vnode has pending I/O's"); + splx(s); } #endif vp->v_flag = 0; @@ -366,7 +368,7 @@ getnewvnode(tag, mp, vops, vpp) vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; - vp->v_writecount = 0; /* XXX */ + vp->v_writecount = 0; /* XXX */ } vp->v_type = VNON; cache_purge(vp); @@ -415,11 +417,9 @@ vwakeup(bp) vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - if (vp->v_numoutput < 0) - panic("vwakeup: neg numoutput"); + if (vp->v_flag & VBWAIT) { vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); + wakeup((caddr_t) & vp->v_numoutput); } } } @@ -452,7 +452,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; - if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && + if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) 
while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; @@ -466,9 +466,9 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) s = splbio(); if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; - error = tsleep((caddr_t)bp, - slpflag | (PRIBIO + 1), "vinvalbuf", - slptimeo); + error = tsleep((caddr_t) bp, + slpflag | (PRIBIO + 1), "vinvalbuf", + slptimeo); splx(s); if (error) return (error); @@ -478,9 +478,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) bp->b_flags |= B_BUSY; splx(s); /* - * XXX Since there are no node locks for NFS, I believe - * there is a slight chance that a delayed write will - * occur while sleeping just above, so check for it. + * XXX Since there are no node locks for NFS, I + * believe there is a slight chance that a delayed + * write will occur while sleeping just above, so + * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { (void) VOP_BWRITE(bp); @@ -491,9 +492,17 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) } } + + s = splbio(); + while (vp->v_numoutput > 0) { + vp->v_flag |= VBWAIT; + tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); + } + splx(s); + pager = NULL; - object = (vm_object_t)vp->v_vmdata; - if( object != NULL) + object = (vm_object_t) vp->v_vmdata; + if (object != NULL) pager = object->pager; if (pager != NULL) { object = vm_object_lookup(pager); @@ -506,7 +515,6 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) vm_object_deallocate(object); } } - if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); @@ -565,6 +573,41 @@ brelvp(bp) } /* + * Associate a p-buffer with a vnode. + */ +void +pbgetvp(vp, bp) + register struct vnode *vp; + register struct buf *bp; +{ + if (bp->b_vp) + panic("pbgetvp: not free"); + VHOLD(vp); + bp->b_vp = vp; + if (vp->v_type == VBLK || vp->v_type == VCHR) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; +} + +/* + * Disassociate a p-buffer from a vnode. + */ +void +pbrelvp(bp) + register struct buf *bp; +{ + struct vnode *vp; + + if (bp->b_vp == (struct vnode *) 0) + panic("brelvp: NULL"); + + vp = bp->b_vp; + bp->b_vp = (struct vnode *) 0; + HOLDRELE(vp); +} + +/* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. @@ -586,14 +629,25 @@ reassignbuf(bp, newvp) if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* - * If dirty, put on list of dirty buffers; - * otherwise insert onto list of clean buffers. + * If dirty, put on list of dirty buffers; otherwise insert onto list + * of clean buffers. 
*/ - if (bp->b_flags & B_DELWRI) - listheadp = &newvp->v_dirtyblkhd; - else + if (bp->b_flags & B_DELWRI) { + struct buf *tbp; + + tbp = newvp->v_dirtyblkhd.lh_first; + if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { + bufinsvn(bp, &newvp->v_dirtyblkhd); + } else { + while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + tbp = tbp->b_vnbufs.le_next; + } + LIST_INSERT_AFTER(tbp, bp, b_vnbufs); + } + } else { listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); + bufinsvn(bp, listheadp); + } } /* @@ -612,14 +666,14 @@ bdevvp(dev, vpp) if (dev == NODEV) return (0); - error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); + error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; - if ((nvp = checkalias(vp, dev, (struct mount *)0))) { + if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } @@ -665,7 +719,7 @@ loop: } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, - sizeof(struct specinfo), M_VNODE, M_WAITOK); + sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; @@ -702,20 +756,19 @@ vget(vp, lockflag) { /* - * If the vnode is in the process of being cleaned out for - * another use, we wait for the cleaning to finish and then - * return failure. Cleaning is determined either by checking - * that the VXLOCK flag is set, or that the use count is - * zero with the back pointer set to show that it has been - * removed from the free list by getnewvnode. The VXLOCK - * flag may not have been set yet because vclean is blocked in - * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * If the vnode is in the process of being cleaned out for another + * use, we wait for the cleaning to finish and then return failure. + * Cleaning is determined either by checking that the VXLOCK flag is + * set, or that the use count is zero with the back pointer set to + * show that it has been removed from the free list by getnewvnode. + * The VXLOCK flag may not have been set yet because vclean is blocked + * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. */ if ((vp->v_flag & VXLOCK) || (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { + vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vget", 0); + (void) tsleep((caddr_t) vp, PINOD, "vget", 0); return (1); } if (vp->v_usecount == 0) @@ -768,7 +821,7 @@ vrele(vp) if (vp->v_usecount > 0) return; #ifdef DIAGNOSTIC - if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */) { + if (vp->v_usecount != 0 /* || vp->v_writecount != 0 */ ) { vprint("vrele: bad ref count", vp); panic("vrele: ref cnt"); } @@ -813,8 +866,9 @@ holdrele(vp) * that are found. */ #ifdef DIAGNOSTIC -int busyprt = 0; /* print out busy vnodes */ -struct ctldebug debug1 = { "busyprt", &busyprt }; +int busyprt = 0; /* print out busy vnodes */ +struct ctldebug debug1 = {"busyprt", &busyprt}; + #endif int @@ -844,24 +898,24 @@ loop: if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) continue; /* - * If WRITECLOSE is set, only flush out regular file - * vnodes open for writing. + * If WRITECLOSE is set, only flush out regular file vnodes + * open for writing. 
*/ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) continue; /* - * With v_usecount == 0, all we need to do is clear - * out the vnode data structures and we are done. + * With v_usecount == 0, all we need to do is clear out the + * vnode data structures and we are done. */ if (vp->v_usecount == 0) { vgone(vp); continue; } /* - * If FORCECLOSE is set, forcibly close the vnode. - * For block or character devices, revert to an - * anonymous device. For all other files, just kill them. + * If FORCECLOSE is set, forcibly close the vnode. For block + * or character devices, revert to an anonymous device. For + * all other files, just kill them. */ if (flags & FORCECLOSE) { if (vp->v_type != VBLK && vp->v_type != VCHR) { @@ -869,7 +923,7 @@ loop: } else { vclean(vp, 0); vp->v_op = spec_vnodeop_p; - insmntque(vp, (struct mount *)0); + insmntque(vp, (struct mount *) 0); } continue; } @@ -895,24 +949,23 @@ vclean(vp, flags) int active; /* - * Check to see if the vnode is in use. - * If so we have to reference it before we clean it out - * so that its count cannot fall to zero and generate a - * race against ourselves to recycle it. + * Check to see if the vnode is in use. If so we have to reference it + * before we clean it out so that its count cannot fall to zero and + * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) VREF(vp); /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. + * Even if the count is zero, the VOP_INACTIVE routine may still have + * the object locked while it cleans it out. The VOP_LOCK ensures that + * the VOP_INACTIVE routine is done with its work. For active vnodes, + * it ensures that no other activity can occur while the underlying + * object is being cleaned out. */ VOP_LOCK(vp); /* - * Prevent the vnode from being recycled or - * brought into use while we clean it out. + * Prevent the vnode from being recycled or brought into use while we + * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); @@ -923,13 +976,13 @@ vclean(vp, flags) if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. + * Any other processes trying to obtain this lock must first wait for + * VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp); /* - * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. + * If purging an active vnode, it must be closed and deactivated + * before being reclaimed. */ if (active) { if (flags & DOCLOSE) @@ -952,7 +1005,7 @@ vclean(vp, flags) vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; - wakeup((caddr_t)vp); + wakeup((caddr_t) vp); } } @@ -968,17 +1021,17 @@ vgoneall(vp) if (vp->v_flag & VALIASED) { /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. + * If a vgone (or vclean) is already in progress, wait until + * it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vgall", 0); + (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); return; } /* - * Ensure that vp will not be vgone'd while we - * are eliminating its aliases. 
+ * Ensure that vp will not be vgone'd while we are eliminating + * its aliases. */ vp->v_flag |= VXLOCK; while (vp->v_flag & VALIASED) { @@ -991,9 +1044,8 @@ vgoneall(vp) } } /* - * Remove the lock so that vgone below will - * really eliminate the vnode after which time - * vgone will awaken any sleepers. + * Remove the lock so that vgone below will really eliminate + * the vnode after which time vgone will awaken any sleepers. */ vp->v_flag &= ~VXLOCK; } @@ -1012,12 +1064,12 @@ vgone(vp) struct vnode *vx; /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. + * If a vgone (or vclean) is already in progress, wait until it is + * done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "vgone", 0); + (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); return; } /* @@ -1067,20 +1119,18 @@ vgone(vp) vp->v_specinfo = NULL; } /* - * If it is on the freelist and not already at the head, - * move it to the head of the list. The test of the back - * pointer and the reference count of zero is because - * it will be removed from the free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. + * If it is on the freelist and not already at the head, move it to + * the head of the list. The test of the back pointer and the + * reference count of zero is because it will be removed from the free + * list by getnewvnode, but will not have its reference count + * incremented until after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to close the + * previous instance of the underlying object. So, the back pointer is + * explicitly set to `0xdeadb' in getnewvnode after removing it from + * the freelist to ensure that we do not try to move it here. */ if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && + vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -1141,7 +1191,7 @@ loop: * Print out a description of a vnode. */ static char *typename[] = - { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; +{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) @@ -1153,8 +1203,8 @@ vprint(label, vp) if (label != NULL) printf("%s: ", label); printf("type %s, usecount %d, writecount %d, refcount %ld,", - typename[vp->v_type], vp->v_usecount, vp->v_writecount, - vp->v_holdcnt); + typename[vp->v_type], vp->v_usecount, vp->v_writecount, + vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); @@ -1194,16 +1244,17 @@ printlockedvnodes() printf("Locked vnodes\n"); for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) { for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) + vp != NULL; + vp = vp->v_mntvnodes.le_next) if (VOP_ISLOCKED(vp)) - vprint((char *)0, vp); + vprint((char *) 0, vp); } } #endif int kinfo_vdebug = 1; int kinfo_vgetfailed; + #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). 
@@ -1228,7 +1279,7 @@ sysctl_vnode(where, sizep) return (0); } ewhere = where + *sizep; - + for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) { nmp = mp->mnt_list.tqe_next; if (vfs_busy(mp)) @@ -1236,12 +1287,12 @@ sysctl_vnode(where, sizep) savebp = bp; again: for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp != NULL; + vp = vp->v_mntvnodes.le_next) { /* - * Check that the vp is still associated with - * this filesystem. RACE: could have been - * recycled onto the same filesystem. + * Check that the vp is still associated with this + * filesystem. RACE: could have been recycled onto + * the same filesystem. */ if (vp->v_mount != mp) { if (kinfo_vdebug) @@ -1253,8 +1304,8 @@ again: *sizep = bp - where; return (ENOMEM); } - if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || - (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) + if ((error = copyout((caddr_t) & vp, bp, VPTRSZ)) || + (error = copyout((caddr_t) vp, bp + VPTRSZ, VNODESZ))) return (error); bp += VPTRSZ + VNODESZ; } @@ -1317,16 +1368,16 @@ vfs_hang_addrlist(mp, nep, argp) return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; - np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); - bzero((caddr_t)np, i); - saddr = (struct sockaddr *)(np + 1); - if ((error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))) + np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); + bzero((caddr_t) np, i); + saddr = (struct sockaddr *) (np + 1); + if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { - smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); - error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); + smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); + error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) @@ -1335,13 +1386,13 @@ vfs_hang_addrlist(mp, nep, argp) i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* - * Seems silly to initialize every AF when most are not - * used, do so on demand here + * Seems silly to initialize every AF when most are not used, + * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { - dom->dom_rtattach((void **)&nep->ne_rtable[i], - dom->dom_rtoffset); + dom->dom_rtattach((void **) &nep->ne_rtable[i], + dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { @@ -1349,9 +1400,9 @@ vfs_hang_addrlist(mp, nep, argp) goto out; } } - rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, - np->netc_rnodes); - if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ + rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, + np->netc_rnodes); + if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } @@ -1370,13 +1421,13 @@ vfs_free_netcred(rn, w) struct radix_node *rn; caddr_t w; { - register struct radix_node_head *rnh = (struct radix_node_head *)w; + register struct radix_node_head *rnh = (struct radix_node_head *) w; - (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); - free((caddr_t)rn, M_NETADDR); + (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); + free((caddr_t) rn, M_NETADDR); return (0); } - + /* * Free the net address hash lists that are hanging off the mount points. 
*/ @@ -1389,9 +1440,9 @@ vfs_free_addrlist(nep) for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { - (*rnh->rnh_walktree)(rnh, vfs_free_netcred, - (caddr_t)rnh); - free((caddr_t)rnh, M_RTABLE); + (*rnh->rnh_walktree) (rnh, vfs_free_netcred, + (caddr_t) rnh); + free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } @@ -1436,8 +1487,8 @@ vfs_export_lookup(mp, nep, nam) rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) - (*rnh->rnh_matchaddr)((caddr_t)saddr, - rnh); + (*rnh->rnh_matchaddr) ((caddr_t) saddr, + rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 79b92cbd40ef..553a0958b452 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 - * $Id: vfs_vnops.c,v 1.5 1994/10/02 17:35:40 phk Exp $ + * $Id: vfs_vnops.c,v 1.6 1994/10/05 09:48:26 davidg Exp $ */ #include <sys/param.h> @@ -158,21 +158,26 @@ vn_open(ndp, fmode, cmode) if( vp->v_type == VREG) { vm_object_t object; vm_pager_t pager; +retry: if( (vp->v_flag & VVMIO) == 0) { pager = (vm_pager_t) vnode_pager_alloc(vp, 0, 0, 0); object = (vm_object_t) vp->v_vmdata; if( object->pager != pager) - panic("ufs_open: pager/object mismatch"); + panic("vn_open: pager/object mismatch"); (void) vm_object_lookup( pager); pager_cache( object, TRUE); vp->v_flag |= VVMIO; } else { - object = (vm_object_t) vp->v_vmdata; + if( (object = (vm_object_t)vp->v_vmdata) && + (object->flags & OBJ_DEAD)) { + tsleep( (caddr_t) object, PVM, "vodead", 0); + goto retry; + } if( !object) - panic("ufs_open: VMIO object missing"); + panic("vn_open: VMIO object missing"); pager = object->pager; if( !pager) - panic("ufs_open: VMIO pager missing"); + panic("vn_open: VMIO pager missing"); (void) vm_object_lookup( pager); } } @@ -235,11 +240,12 @@ vn_close(vp, flags, cred, p) * be in vfs code. */ if (vp->v_flag & VVMIO) { + vrele(vp); if( vp->v_vmdata == NULL) - panic("ufs_close: VMIO object missing"); + panic("vn_close: VMIO object missing"); vm_object_deallocate( (vm_object_t) vp->v_vmdata); - } - vrele(vp); + } else + vrele(vp); return (error); } diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c index c20b7e5f1081..b811c111ddc3 100644 --- a/sys/msdosfs/msdosfs_denode.c +++ b/sys/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_denode.c,v 1.5 1994/12/12 12:35:43 bde Exp $ */ +/* $Id: msdosfs_denode.c,v 1.6 1994/12/27 12:37:35 bde Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.9 1994/08/21 18:44:00 ws Exp $ */ /*- @@ -477,7 +477,7 @@ detrunc(dep, length, flags, cred, p) #endif return error; } - vnode_pager_uncache(DETOV(dep)); /* what's this for? */ + /* vnode_pager_uncache(DETOV(dep)); /* what's this for? */ /* * is this the right place for it? */ diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index 1948062c64bd..b309fcf97e50 100644 --- a/sys/msdosfs/msdosfs_vnops.c +++ b/sys/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_vnops.c,v 1.10 1994/12/12 12:35:50 bde Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.11 1994/12/27 12:37:36 bde Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.20 1994/08/21 18:44:13 ws Exp $ */ /*- @@ -704,7 +704,6 @@ msdosfs_write(ap) dep->de_FileSize = uio->uio_offset + n; vnode_pager_setsize(vp, dep->de_FileSize); /* why? */ } - (void) vnode_pager_uncache(vp); /* why not? */ /* * Should these vnode_pager_* functions be done on dir * files? 
@@ -725,7 +724,6 @@ msdosfs_write(ap) if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) { - bp->b_flags |= B_AGE; bawrite(bp); } else bdwrite(bp); diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index 4d75cf84a96d..4f8fc3e10176 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94 - * $Id: nfs_bio.c,v 1.6 1994/10/02 17:26:55 phk Exp $ + * $Id: nfs_bio.c,v 1.7 1994/10/17 17:47:32 phk Exp $ */ #include <sys/param.h> @@ -78,7 +78,7 @@ nfs_bioread(vp, uio, ioflag, cred) struct vattr vattr; struct proc *p; struct nfsmount *nmp; - daddr_t lbn, bn, rabn; + daddr_t lbn, rabn; caddr_t baddr; int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin; @@ -94,7 +94,7 @@ nfs_bioread(vp, uio, ioflag, cred) if (uio->uio_offset < 0 && vp->v_type != VDIR) return (EINVAL); nmp = VFSTONFS(vp->v_mount); - biosize = nmp->nm_rsize; + biosize = NFS_MAXDGRAMDATA; p = uio->uio_procp; /* * For nfs, cache consistency can only be maintained approximately. @@ -198,7 +198,6 @@ nfs_bioread(vp, uio, ioflag, cred) nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); - bn = lbn * (biosize / DEV_BSIZE); not_readin = 1; /* @@ -208,15 +207,17 @@ nfs_bioread(vp, uio, ioflag, cred) lbn == vp->v_lastr + 1) { for (nra = 0; nra < nmp->nm_readahead && (lbn + 1 + nra) * biosize < np->n_size; nra++) { - rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); + rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { - rabp->b_flags |= B_INVAL; + rabp->b_flags |= B_INVAL|B_ERROR; + vfs_unbusy_pages(rabp); brelse(rabp); } } @@ -230,21 +231,23 @@ nfs_bioread(vp, uio, ioflag, cred) * Otherwise, get the block and write back/read in, * as required. 
*/ - if ((bp = incore(vp, bn)) && + if ((bp = incore(vp, lbn)) && (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == (B_BUSY | B_WRITEINPROG)) got_buf = 0; else { again: - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); got_buf = 1; if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { bp->b_flags |= B_READ; not_readin = 0; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); brelse(bp); return (error); } @@ -257,7 +260,7 @@ again: if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { if (!got_buf) { - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); got_buf = 1; @@ -285,8 +288,11 @@ again: return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); + bp->b_flags |= B_ERROR; brelse(bp); return (error); } @@ -297,14 +303,18 @@ again: break; case VDIR: nfsstats.biocache_readdirs++; - bn = (daddr_t)uio->uio_offset; - bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p); + lbn = (daddr_t)uio->uio_offset; + bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); + if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); + bp->b_flags |= B_ERROR; brelse(bp); return (error); } @@ -323,8 +333,10 @@ again: if (rabp) { if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { - rabp->b_flags |= B_INVAL; + vfs_unbusy_pages(rabp); + rabp->b_flags |= B_INVAL|B_ERROR; brelse(rabp); } } @@ -385,7 +397,7 @@ nfs_write(ap) struct buf *bp; struct vattr vattr; struct nfsmount *nmp; - daddr_t lbn, bn; + daddr_t lbn; int n, on, error = 0; #ifdef DIAGNOSTIC @@ -434,14 +446,12 @@ nfs_write(ap) * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ - biosize = nmp->nm_rsize; + biosize = NFS_MAXDGRAMDATA; do { /* * XXX make sure we aren't cached in the VM page cache */ - (void)vnode_pager_uncache(vp); - /* * Check for a valid write lease. * If non-cachable, just do the rpc @@ -467,9 +477,8 @@ nfs_write(ap) lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); - bn = lbn * (biosize / DEV_BSIZE); again: - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { @@ -591,6 +600,10 @@ nfs_getcacheblk(vp, bn, size, p) } } else bp = getblk(vp, bn, size, 0, 0); + + if( vp->v_type == VREG) + bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE; + return (bp); } diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index 9b2ef800780c..f28136087602 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.6 1994/10/02 17:27:01 phk Exp $ + * $Id: nfs_subs.c,v 1.7 1994/10/17 17:47:37 phk Exp $ */ /* @@ -995,6 +995,7 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p) */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; + nfsrv_vmio( ndp->ni_vp); return (0); } out: @@ -1123,6 +1124,7 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp) *rdonlyp = 0; if (!lockflag) VOP_UNLOCK(*vpp); + nfsrv_vmio(*vpp); return (0); } @@ -1168,3 +1170,54 @@ netaddr_match(family, haddr, nam) }; return (0); } + +int +nfsrv_vmio( struct vnode *vp) { + int rtval; + vm_object_t object; + vm_pager_t pager; + + if( (vp == NULL) || (vp->v_type != VREG)) + return 1; + +retry: + if( (vp->v_flag & VVMIO) == 0) { + pager = (vm_pager_t) vnode_pager_alloc(vp, 0, 0, 0); + object = (vm_object_t) vp->v_vmdata; + if( object->pager != pager) + panic("nfsrv_vmio: pager/object mismatch"); + (void) vm_object_lookup( pager); + pager_cache( object, TRUE); + vp->v_flag |= VVMIO; + } else { + if( (object = (vm_object_t)vp->v_vmdata) && + (object->flags & OBJ_DEAD)) { + tsleep( (caddr_t) object, PVM, "nfdead", 0); + goto retry; + } + if( !object) + panic("nfsrv_vmio: VMIO object missing"); + pager = object->pager; + if( !pager) + panic("nfsrv_vmio: VMIO pager missing"); + (void) vm_object_lookup( pager); + } + return 0; +} +int +nfsrv_vput( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vput( vp); + return 0; +} +int +nfsrv_vrele( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vrele( vp); + return 0; +} + diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c index 19467641a32e..30162506be9f 100644 --- a/sys/nfs/nfs_nqlease.c +++ b/sys/nfs/nfs_nqlease.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_nqlease.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_nqlease.c,v 1.6 1994/10/02 17:26:57 phk Exp $ + * $Id: nfs_nqlease.c,v 1.7 1994/10/17 17:47:34 phk Exp $ */ /* @@ -715,7 +715,7 @@ nqnfsrv_getlease(nfsd, mrep, md, dpos, cred, nam, mrq) (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, nfsd, nam, &cache, &frev, cred); error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_NQFATTR + 4*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 4*NFSX_UNSIGNED); *tl++ = txdr_unsigned(cache); diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c index b6e601420d4e..5b3bb63df333 100644 --- a/sys/nfs/nfs_serv.c +++ b/sys/nfs/nfs_serv.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
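nfsrv_vmio() above attaches a VM object to a regular-file vnode and takes an extra object reference, which is why every vput()/vrele() in the server paths that follow is replaced by nfsrv_vput()/nfsrv_vrele(). A rough usage sketch with error handling trimmed; the function name is invented for illustration and the wrapper prototypes are declared locally since the diff does not show a header for them:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/vnode.h>

extern int nfsrv_vmio(struct vnode *);
extern int nfsrv_vput(struct vnode *);

static int
example_server_getattr(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct proc *p)
{
        int error;

        /* nfsrv_fhtovp() normally does this right after translating the file handle. */
        nfsrv_vmio(vp);
        error = VOP_GETATTR(vp, vap, cred, p);
        /* Drop the object reference taken by nfsrv_vmio(), then release the vnode. */
        nfsrv_vput(vp);
        return (error);
}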
* * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.6 1994/09/28 16:45:18 dfr Exp $ + * $Id: nfs_serv.c,v 1.7 1994/10/02 17:26:58 phk Exp $ */ /* @@ -121,7 +121,7 @@ nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq) if (*tl == nfs_true) mode |= VEXEC; error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); nfsm_srvdone; } @@ -158,7 +158,7 @@ nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); nqsrv_getl(vp, NQL_READ); error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -255,12 +255,12 @@ nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq) } error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); out: - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -314,21 +314,21 @@ nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq) if (error) nfsm_reply(0); nqsrv_getl(nd.ni_startdir, NQL_READ); - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } if (duration2) (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd, nam, &cache2, &frev2, cred); error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED); if (nfsd->nd_nqlflag != NQL_NOVAL) { if (duration2) { @@ -417,7 +417,7 @@ nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = VOP_READLINK(vp, uiop, cred); out: - vput(vp); + nfsrv_vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_UNSIGNED); @@ -488,7 +488,7 @@ nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq) } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } if (off >= vap->va_size) @@ -539,12 +539,12 @@ nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq) FREE((caddr_t)iv2, M_TEMP); if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) { m_freem(mreq); - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } } else uiop->uio_resid = 0; - vput(vp); + nfsrv_vput(vp); nfsm_srvfillattr; len -= uiop->uio_resid; tlen = nfsm_rndup(len); @@ -619,13 +619,13 @@ nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); if (vp->v_type != VREG) { error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } nqsrv_getl(vp, NQL_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } uiop->uio_resid = 0; @@ -663,19 +663,19 @@ nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq) } if (len > 0 && mp == NULL) { error = EBADRPC; - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } uiop->uio_resid = siz; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } off = uiop->uio_offset; } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -743,7 +743,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) else rdev = fxdr_unsigned(long, sp->sa_nqrdev); if (vap->va_type == VREG || vap->va_type == VSOCK) { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, NQL_WRITE); error=VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (error) @@ -758,7 +758,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) error = suser(cred, (u_short *)0); if (error) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); goto out; } else vap->va_rdev = (dev_t)rdev; @@ -766,7 +766,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(nd.ni_dvp, NQL_WRITE); error=VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (error) { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; @@ -780,27 +780,27 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) } FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); + nfsrv_vrele(nd.ni_dvp); + nfsrv_vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); error = ENXIO; goto out; } vp = nd.ni_vp; } else { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nfsd->nd_nqlflag == NQL_NOVAL) { tsize = fxdr_unsigned(long, sp->sa_nfssize); @@ -814,13 +814,13 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } nqsrv_getl(vp, NQL_WRITE); error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } } @@ -829,11 +829,11 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfhtom(fhp); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); @@ -841,18 +841,18 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) return (error); nfsmout: if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags) - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + 
nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vput(nd.ni_vp); + nfsrv_vput(nd.ni_vp); return (error); out: - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); return (0); @@ -911,10 +911,10 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vput(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vput(vp); } nfsm_reply(0); nfsm_srvdone; @@ -973,8 +973,8 @@ nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq) &dpos, nfsd->nd_procp); if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); goto out1; } tdvp = tond.ni_dvp; @@ -1023,34 +1023,34 @@ out: } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) - vrele(tdvp); + nfsrv_vrele(tdvp); else - vput(tdvp); + nfsrv_vput(tdvp); if (tvp) - vput(tvp); + nfsrv_vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); } - vrele(tond.ni_startdir); + nfsrv_vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); out1: - vrele(fromnd.ni_startdir); + nfsrv_vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); return (error); nfsmout: if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) { - vrele(tond.ni_startdir); + nfsrv_vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); } if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) { - vrele(fromnd.ni_startdir); + nfsrv_vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); } return (error); } @@ -1111,14 +1111,14 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); } out1: - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(0); nfsm_srvdone; } @@ -1178,10 +1178,10 @@ nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq) if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vrele(nd.ni_vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vrele(nd.ni_vp); error = EEXIST; goto out; } @@ -1197,11 +1197,11 @@ out: nfsmout: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); @@ -1252,10 +1252,10 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vrele(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vrele(vp); error = EEXIST; nfsm_reply(0); } @@ -1268,11 +1268,11 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfhtom(fhp); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag 
!= NQL_NOVAL)); @@ -1281,11 +1281,11 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) nfsmout: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); return (error); } @@ -1347,10 +1347,10 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vput(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vput(vp); } nfsm_reply(0); nfsm_srvdone; @@ -1438,7 +1438,7 @@ nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } VOP_UNLOCK(vp); @@ -1458,7 +1458,7 @@ again: error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; if (error) { - vrele(vp); + nfsrv_vrele(vp); free((caddr_t)rbuf, M_TEMP); nfsm_reply(0); } @@ -1466,7 +1466,7 @@ again: /* * If the filesystem doen't support cookies, return eof. */ - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1482,7 +1482,7 @@ again: * rpc reply */ if (siz == 0) { - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1573,7 +1573,7 @@ again: dp = (struct dirent *)cpos; cookiep++; } - vrele(vp); + nfsrv_vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -1643,7 +1643,7 @@ nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } VOP_UNLOCK(vp); @@ -1663,7 +1663,7 @@ again: error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_long)io.uio_offset; if (error) { - vrele(vp); + nfsrv_vrele(vp); free((caddr_t)rbuf, M_TEMP); nfsm_reply(0); } @@ -1671,7 +1671,7 @@ again: /* * If the filesystem doen't support cookies, return eof. 
*/ - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1687,7 +1687,7 @@ again: * rpc reply */ if (siz == 0) { - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1742,7 +1742,7 @@ again: fl.fl_nfh.fh_generic.fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) { - vput(nvp); + nfsrv_vput(nvp); goto invalid; } if (duration2) { @@ -1754,10 +1754,10 @@ again: } else fl.fl_duration = 0; if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) { - vput(nvp); + nfsrv_vput(nvp); goto invalid; } - vput(nvp); + nfsrv_vput(nvp); fp = (struct nfsv2_fattr *)&fl.fl_fattr; nfsm_srvfillattr; len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH @@ -1827,7 +1827,7 @@ invalid: dp = (struct dirent *)cpos; cookiep++; } - vrele(vp); + nfsrv_vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -1880,7 +1880,7 @@ nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_STATFS(isnq)); nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq)); sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index 9b2ef800780c..f28136087602 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.6 1994/10/02 17:27:01 phk Exp $ + * $Id: nfs_subs.c,v 1.7 1994/10/17 17:47:37 phk Exp $ */ /* @@ -995,6 +995,7 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p) */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; + nfsrv_vmio( ndp->ni_vp); return (0); } out: @@ -1123,6 +1124,7 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp) *rdonlyp = 0; if (!lockflag) VOP_UNLOCK(*vpp); + nfsrv_vmio(*vpp); return (0); } @@ -1168,3 +1170,54 @@ netaddr_match(family, haddr, nam) }; return (0); } + +int +nfsrv_vmio( struct vnode *vp) { + int rtval; + vm_object_t object; + vm_pager_t pager; + + if( (vp == NULL) || (vp->v_type != VREG)) + return 1; + +retry: + if( (vp->v_flag & VVMIO) == 0) { + pager = (vm_pager_t) vnode_pager_alloc(vp, 0, 0, 0); + object = (vm_object_t) vp->v_vmdata; + if( object->pager != pager) + panic("nfsrv_vmio: pager/object mismatch"); + (void) vm_object_lookup( pager); + pager_cache( object, TRUE); + vp->v_flag |= VVMIO; + } else { + if( (object = (vm_object_t)vp->v_vmdata) && + (object->flags & OBJ_DEAD)) { + tsleep( (caddr_t) object, PVM, "nfdead", 0); + goto retry; + } + if( !object) + panic("nfsrv_vmio: VMIO object missing"); + pager = object->pager; + if( !pager) + panic("nfsrv_vmio: VMIO pager missing"); + (void) vm_object_lookup( pager); + } + return 0; +} +int +nfsrv_vput( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vput( vp); + return 0; +} +int +nfsrv_vrele( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vrele( vp); + return 0; +} + diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 7032a5a0aa9a..d189a18454c7 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_vnops.c 8.5 (Berkeley) 2/13/94 - * $Id: nfs_vnops.c,v 1.9 1994/10/09 07:35:06 davidg Exp $ + * $Id: nfs_vnops.c,v 1.10 1994/10/17 17:47:41 phk Exp $ */ /* @@ -2356,8 +2356,10 @@ nfs_update(ap) } */ *ap; { +#if 0 /* Use nfs_setattr */ printf("nfs_update: need to implement!!"); +#endif return (EOPNOTSUPP); } diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 4d75cf84a96d..4f8fc3e10176 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94 - * $Id: nfs_bio.c,v 1.6 1994/10/02 17:26:55 phk Exp $ + * $Id: nfs_bio.c,v 1.7 1994/10/17 17:47:32 phk Exp $ */ #include <sys/param.h> @@ -78,7 +78,7 @@ nfs_bioread(vp, uio, ioflag, cred) struct vattr vattr; struct proc *p; struct nfsmount *nmp; - daddr_t lbn, bn, rabn; + daddr_t lbn, rabn; caddr_t baddr; int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin; @@ -94,7 +94,7 @@ nfs_bioread(vp, uio, ioflag, cred) if (uio->uio_offset < 0 && vp->v_type != VDIR) return (EINVAL); nmp = VFSTONFS(vp->v_mount); - biosize = nmp->nm_rsize; + biosize = NFS_MAXDGRAMDATA; p = uio->uio_procp; /* * For nfs, cache consistency can only be maintained approximately. @@ -198,7 +198,6 @@ nfs_bioread(vp, uio, ioflag, cred) nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); - bn = lbn * (biosize / DEV_BSIZE); not_readin = 1; /* @@ -208,15 +207,17 @@ nfs_bioread(vp, uio, ioflag, cred) lbn == vp->v_lastr + 1) { for (nra = 0; nra < nmp->nm_readahead && (lbn + 1 + nra) * biosize < np->n_size; nra++) { - rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); + rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { - rabp->b_flags |= B_INVAL; + rabp->b_flags |= B_INVAL|B_ERROR; + vfs_unbusy_pages(rabp); brelse(rabp); } } @@ -230,21 +231,23 @@ nfs_bioread(vp, uio, ioflag, cred) * Otherwise, get the block and write back/read in, * as required. 
*/ - if ((bp = incore(vp, bn)) && + if ((bp = incore(vp, lbn)) && (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == (B_BUSY | B_WRITEINPROG)) got_buf = 0; else { again: - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); got_buf = 1; if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { bp->b_flags |= B_READ; not_readin = 0; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); brelse(bp); return (error); } @@ -257,7 +260,7 @@ again: if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { if (!got_buf) { - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); got_buf = 1; @@ -285,8 +288,11 @@ again: return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); + bp->b_flags |= B_ERROR; brelse(bp); return (error); } @@ -297,14 +303,18 @@ again: break; case VDIR: nfsstats.biocache_readdirs++; - bn = (daddr_t)uio->uio_offset; - bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p); + lbn = (daddr_t)uio->uio_offset; + bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); + if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { + vfs_unbusy_pages(bp); + bp->b_flags |= B_ERROR; brelse(bp); return (error); } @@ -323,8 +333,10 @@ again: if (rabp) { if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { - rabp->b_flags |= B_INVAL; + vfs_unbusy_pages(rabp); + rabp->b_flags |= B_INVAL|B_ERROR; brelse(rabp); } } @@ -385,7 +397,7 @@ nfs_write(ap) struct buf *bp; struct vattr vattr; struct nfsmount *nmp; - daddr_t lbn, bn; + daddr_t lbn; int n, on, error = 0; #ifdef DIAGNOSTIC @@ -434,14 +446,12 @@ nfs_write(ap) * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ - biosize = nmp->nm_rsize; + biosize = NFS_MAXDGRAMDATA; do { /* * XXX make sure we aren't cached in the VM page cache */ - (void)vnode_pager_uncache(vp); - /* * Check for a valid write lease. * If non-cachable, just do the rpc @@ -467,9 +477,8 @@ nfs_write(ap) lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); - bn = lbn * (biosize / DEV_BSIZE); again: - bp = nfs_getcacheblk(vp, bn, biosize, p); + bp = nfs_getcacheblk(vp, lbn, biosize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { @@ -591,6 +600,10 @@ nfs_getcacheblk(vp, bn, size, p) } } else bp = getblk(vp, bn, size, 0, 0); + + if( vp->v_type == VREG) + bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE; + return (bp); } diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index 9b2ef800780c..f28136087602 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.6 1994/10/02 17:27:01 phk Exp $ + * $Id: nfs_subs.c,v 1.7 1994/10/17 17:47:37 phk Exp $ */ /* @@ -995,6 +995,7 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p) */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; + nfsrv_vmio( ndp->ni_vp); return (0); } out: @@ -1123,6 +1124,7 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp) *rdonlyp = 0; if (!lockflag) VOP_UNLOCK(*vpp); + nfsrv_vmio(*vpp); return (0); } @@ -1168,3 +1170,54 @@ netaddr_match(family, haddr, nam) }; return (0); } + +int +nfsrv_vmio( struct vnode *vp) { + int rtval; + vm_object_t object; + vm_pager_t pager; + + if( (vp == NULL) || (vp->v_type != VREG)) + return 1; + +retry: + if( (vp->v_flag & VVMIO) == 0) { + pager = (vm_pager_t) vnode_pager_alloc(vp, 0, 0, 0); + object = (vm_object_t) vp->v_vmdata; + if( object->pager != pager) + panic("nfsrv_vmio: pager/object mismatch"); + (void) vm_object_lookup( pager); + pager_cache( object, TRUE); + vp->v_flag |= VVMIO; + } else { + if( (object = (vm_object_t)vp->v_vmdata) && + (object->flags & OBJ_DEAD)) { + tsleep( (caddr_t) object, PVM, "nfdead", 0); + goto retry; + } + if( !object) + panic("nfsrv_vmio: VMIO object missing"); + pager = object->pager; + if( !pager) + panic("nfsrv_vmio: VMIO pager missing"); + (void) vm_object_lookup( pager); + } + return 0; +} +int +nfsrv_vput( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vput( vp); + return 0; +} +int +nfsrv_vrele( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vrele( vp); + return 0; +} + diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c index 7032a5a0aa9a..d189a18454c7 100644 --- a/sys/nfsclient/nfs_vnops.c +++ b/sys/nfsclient/nfs_vnops.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.5 (Berkeley) 2/13/94 - * $Id: nfs_vnops.c,v 1.9 1994/10/09 07:35:06 davidg Exp $ + * $Id: nfs_vnops.c,v 1.10 1994/10/17 17:47:41 phk Exp $ */ /* @@ -2356,8 +2356,10 @@ nfs_update(ap) } */ *ap; { +#if 0 /* Use nfs_setattr */ printf("nfs_update: need to implement!!"); +#endif return (EOPNOTSUPP); } diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c index b6e601420d4e..5b3bb63df333 100644 --- a/sys/nfsserver/nfs_serv.c +++ b/sys/nfsserver/nfs_serv.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.6 1994/09/28 16:45:18 dfr Exp $ + * $Id: nfs_serv.c,v 1.7 1994/10/02 17:26:58 phk Exp $ */ /* @@ -121,7 +121,7 @@ nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq) if (*tl == nfs_true) mode |= VEXEC; error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); nfsm_srvdone; } @@ -158,7 +158,7 @@ nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); nqsrv_getl(vp, NQL_READ); error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -255,12 +255,12 @@ nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq) } error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); out: - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -314,21 +314,21 @@ nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq) if (error) nfsm_reply(0); nqsrv_getl(nd.ni_startdir, NQL_READ); - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } if (duration2) (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd, nam, &cache2, &frev2, cred); error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED); if (nfsd->nd_nqlflag != NQL_NOVAL) { if (duration2) { @@ -417,7 +417,7 @@ nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = VOP_READLINK(vp, uiop, cred); out: - vput(vp); + nfsrv_vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_UNSIGNED); @@ -488,7 +488,7 @@ nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq) } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } if (off >= vap->va_size) @@ -539,12 +539,12 @@ nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq) FREE((caddr_t)iv2, M_TEMP); if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) { m_freem(mreq); - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } } else uiop->uio_resid = 0; - vput(vp); + nfsrv_vput(vp); nfsm_srvfillattr; len -= uiop->uio_resid; tlen = nfsm_rndup(len); @@ -619,13 +619,13 @@ nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); if (vp->v_type != VREG) { error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } nqsrv_getl(vp, NQL_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } uiop->uio_resid = 0; @@ -663,19 +663,19 @@ nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq) } if (len > 0 && mp == NULL) { error = EBADRPC; - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } uiop->uio_resid = siz; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } off = uiop->uio_offset; } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfillattr; @@ -743,7 +743,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) else rdev = fxdr_unsigned(long, sp->sa_nqrdev); if (vap->va_type == VREG || vap->va_type == VSOCK) { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, NQL_WRITE); error=VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (error) @@ -758,7 +758,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) error = suser(cred, (u_short *)0); if (error) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); goto out; } else vap->va_rdev = (dev_t)rdev; @@ -766,7 +766,7 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(nd.ni_dvp, NQL_WRITE); error=VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (error) { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; @@ -780,27 +780,27 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) } FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); + nfsrv_vrele(nd.ni_dvp); + nfsrv_vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); error = ENXIO; goto out; } vp = nd.ni_vp; } else { - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nfsd->nd_nqlflag == NQL_NOVAL) { tsize = fxdr_unsigned(long, sp->sa_nfssize); @@ -814,13 +814,13 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } nqsrv_getl(vp, NQL_WRITE); error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } } @@ -829,11 +829,11 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfhtom(fhp); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); @@ -841,18 +841,18 @@ nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) return (error); nfsmout: if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags) - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + 
nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vput(nd.ni_vp); + nfsrv_vput(nd.ni_vp); return (error); out: - vrele(nd.ni_startdir); + nfsrv_vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); return (0); @@ -911,10 +911,10 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vput(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vput(vp); } nfsm_reply(0); nfsm_srvdone; @@ -973,8 +973,8 @@ nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq) &dpos, nfsd->nd_procp); if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); goto out1; } tdvp = tond.ni_dvp; @@ -1023,34 +1023,34 @@ out: } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) - vrele(tdvp); + nfsrv_vrele(tdvp); else - vput(tdvp); + nfsrv_vput(tdvp); if (tvp) - vput(tvp); + nfsrv_vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); } - vrele(tond.ni_startdir); + nfsrv_vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); out1: - vrele(fromnd.ni_startdir); + nfsrv_vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); return (error); nfsmout: if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) { - vrele(tond.ni_startdir); + nfsrv_vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); } if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) { - vrele(fromnd.ni_startdir); + nfsrv_vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + nfsrv_vrele(fromnd.ni_dvp); + nfsrv_vrele(fvp); } return (error); } @@ -1111,14 +1111,14 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); } out1: - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(0); nfsm_srvdone; } @@ -1178,10 +1178,10 @@ nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq) if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vrele(nd.ni_vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vrele(nd.ni_vp); error = EEXIST; goto out; } @@ -1197,11 +1197,11 @@ out: nfsmout: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); @@ -1252,10 +1252,10 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vrele(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vrele(vp); error = EEXIST; nfsm_reply(0); } @@ -1268,11 +1268,11 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); nfsm_srvfhtom(fhp); nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag 
!= NQL_NOVAL)); @@ -1281,11 +1281,11 @@ nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) nfsmout: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); + nfsrv_vput(nd.ni_dvp); if (nd.ni_vp) - vrele(nd.ni_vp); + nfsrv_vrele(nd.ni_vp); return (error); } @@ -1347,10 +1347,10 @@ out: } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); + nfsrv_vrele(nd.ni_dvp); else - vput(nd.ni_dvp); - vput(vp); + nfsrv_vput(nd.ni_dvp); + nfsrv_vput(vp); } nfsm_reply(0); nfsm_srvdone; @@ -1438,7 +1438,7 @@ nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } VOP_UNLOCK(vp); @@ -1458,7 +1458,7 @@ again: error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; if (error) { - vrele(vp); + nfsrv_vrele(vp); free((caddr_t)rbuf, M_TEMP); nfsm_reply(0); } @@ -1466,7 +1466,7 @@ again: /* * If the filesystem doen't support cookies, return eof. */ - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1482,7 +1482,7 @@ again: * rpc reply */ if (siz == 0) { - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1573,7 +1573,7 @@ again: dp = (struct dirent *)cpos; cookiep++; } - vrele(vp); + nfsrv_vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -1643,7 +1643,7 @@ nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq) nqsrv_getl(vp, NQL_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp); if (error) { - vput(vp); + nfsrv_vput(vp); nfsm_reply(0); } VOP_UNLOCK(vp); @@ -1663,7 +1663,7 @@ again: error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_long)io.uio_offset; if (error) { - vrele(vp); + nfsrv_vrele(vp); free((caddr_t)rbuf, M_TEMP); nfsm_reply(0); } @@ -1671,7 +1671,7 @@ again: /* * If the filesystem doen't support cookies, return eof. 
*/ - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2*NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1687,7 +1687,7 @@ again: * rpc reply */ if (siz == 0) { - vrele(vp); + nfsrv_vrele(vp); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; @@ -1742,7 +1742,7 @@ again: fl.fl_nfh.fh_generic.fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) { - vput(nvp); + nfsrv_vput(nvp); goto invalid; } if (duration2) { @@ -1754,10 +1754,10 @@ again: } else fl.fl_duration = 0; if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) { - vput(nvp); + nfsrv_vput(nvp); goto invalid; } - vput(nvp); + nfsrv_vput(nvp); fp = (struct nfsv2_fattr *)&fl.fl_fattr; nfsm_srvfillattr; len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH @@ -1827,7 +1827,7 @@ invalid: dp = (struct dirent *)cpos; cookiep++; } - vrele(vp); + nfsrv_vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -1880,7 +1880,7 @@ nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq) nfsm_reply(0); sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp); - vput(vp); + nfsrv_vput(vp); nfsm_reply(NFSX_STATFS(isnq)); nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq)); sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index 9b2ef800780c..f28136087602 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.6 1994/10/02 17:27:01 phk Exp $ + * $Id: nfs_subs.c,v 1.7 1994/10/17 17:47:37 phk Exp $ */ /* @@ -995,6 +995,7 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p) */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; + nfsrv_vmio( ndp->ni_vp); return (0); } out: @@ -1123,6 +1124,7 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp) *rdonlyp = 0; if (!lockflag) VOP_UNLOCK(*vpp); + nfsrv_vmio(*vpp); return (0); } @@ -1168,3 +1170,54 @@ netaddr_match(family, haddr, nam) }; return (0); } + +int +nfsrv_vmio( struct vnode *vp) { + int rtval; + vm_object_t object; + vm_pager_t pager; + + if( (vp == NULL) || (vp->v_type != VREG)) + return 1; + +retry: + if( (vp->v_flag & VVMIO) == 0) { + pager = (vm_pager_t) vnode_pager_alloc(vp, 0, 0, 0); + object = (vm_object_t) vp->v_vmdata; + if( object->pager != pager) + panic("nfsrv_vmio: pager/object mismatch"); + (void) vm_object_lookup( pager); + pager_cache( object, TRUE); + vp->v_flag |= VVMIO; + } else { + if( (object = (vm_object_t)vp->v_vmdata) && + (object->flags & OBJ_DEAD)) { + tsleep( (caddr_t) object, PVM, "nfdead", 0); + goto retry; + } + if( !object) + panic("nfsrv_vmio: VMIO object missing"); + pager = object->pager; + if( !pager) + panic("nfsrv_vmio: VMIO pager missing"); + (void) vm_object_lookup( pager); + } + return 0; +} +int +nfsrv_vput( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vput( vp); + return 0; +} +int +nfsrv_vrele( struct vnode *vp) { + if( (vp->v_flag & VVMIO) && vp->v_vmdata) { + vm_object_deallocate( (vm_object_t) vp->v_vmdata); + } + vrele( vp); + return 0; +} + diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 86554e66d154..7d9c238b2a2b 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)buf.h 8.7 (Berkeley) 1/21/94 - * $Id: buf.h,v 1.9 1994/10/10 00:58:31 phk Exp $ + * $Id: buf.h,v 1.10 1994/10/18 06:55:57 davidg Exp $ */ #ifndef _SYS_BUF_H_ @@ -83,9 +83,9 @@ struct buf { void *b_driver2; /* for private use by the driver */ void *b_spc; #ifndef VMIO - void *b_pages[(MAXBSIZE + PAGE_SIZE - 1)/PAGE_SIZE]; + void *b_pages[(MAXPHYS + PAGE_SIZE - 1)/PAGE_SIZE]; #else - vm_page_t b_pages[(MAXBSIZE + PAGE_SIZE - 1)/PAGE_SIZE]; + struct vm_page *b_pages[(MAXPHYS + PAGE_SIZE - 1)/PAGE_SIZE]; #endif int b_npages; }; @@ -116,13 +116,13 @@ struct buf { #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ -#define B_PAGET 0x00010000 /* Page in/out of page table space. */ -#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */ +#define B_MALLOC 0x00010000 /* malloced b_data */ +#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ -#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */ +#define B_PDWANTED 0x00400000 /* Pageout daemon wants this buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ @@ -159,7 +159,7 @@ struct cluster_save { /* * Definitions for the buffer free lists. */ -#define BUFFER_QUEUES 5 /* number of free buffer queues */ +#define BUFFER_QUEUES 6 /* number of free buffer queues */ LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash; TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; @@ -167,8 +167,9 @@ TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ -#define QUEUE_AGE 3 /* less useful buffers */ -#define QUEUE_EMPTY 4 /* empty buffer headers*/ +#define QUEUE_VMIO 3 /* VMIO buffers */ +#define QUEUE_AGE 4 /* not-useful buffers */ +#define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. @@ -202,12 +203,12 @@ int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void brelse __P((struct buf *)); -struct buf *getnewbuf __P((int slpflag, int slptimeo)); +struct buf *getnewbuf __P((int slpflag, int slptimeo, int)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); -void allocbuf __P((struct buf *, int)); +int allocbuf __P((struct buf *, int, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 86554e66d154..7d9c238b2a2b 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
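The prototypes above change allocbuf() to return an int and take a third argument, and getnewbuf() gains a size parameter; the ffs and lfs call sites later in this commit pass 0 for the new allocbuf() argument. A sketch of an updated call site in that style, with the helper name invented and the meaning of the third argument left as the callers use it:

#include <sys/param.h>
#include <sys/systm.h>                  /* bzero() */
#include <sys/buf.h>

static void
grow_and_zero(struct buf *bp, int osize, int nsize)
{
        /* Third argument is 0, matching the ffs/lfs call sites in this diff. */
        (void) allocbuf(bp, nsize, 0);
        bp->b_flags |= B_DONE;
        /* Zero the newly exposed tail, as ffs_realloccg() does. */
        bzero((char *)bp->b_data + osize, (u_int)(nsize - osize));
}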
* * @(#)buf.h 8.7 (Berkeley) 1/21/94 - * $Id: buf.h,v 1.9 1994/10/10 00:58:31 phk Exp $ + * $Id: buf.h,v 1.10 1994/10/18 06:55:57 davidg Exp $ */ #ifndef _SYS_BUF_H_ @@ -83,9 +83,9 @@ struct buf { void *b_driver2; /* for private use by the driver */ void *b_spc; #ifndef VMIO - void *b_pages[(MAXBSIZE + PAGE_SIZE - 1)/PAGE_SIZE]; + void *b_pages[(MAXPHYS + PAGE_SIZE - 1)/PAGE_SIZE]; #else - vm_page_t b_pages[(MAXBSIZE + PAGE_SIZE - 1)/PAGE_SIZE]; + struct vm_page *b_pages[(MAXPHYS + PAGE_SIZE - 1)/PAGE_SIZE]; #endif int b_npages; }; @@ -116,13 +116,13 @@ struct buf { #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ -#define B_PAGET 0x00010000 /* Page in/out of page table space. */ -#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */ +#define B_MALLOC 0x00010000 /* malloced b_data */ +#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ -#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */ +#define B_PDWANTED 0x00400000 /* Pageout daemon wants this buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ @@ -159,7 +159,7 @@ struct cluster_save { /* * Definitions for the buffer free lists. */ -#define BUFFER_QUEUES 5 /* number of free buffer queues */ +#define BUFFER_QUEUES 6 /* number of free buffer queues */ LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash; TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; @@ -167,8 +167,9 @@ TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ -#define QUEUE_AGE 3 /* less useful buffers */ -#define QUEUE_EMPTY 4 /* empty buffer headers*/ +#define QUEUE_VMIO 3 /* VMIO buffers */ +#define QUEUE_AGE 4 /* not-useful buffers */ +#define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. @@ -202,12 +203,12 @@ int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void brelse __P((struct buf *)); -struct buf *getnewbuf __P((int slpflag, int slptimeo)); +struct buf *getnewbuf __P((int slpflag, int slptimeo, int)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); -void allocbuf __P((struct buf *, int)); +int allocbuf __P((struct buf *, int, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); diff --git a/sys/sys/param.h b/sys/sys/param.h index bf190de7b11e..55e6e45db4ca 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)param.h 8.2 (Berkeley) 1/21/94 - * $Id: param.h,v 1.4 1994/08/21 04:41:55 paul Exp $ + * $Id: param.h,v 1.5 1994/09/01 05:12:51 davidg Exp $ */ #ifndef _SYS_PARAM_H_ @@ -152,9 +152,10 @@ * smaller units (fragments) only in the last direct block. MAXBSIZE * primarily determines the size of buffers in the buffer pool. 
It may be * made larger without any effect on existing file systems; however making - * it smaller make make some file systems unmountable. + * it smaller make make some file systems unmountable. Also, MAXBSIZE + * must be less than MAXPHYS!!! */ -#define MAXBSIZE MAXPHYS +#define MAXBSIZE 16384 #define MAXFRAG 8 /* diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 468c49f8e3d2..79a23263d615 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)proc.h 8.8 (Berkeley) 1/21/94 - * $Id: proc.h,v 1.13 1994/11/13 12:46:08 davidg Exp $ + * $Id: proc.h,v 1.14 1994/11/15 14:37:39 bde Exp $ */ #ifndef _SYS_PROC_H_ @@ -99,8 +99,7 @@ struct proc { int p_flag; /* P_* flags. */ char p_stat; /* S* process status. */ - char p_lock; /* Process lock count. */ - char p_pad1[2]; + char p_pad1[3]; pid_t p_pid; /* Process identifier. */ struct proc *p_hash; /* Hashed based on p_pid for kill+exit+... */ @@ -137,7 +136,9 @@ struct proc { struct vnode *p_textvp; /* Vnode of executable. */ - long p_spare[3]; /* Pad to 256, avoid shifting eproc. */ + char p_lock; /* Process lock count. */ + char p_pad2[3]; /* alignment */ + long p_spare[2]; /* Pad to 256, avoid shifting eproc. XXX */ /* End area that is zeroed on creation. */ #define p_endzero p_startcopy diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index d70c6b30c8cb..22da839cbdc4 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vmmeter.h 8.1 (Berkeley) 6/2/93 - * $Id: vmmeter.h,v 1.5 1994/10/15 13:33:02 davidg Exp $ + * $Id: vmmeter.h,v 1.6 1994/10/18 14:59:13 davidg Exp $ */ #ifndef _SYS_VMMETER_H_ @@ -87,6 +87,10 @@ struct vmmeter { unsigned v_active_count;/* number of pages active */ unsigned v_inactive_target; /* number of pages desired inactive */ unsigned v_inactive_count; /* number of pages inactive */ + unsigned v_cache_count; /* number of pages on buffer cache queue */ + unsigned v_cache_min; /* min number of pages desired on cache queue */ + unsigned v_cache_max; /* max number of pages in cached obj */ + unsigned v_pageout_free_min; /* min number pages reserved for kernel */ }; #ifdef KERNEL struct vmmeter cnt; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 9830e4fd0ad6..7feaa512e828 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
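struct vmmeter above gains counters for the new page cache queue (v_cache_count, v_cache_min, v_cache_max, v_pageout_free_min), readable through the cnt global declared at the bottom of the header. An illustrative helper, assuming kernel context where that global is visible; the function name is made up:

#include <sys/vmmeter.h>

/* Pages that can be handed out cheaply: free pages plus clean pages on the cache queue. */
static unsigned
pages_easily_reclaimable(void)
{
        return (cnt.v_free_count + cnt.v_cache_count);
}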
* * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 - * $Id: ffs_alloc.c,v 1.4 1994/09/20 05:53:24 bde Exp $ + * $Id: ffs_alloc.c,v 1.5 1994/10/10 01:04:34 phk Exp $ */ #include <sys/param.h> @@ -210,7 +210,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) panic("bad blockno"); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); + allocbuf(bp, nsize, 0); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; @@ -268,14 +268,14 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) (u_long (*)())ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); - (void) vnode_pager_uncache(ITOV(ip)); + /* (void) vnode_pager_uncache(ITOV(ip)); */ ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); + allocbuf(bp, nsize, 0); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 0058d9b8edcb..8ba128cc02a5 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93 - * $Id: ffs_inode.c,v 1.9 1994/10/22 02:27:32 davidg Exp $ + * $Id: ffs_inode.c,v 1.10 1994/12/27 14:44:42 bde Exp $ */ #include <sys/param.h> @@ -204,7 +204,6 @@ ffs_truncate(ap) if (error) return (error); #endif - vnode_pager_setsize(ovp, (u_long)length); osize = oip->i_size; /* * Lengthen the size of the file. We must ensure that the @@ -226,6 +225,7 @@ ffs_truncate(ap) bwrite(bp); else bawrite(bp); + vnode_pager_setsize(ovp, (u_long)length); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (VOP_UPDATE(ovp, &tv, &tv, 1)); } @@ -250,7 +250,7 @@ ffs_truncate(ap) oip->i_size = length; size = blksize(fs, oip, lbn); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); - allocbuf(bp, size); + allocbuf(bp, size, 0); if (aflags & IO_SYNC) bwrite(bp); else @@ -386,6 +386,7 @@ done: if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; + vnode_pager_setsize(ovp, (u_long)length); #ifdef QUOTA (void) chkdq(oip, -blocksreleased, NOCRED, 0); #endif @@ -441,7 +442,8 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + /* if (bp->b_flags & (B_DONE | B_DELWRI)) { */ + if (bp->b_flags & B_CACHE) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { @@ -451,6 +453,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); error = biowait(bp); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 35c905750919..be78eafaab73 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
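The ffs_indirtrunc() hunk above (and the ufs_bmaparray() hunk later in this diff) now tests B_CACHE instead of B_DONE|B_DELWRI and calls vfs_busy_pages() before handing the buffer to the driver. A condensed sketch of that read sequence, assuming the usual getblk()/biowait() environment and a visible vfs_busy_pages() prototype:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/vnode.h>

static int
read_meta_block(struct vnode *vp, daddr_t lbn, daddr_t blkno, int size,
    struct buf **bpp)
{
        struct buf *bp;
        int error;

        bp = getblk(vp, lbn, size, 0, 0);
        if ((bp->b_flags & B_CACHE) == 0) {
                /* Not valid in the cache: set up and issue the read. */
                bp->b_flags |= B_READ;
                bp->b_blkno = blkno;
                vfs_busy_pages(bp, 0);          /* busy the backing pages for the I/O */
                VOP_STRATEGY(bp);
                error = biowait(bp);
                if (error) {
                        brelse(bp);
                        return (error);
                }
        }
        *bpp = bp;
        return (0);
}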
* * @(#)ffs_vnops.c 8.7 (Berkeley) 2/3/94 - * $Id: ffs_vnops.c,v 1.6 1994/10/06 21:06:59 davidg Exp $ + * $Id: ffs_vnops.c,v 1.7 1994/10/10 01:04:40 phk Exp $ */ #include <sys/param.h> @@ -261,19 +261,27 @@ loop: continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); + + if (bp->b_vp != vp && ap->a_waitfor != MNT_NOWAIT) { + + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. */ - if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) - (void) bawrite(bp); - else - (void) bwrite(bp); + if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) + (void) bawrite(bp); + else + (void) bwrite(bp); + } else { + vfs_bio_awrite(bp); + splx(s); + } goto loop; } + if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; @@ -287,6 +295,7 @@ loop: #endif } splx(s); + tv = time; return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); } diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c index b7cf75502c43..f8063c8956ba 100644 --- a/sys/ufs/lfs/lfs_balloc.c +++ b/sys/ufs/lfs/lfs_balloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)lfs_balloc.c 8.1 (Berkeley) 6/11/93 - * $Id$ + * $Id: lfs_balloc.c,v 1.2 1994/08/02 07:54:30 davidg Exp $ */ #include <sys/param.h> #include <sys/buf.h> @@ -129,6 +129,7 @@ lfs_balloc(vp, iosize, lbn, bpp) else { bp->b_blkno = daddr; bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); return(biowait(bp)); } diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c index 4e230161176b..68169a3716a6 100644 --- a/sys/ufs/lfs/lfs_inode.c +++ b/sys/ufs/lfs/lfs_inode.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)lfs_inode.c 8.5 (Berkeley) 12/30/93 - * $Id: lfs_inode.c,v 1.4 1994/10/10 01:04:50 phk Exp $ + * $Id: lfs_inode.c,v 1.5 1995/01/04 23:46:31 gibbs Exp $ */ #include <sys/param.h> @@ -235,7 +235,7 @@ lfs_truncate(ap) ip->i_size = length; size = blksize(fs); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); - allocbuf(bp, size); + allocbuf(bp, size, 0); if (e1 = VOP_BWRITE(bp)) return (e1); } diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c index d19de2d1a53d..d3937132830f 100644 --- a/sys/ufs/lfs/lfs_segment.c +++ b/sys/ufs/lfs/lfs_segment.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94 - * $Id: lfs_segment.c,v 1.5 1994/11/17 01:30:49 gibbs Exp $ + * $Id: lfs_segment.c,v 1.6 1995/01/04 23:46:32 gibbs Exp $ */ #include <sys/param.h> @@ -1091,7 +1091,6 @@ lfs_newbuf(vp, daddr, size) bp = getpbuf(); if (nbytes) bp->b_data = lfs_alloc_buffer( nbytes); - bgetvp(vp, bp); bp->b_bufsize = size; bp->b_bcount = size; bp->b_lblkno = daddr; diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c index 82e8068201ee..52d1574f1211 100644 --- a/sys/ufs/lfs/lfs_subr.c +++ b/sys/ufs/lfs/lfs_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)lfs_subr.c 8.2 (Berkeley) 9/21/93 - * $Id: lfs_subr.c,v 1.4 1994/11/17 01:30:51 gibbs Exp $ + * $Id: lfs_subr.c,v 1.5 1995/01/04 23:46:32 gibbs Exp $ */ #include <sys/param.h> @@ -147,11 +147,8 @@ lfs_segunlock(fs) if (sp->bpp != sp->cbpp) { /* Free allocated segment summary */ fs->lfs_offset -= LFS_SUMMARY_SIZE / DEV_BSIZE; -/* free((*sp->bpp)->b_data, M_SEGMENT); */ - lfs_free_buffer((*sp->bpp)->b_data, roundup( (*sp->bpp)->b_bufsize, DEV_BSIZE)); - /* free(*sp->bpp, M_SEGMENT); */ + lfs_free_buffer((*sp->bpp)->b_data, roundup((*sp->bpp)->b_bufsize, DEV_BSIZE)); relpbuf(*sp->bpp); - } else printf ("unlock to 0 with no summary"); free(sp->bpp, M_SEGMENT); diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c index 71ac5e9123e5..73c5045f48a1 100644 --- a/sys/ufs/lfs/lfs_syscalls.c +++ b/sys/ufs/lfs/lfs_syscalls.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)lfs_syscalls.c 8.5 (Berkeley) 4/20/94 - * $Id: lfs_syscalls.c,v 1.4 1994/11/17 01:30:52 gibbs Exp $ + * $Id: lfs_syscalls.c,v 1.5 1995/01/04 23:46:33 gibbs Exp $ */ #include <sys/param.h> @@ -238,10 +238,6 @@ err2: lfs_vunref(vp); /* Free up fakebuffers */ for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp) if ((*bpp)->b_flags & B_CALL) { - brelvp(*bpp); -/* - free(*bpp, M_SEGMENT); -*/ relpbuf(*bpp); } else brelse(*bpp); diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index f3009bd30584..108a5aa58f2e 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.3 1994/08/02 07:54:52 davidg Exp $ + * $Id: ufs_bmap.c,v 1.4 1994/10/08 06:57:21 phk Exp $ */ #include <sys/param.h> @@ -128,12 +128,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) if (runp) { /* * XXX - * If MAXBSIZE is the largest transfer the disks can handle, + * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; - maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; + maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; @@ -179,7 +179,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_flags & B_CACHE) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC @@ -190,6 +190,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) trace(TR_BREADMISS, pack(vp, size), metalbn); bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; + vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 058af53f4d66..04956e9a0cf1 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
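ufs_bmaparray() above now clamps the read-ahead run length with MAXPHYS rather than MAXBSIZE, which matters once MAXBSIZE (16K) is smaller than MAXPHYS. A small worked example of the computation; the 64K MAXPHYS figure is an assumption about this tree, not stated in the hunk:

#include <sys/param.h>
#include <sys/mount.h>

static int
bmap_maxrun(struct mount *mp)
{
        /* With MAXPHYS == 64K and an 8K filesystem block: 65536/8192 - 1 == 7 blocks. */
        return (MAXPHYS / mp->mnt_stat.f_iosize - 1);
}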
* * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 - * $Id: ufs_readwrite.c,v 1.4 1994/08/08 09:11:44 davidg Exp $ + * $Id: ufs_readwrite.c,v 1.5 1994/10/10 01:04:55 phk Exp $ */ #ifdef LFS_READWRITE @@ -101,6 +101,9 @@ READ(ap) if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); + xfersize = vfs_read_bypass( vp, uio, bytesinfile, lbn); + if( xfersize != 0) + continue; nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); @@ -231,6 +234,10 @@ WRITE(ap) xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; + + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, (u_long)uio->uio_offset + xfersize); + #ifdef LFS_READWRITE (void)lfs_check(vp, lbn); error = lfs_balloc(vp, xfersize, lbn, &bp); @@ -245,11 +252,13 @@ WRITE(ap) #endif if (error) break; + if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; - vnode_pager_setsize(vp, (u_long)ip->i_size); } +/* (void)vnode_pager_uncache(vp); +*/ size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) @@ -262,14 +271,17 @@ WRITE(ap) #else if (ioflag & IO_SYNC) (void)bwrite(bp); - else if (xfersize + blkoffset == fs->fs_bsize) - if (doclusterwrite) + else if (xfersize + blkoffset == fs->fs_bsize) { + if (doclusterwrite) { + bp->b_flags |= B_CLUSTEROK; cluster_write(bp, ip->i_size); - else { + } else { bawrite(bp); } - else + } else { + bp->b_flags |= B_CLUSTEROK; bdwrite(bp); + } #endif if (error || xfersize == 0) break; diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 7a65de487e85..b21d6a37bdd5 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.10 (Berkeley) 4/1/94 - * $Id: ufs_vnops.c,v 1.12 1994/10/21 01:19:25 wollman Exp $ + * $Id: ufs_vnops.c,v 1.13 1994/11/26 19:38:30 bde Exp $ */ #include <sys/param.h> @@ -441,8 +441,10 @@ ufs_chmod(vp, mode, cred, p) ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; +/* if ((vp->v_flag & VTEXT) && (ip->i_mode & S_ISTXT) == 0) (void) vnode_pager_uncache(vp); +*/ return (0); } @@ -647,6 +649,8 @@ ufs_remove(ap) if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; + if( (ip->i_nlink == 0) && vp->v_vmdata) + ((vm_object_t)vp->v_vmdata)->flags |= OBJ_INTERNAL; } out: if (dvp == vp) diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index f5eb3d33e263..512f77768356 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 - * $Id: device_pager.c,v 1.3 1994/08/02 07:55:06 davidg Exp $ + * $Id: device_pager.c,v 1.4 1994/10/02 17:48:58 phk Exp $ */ /* @@ -55,28 +55,26 @@ #include <vm/vm_page.h> #include <vm/device_pager.h> -struct pagerlst dev_pager_list; /* list of managed devices */ +struct pagerlst dev_pager_list; /* list of managed devices */ struct pglist dev_pager_fakelist; /* list of available vm_page_t's */ #ifdef DEBUG -int dpagerdebug = 0; +int dpagerdebug = 0; + #define DDB_FOLLOW 0x01 #define DDB_INIT 0x02 #define DDB_ALLOC 0x04 #define DDB_FAIL 0x08 #endif -static vm_pager_t dev_pager_alloc - __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); -static void dev_pager_dealloc __P((vm_pager_t)); -static int dev_pager_getpage - __P((vm_pager_t, vm_page_t, boolean_t)); -static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t)); -static void dev_pager_init __P((void)); -static int dev_pager_putpage - __P((vm_pager_t, vm_page_t, boolean_t)); -static vm_page_t dev_pager_getfake __P((vm_offset_t)); -static void dev_pager_putfake __P((vm_page_t)); +static vm_pager_t dev_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); +static void dev_pager_dealloc __P((vm_pager_t)); +static int dev_pager_getpage __P((vm_pager_t, vm_page_t, boolean_t)); +static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t)); +static void dev_pager_init __P((void)); +static int dev_pager_putpage __P((vm_pager_t, vm_page_t, boolean_t)); +static vm_page_t dev_pager_getfake __P((vm_offset_t)); +static void dev_pager_putfake __P((vm_page_t)); struct pagerops devicepagerops = { dev_pager_init, @@ -109,7 +107,7 @@ dev_pager_alloc(handle, size, prot, foff) { dev_t dev; vm_pager_t pager; - int (*mapfunc)(); + int (*mapfunc) (); vm_object_t object; dev_pager_t devp; unsigned int npages, off; @@ -117,7 +115,7 @@ dev_pager_alloc(handle, size, prot, foff) #ifdef DEBUG if (dpagerdebug & DDB_FOLLOW) printf("dev_pager_alloc(%x, %x, %x, %x)\n", - handle, size, prot, foff); + handle, size, prot, foff); #endif #ifdef DIAGNOSTIC /* @@ -130,27 +128,27 @@ dev_pager_alloc(handle, size, prot, foff) /* * Make sure this device can be mapped. */ - dev = (dev_t)(u_long)handle; + dev = (dev_t) (u_long) handle; mapfunc = cdevsw[major(dev)].d_mmap; if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop) - return(NULL); + return (NULL); /* * Offset should be page aligned. */ - if (foff & (PAGE_SIZE-1)) - return(NULL); + if (foff & (PAGE_SIZE - 1)) + return (NULL); /* - * Check that the specified range of the device allows the - * desired protection. - * + * Check that the specified range of the device allows the desired + * protection. + * * XXX assumes VM_PROT_* == PROT_* */ npages = atop(round_page(size)); for (off = foff; npages--; off += PAGE_SIZE) - if ((*mapfunc)(dev, off, (int)prot) == -1) - return(NULL); + if ((*mapfunc) (dev, off, (int) prot) == -1) + return (NULL); /* * Look up pager, creating as necessary. 
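dev_pager_alloc() above refuses to create a pager unless the device can actually be mapped: the offset must be page aligned and the driver's d_mmap entry point must accept every page offset in the requested range at the requested protection. The sketch below models that probe loop; probe_mmap() and PAGE_SZ are invented stand-ins for the cdevsw hook and PAGE_SIZE.

    #include <stdio.h>

    #define PAGE_SZ 4096

    /* stand-in for cdevsw[major(dev)].d_mmap: -1 means "cannot map" */
    static int
    probe_mmap(unsigned long off, int prot)
    {
        (void)prot;
        return off < 16 * PAGE_SZ ? 0 : -1;   /* pretend the device exposes 16 pages */
    }

    static int
    range_is_mappable(unsigned long foff, unsigned long size, int prot)
    {
        if (foff & (PAGE_SZ - 1))             /* offset must be page aligned */
            return 0;
        unsigned long npages = (size + PAGE_SZ - 1) / PAGE_SZ;  /* atop(round_page(size)) */
        for (unsigned long off = foff; npages--; off += PAGE_SZ)
            if (probe_mmap(off, prot) == -1)
                return 0;
        return 1;
    }

    int main(void)
    {
        printf("%d\n", range_is_mappable(0, 8 * PAGE_SZ, 1));            /* 1: fits */
        printf("%d\n", range_is_mappable(12 * PAGE_SZ, 8 * PAGE_SZ, 1)); /* 0: runs past the device */
        return 0;
    }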
@@ -161,58 +159,57 @@ top: /* * Allocate and initialize pager structs */ - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); + pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); if (pager == NULL) - return(NULL); - devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK); + return (NULL); + devp = (dev_pager_t) malloc(sizeof *devp, M_VMPGDATA, M_WAITOK); if (devp == NULL) { - free((caddr_t)pager, M_VMPAGER); - return(NULL); + free((caddr_t) pager, M_VMPAGER); + return (NULL); } pager->pg_handle = handle; pager->pg_ops = &devicepagerops; pager->pg_type = PG_DEVICE; - pager->pg_data = (caddr_t)devp; - pager->pg_flags = 0; + pager->pg_data = (caddr_t) devp; TAILQ_INIT(&devp->devp_pglist); /* * Allocate object and associate it with the pager. */ object = devp->devp_object = vm_object_allocate(0); vm_object_enter(object, pager); - vm_object_setpager(object, pager, (vm_offset_t)foff, FALSE); + vm_object_setpager(object, pager, (vm_offset_t) foff, FALSE); /* * Finally, put it on the managed list so other can find it. * First we re-lookup in case someone else beat us to this - * point (due to blocking in the various mallocs). If so, - * we free everything and start over. + * point (due to blocking in the various mallocs). If so, we + * free everything and start over. */ if (vm_pager_lookup(&dev_pager_list, handle)) { - free((caddr_t)devp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); + free((caddr_t) devp, M_VMPGDATA); + free((caddr_t) pager, M_VMPAGER); goto top; } TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list); #ifdef DEBUG if (dpagerdebug & DDB_ALLOC) { printf("dev_pager_alloc: pager %x devp %x object %x\n", - pager, devp, object); + pager, devp, object); vm_object_print(object, FALSE); } #endif } else { /* - * vm_object_lookup() gains a reference and also - * removes the object from the cache. + * vm_object_lookup() gains a reference and also removes the + * object from the cache. */ object = vm_object_lookup(pager); #ifdef DIAGNOSTIC - devp = (dev_pager_t)pager->pg_data; + devp = (dev_pager_t) pager->pg_data; if (object != devp->devp_object) panic("dev_pager_setup: bad object"); #endif } - return(pager); + return (pager); } static void @@ -229,11 +226,10 @@ dev_pager_dealloc(pager) #endif TAILQ_REMOVE(&dev_pager_list, pager, pg_list); /* - * Get the object. - * Note: cannot use vm_object_lookup since object has already - * been removed from the hash chain. + * Get the object. Note: cannot use vm_object_lookup since object has + * already been removed from the hash chain. */ - devp = (dev_pager_t)pager->pg_data; + devp = (dev_pager_t) pager->pg_data; object = devp->devp_object; #ifdef DEBUG if (dpagerdebug & DDB_ALLOC) @@ -242,12 +238,12 @@ dev_pager_dealloc(pager) /* * Free up our fake pages. */ - while ((m=devp->devp_pglist.tqh_first) != 0) { + while ((m = devp->devp_pglist.tqh_first) != 0) { TAILQ_REMOVE(&devp->devp_pglist, m, pageq); dev_pager_putfake(m); } - free((caddr_t)devp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); + free((caddr_t) devp, M_VMPGDATA); + free((caddr_t) pager, M_VMPAGER); } static int @@ -261,7 +257,7 @@ dev_pager_getpage(pager, m, sync) vm_page_t page; dev_t dev; int s; - int (*mapfunc)(), prot; + int (*mapfunc) (), prot; #ifdef DEBUG if (dpagerdebug & DDB_FOLLOW) @@ -269,7 +265,7 @@ dev_pager_getpage(pager, m, sync) #endif object = m->object; - dev = (dev_t)(u_long)pager->pg_handle; + dev = (dev_t) (u_long) pager->pg_handle; offset = m->offset + object->paging_offset; prot = PROT_READ; /* XXX should pass in? 
*/ mapfunc = cdevsw[major(dev)].d_mmap; @@ -277,31 +273,31 @@ dev_pager_getpage(pager, m, sync) if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop) panic("dev_pager_getpage: no map function"); - paddr = pmap_phys_address((*mapfunc)((dev_t)dev, (int)offset, prot)); + paddr = pmap_phys_address((*mapfunc) ((dev_t) dev, (int) offset, prot)); #ifdef DIAGNOSTIC if (paddr == -1) panic("dev_pager_getpage: map function returns error"); #endif /* - * Replace the passed in page with our own fake page and free - * up the original. + * Replace the passed in page with our own fake page and free up the + * original. */ page = dev_pager_getfake(paddr); - TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, - page, pageq); + TAILQ_INSERT_TAIL(&((dev_pager_t) pager->pg_data)->devp_pglist, + page, pageq); vm_object_lock(object); vm_page_lock_queues(); + PAGE_WAKEUP(m); vm_page_free(m); vm_page_unlock_queues(); s = splhigh(); vm_page_insert(page, object, offset); splx(s); - PAGE_WAKEUP(m); if (offset + PAGE_SIZE > object->size) object->size = offset + PAGE_SIZE; /* XXX anal */ vm_object_unlock(object); - return(VM_PAGER_OK); + return (VM_PAGER_OK); } static int @@ -328,7 +324,7 @@ dev_pager_haspage(pager, offset) if (dpagerdebug & DDB_FOLLOW) printf("dev_pager_haspage(%x, %x)\n", pager, offset); #endif - return(TRUE); + return (TRUE); } static vm_page_t @@ -339,8 +335,8 @@ dev_pager_getfake(paddr) int i; if (dev_pager_fakelist.tqh_first == NULL) { - m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK); - for (i = PAGE_SIZE / sizeof(*m); i > 0; i--) { + m = (vm_page_t) malloc(PAGE_SIZE * 2, M_VMPGDATA, M_WAITOK); + for (i = (PAGE_SIZE * 2) / sizeof(*m); i > 0; i--) { TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq); m++; } @@ -348,12 +344,16 @@ dev_pager_getfake(paddr) m = dev_pager_fakelist.tqh_first; TAILQ_REMOVE(&dev_pager_fakelist, m, pageq); - m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS; + m->flags = PG_BUSY | PG_FICTITIOUS; + m->dirty = 0; + m->valid = VM_PAGE_BITS_ALL; + m->busy = 0; + m->bmapped = 0; m->wire_count = 1; m->phys_addr = paddr; - return(m); + return (m); } static void diff --git a/sys/vm/device_pager.h b/sys/vm/device_pager.h index 677017f997ef..6fa8bfe6ac51 100644 --- a/sys/vm/device_pager.h +++ b/sys/vm/device_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)device_pager.h 8.3 (Berkeley) 12/13/93 - * $Id$ + * $Id: device_pager.h,v 1.2 1994/08/02 07:55:07 davidg Exp $ */ #ifndef _DEVICE_PAGER_ @@ -46,9 +46,9 @@ * Device pager private data. */ struct devpager { - struct pglist devp_pglist; /* list of pages allocated */ - vm_object_t devp_object; /* object representing this device */ + struct pglist devp_pglist; /* list of pages allocated */ + vm_object_t devp_object; /* object representing this device */ }; -typedef struct devpager *dev_pager_t; +typedef struct devpager *dev_pager_t; -#endif /* _DEVICE_PAGER_ */ +#endif /* _DEVICE_PAGER_ */ diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c index d87d321b7e46..eedb557d591e 100644 --- a/sys/vm/kern_lock.c +++ b/sys/vm/kern_lock.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. 
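dev_pager_getfake() and dev_pager_putfake() above keep a freelist of fictitious page structures, refilled by slicing a malloc'd chunk (now PAGE_SIZE * 2 of memory) into as many entries as fit; the chunk itself is never returned. A userland sketch of that allocator, assuming a BSD-style sys/queue.h is available; struct fakepage and CHUNK_BYTES are illustrative, and the real code additionally sets PG_FICTITIOUS, the valid bits and the wire count.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    #define CHUNK_BYTES 8192        /* the kernel uses PAGE_SIZE * 2 */

    struct fakepage {
        TAILQ_ENTRY(fakepage) pageq;
        unsigned long phys_addr;
    };

    static TAILQ_HEAD(, fakepage) freelist;

    static struct fakepage *
    getfake(unsigned long paddr)
    {
        struct fakepage *m;

        if (freelist.tqh_first == NULL) {
            /* replenish: slice one chunk into entries; the chunk is never freed */
            m = malloc(CHUNK_BYTES);
            if (m == NULL)
                abort();
            for (size_t i = CHUNK_BYTES / sizeof(*m); i > 0; i--, m++)
                TAILQ_INSERT_TAIL(&freelist, m, pageq);
        }
        m = freelist.tqh_first;
        TAILQ_REMOVE(&freelist, m, pageq);
        m->phys_addr = paddr;   /* the kernel also marks the page fictitious, valid, wired */
        return m;
    }

    static void
    putfake(struct fakepage *m)
    {
        TAILQ_INSERT_TAIL(&freelist, m, pageq);
    }

    int main(void)
    {
        TAILQ_INIT(&freelist);
        struct fakepage *p = getfake(0x100000UL);
        printf("fake page %p -> pa 0x%lx\n", (void *)p, p->phys_addr);
        putfake(p);
        return 0;
    }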
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: kern_lock.c,v 1.2 1994/08/02 07:55:08 davidg Exp $ */ /* @@ -75,7 +75,8 @@ /* XXX */ #include <sys/proc.h> -typedef int *thread_t; +typedef int *thread_t; + #define current_thread() ((thread_t)&curproc->p_thread) /* XXX */ @@ -112,44 +113,49 @@ typedef int *thread_t; * may only be used for exclusive locks. */ -void simple_lock_init(l) - simple_lock_t l; +void +simple_lock_init(l) + simple_lock_t l; { - *(boolean_t *)l = FALSE; + *(boolean_t *) l = FALSE; } -void simple_lock(l) - simple_lock_t l; +void +simple_lock(l) + simple_lock_t l; { - while (test_and_set((boolean_t *)l)) + while (test_and_set((boolean_t *) l)) continue; } -void simple_unlock(l) - simple_lock_t l; +void +simple_unlock(l) + simple_lock_t l; { - *(boolean_t *)l = FALSE; + *(boolean_t *) l = FALSE; } -boolean_t simple_lock_try(l) - simple_lock_t l; +boolean_t +simple_lock_try(l) + simple_lock_t l; { - return (!test_and_set((boolean_t *)l)); + return (!test_and_set((boolean_t *) l)); } -#endif /* notdef */ -#endif /* NCPUS > 1 */ +#endif /* notdef */ +#endif /* NCPUS > 1 */ #if NCPUS > 1 int lock_wait_time = 100; -#else /* NCPUS > 1 */ - /* - * It is silly to spin on a uni-processor as if we - * thought something magical would happen to the - * want_write bit while we are executing. - */ +#else /* NCPUS > 1 */ + + /* + * It is silly to spin on a uni-processor as if we thought something magical + * would happen to the want_write bit while we are executing. + */ int lock_wait_time = 0; -#endif /* NCPUS > 1 */ + +#endif /* NCPUS > 1 */ /* @@ -160,9 +166,10 @@ int lock_wait_time = 0; * variables and then initialize them, rather * than getting a new one from this module. */ -void lock_init(l, can_sleep) - lock_t l; - boolean_t can_sleep; +void +lock_init(l, can_sleep) + lock_t l; + boolean_t can_sleep; { bzero(l, sizeof(lock_data_t)); simple_lock_init(&l->interlock); @@ -170,13 +177,14 @@ void lock_init(l, can_sleep) l->want_upgrade = FALSE; l->read_count = 0; l->can_sleep = can_sleep; - l->thread = (char *)-1; /* XXX */ + l->thread = (char *) -1; /* XXX */ l->recursion_depth = 0; } -void lock_sleepable(l, can_sleep) - lock_t l; - boolean_t can_sleep; +void +lock_sleepable(l, can_sleep) + lock_t l; + boolean_t can_sleep; { simple_lock(&l->interlock); l->can_sleep = can_sleep; @@ -190,24 +198,24 @@ void lock_sleepable(l, can_sleep) * for the lock. These work on uniprocessor systems. 
*/ -void lock_write(l) - register lock_t l; +void +lock_write(l) + register lock_t l; { - register int i; + register int i; simple_lock(&l->interlock); - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock. + * Recursive lock. */ l->recursion_depth++; simple_unlock(&l->interlock); return; } - /* - * Try to acquire the want_write bit. + * Try to acquire the want_write bit. */ while (l->want_write) { if ((i = lock_wait_time) > 0) { @@ -216,7 +224,6 @@ void lock_write(l) continue; simple_lock(&l->interlock); } - if (l->can_sleep && l->want_write) { l->waiting = TRUE; thread_sleep((int) l, &l->interlock, FALSE); @@ -231,11 +238,10 @@ void lock_write(l) if ((i = lock_wait_time) > 0) { simple_unlock(&l->interlock); while (--i > 0 && (l->read_count != 0 || - l->want_upgrade)) + l->want_upgrade)) continue; simple_lock(&l->interlock); } - if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { l->waiting = TRUE; thread_sleep((int) l, &l->interlock, FALSE); @@ -245,21 +251,20 @@ void lock_write(l) simple_unlock(&l->interlock); } -void lock_done(l) - register lock_t l; +void +lock_done(l) + register lock_t l; { simple_lock(&l->interlock); if (l->read_count != 0) l->read_count--; - else - if (l->recursion_depth != 0) + else if (l->recursion_depth != 0) l->recursion_depth--; + else if (l->want_upgrade) + l->want_upgrade = FALSE; else - if (l->want_upgrade) - l->want_upgrade = FALSE; - else - l->want_write = FALSE; + l->want_write = FALSE; if (l->waiting) { l->waiting = FALSE; @@ -268,22 +273,22 @@ void lock_done(l) simple_unlock(&l->interlock); } -void lock_read(l) - register lock_t l; +void +lock_read(l) + register lock_t l; { - register int i; + register int i; simple_lock(&l->interlock); - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock. + * Recursive lock. */ l->read_count++; simple_unlock(&l->interlock); return; } - while (l->want_write || l->want_upgrade) { if ((i = lock_wait_time) > 0) { simple_unlock(&l->interlock); @@ -291,7 +296,6 @@ void lock_read(l) continue; simple_lock(&l->interlock); } - if (l->can_sleep && (l->want_write || l->want_upgrade)) { l->waiting = TRUE; thread_sleep((int) l, &l->interlock, FALSE); @@ -313,39 +317,36 @@ void lock_read(l) * * Returns TRUE if the upgrade *failed*. */ -boolean_t lock_read_to_write(l) - register lock_t l; +boolean_t +lock_read_to_write(l) + register lock_t l; { - register int i; + register int i; simple_lock(&l->interlock); l->read_count--; - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock. + * Recursive lock. */ l->recursion_depth++; simple_unlock(&l->interlock); - return(FALSE); + return (FALSE); } - if (l->want_upgrade) { /* - * Someone else has requested upgrade. - * Since we've released a read lock, wake - * him up. + * Someone else has requested upgrade. Since we've released a + * read lock, wake him up. 
*/ if (l->waiting) { l->waiting = FALSE; thread_wakeup((int) l); } - simple_unlock(&l->interlock); return (TRUE); } - l->want_upgrade = TRUE; while (l->read_count != 0) { @@ -355,7 +356,6 @@ boolean_t lock_read_to_write(l) continue; simple_lock(&l->interlock); } - if (l->can_sleep && l->read_count != 0) { l->waiting = TRUE; thread_sleep((int) l, &l->interlock, FALSE); @@ -367,25 +367,24 @@ boolean_t lock_read_to_write(l) return (FALSE); } -void lock_write_to_read(l) - register lock_t l; +void +lock_write_to_read(l) + register lock_t l; { simple_lock(&l->interlock); l->read_count++; if (l->recursion_depth != 0) l->recursion_depth--; - else - if (l->want_upgrade) + else if (l->want_upgrade) l->want_upgrade = FALSE; else - l->want_write = FALSE; + l->want_write = FALSE; if (l->waiting) { l->waiting = FALSE; thread_wakeup((int) l); } - simple_unlock(&l->interlock); } @@ -398,36 +397,35 @@ void lock_write_to_read(l) * Returns FALSE if the lock is not held on return. */ -boolean_t lock_try_write(l) - register lock_t l; +boolean_t +lock_try_write(l) + register lock_t l; { simple_lock(&l->interlock); - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock + * Recursive lock */ l->recursion_depth++; simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } - if (l->want_write || l->want_upgrade || l->read_count) { /* - * Can't get lock. + * Can't get lock. */ simple_unlock(&l->interlock); - return(FALSE); + return (FALSE); } - /* - * Have lock. + * Have lock. */ l->want_write = TRUE; simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } /* @@ -438,28 +436,27 @@ boolean_t lock_try_write(l) * Returns FALSE if the lock is not held on return. */ -boolean_t lock_try_read(l) - register lock_t l; +boolean_t +lock_try_read(l) + register lock_t l; { simple_lock(&l->interlock); - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock + * Recursive lock */ l->read_count++; simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } - if (l->want_write || l->want_upgrade) { simple_unlock(&l->interlock); - return(FALSE); + return (FALSE); } - l->read_count++; simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } /* @@ -472,25 +469,25 @@ boolean_t lock_try_read(l) * * Returns FALSE if the upgrade *failed*. */ -boolean_t lock_try_read_to_write(l) - register lock_t l; +boolean_t +lock_try_read_to_write(l) + register lock_t l; { simple_lock(&l->interlock); - if (((thread_t)l->thread) == current_thread()) { + if (((thread_t) l->thread) == current_thread()) { /* - * Recursive lock + * Recursive lock */ l->read_count--; l->recursion_depth++; simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } - if (l->want_upgrade) { simple_unlock(&l->interlock); - return(FALSE); + return (FALSE); } l->want_upgrade = TRUE; l->read_count--; @@ -502,15 +499,16 @@ boolean_t lock_try_read_to_write(l) } simple_unlock(&l->interlock); - return(TRUE); + return (TRUE); } /* * Allow a process that has a lock for write to acquire it * recursively (for read, write, or update). */ -void lock_set_recursive(l) - lock_t l; +void +lock_set_recursive(l) + lock_t l; { simple_lock(&l->interlock); if (!l->want_write) { @@ -523,14 +521,15 @@ void lock_set_recursive(l) /* * Prevent a lock from being re-acquired. 
*/ -void lock_clear_recursive(l) - lock_t l; +void +lock_clear_recursive(l) + lock_t l; { simple_lock(&l->interlock); if (((thread_t) l->thread) != current_thread()) { panic("lock_clear_recursive: wrong thread"); } if (l->recursion_depth == 0) - l->thread = (char *)-1; /* XXX */ + l->thread = (char *) -1; /* XXX */ simple_unlock(&l->interlock); } diff --git a/sys/vm/lock.h b/sys/vm/lock.h index 308d9411c96c..6cd71edc2947 100644 --- a/sys/vm/lock.h +++ b/sys/vm/lock.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: lock.h,v 1.2 1994/08/02 07:55:11 davidg Exp $ */ /* @@ -78,11 +78,11 @@ */ struct slock { - int lock_data; /* in general 1 bit is sufficient */ + int lock_data; /* in general 1 bit is sufficient */ }; -typedef struct slock simple_lock_data_t; -typedef struct slock *simple_lock_t; +typedef struct slock simple_lock_data_t; +typedef struct slock *simple_lock_t; /* * The general lock structure. Provides for multiple readers, @@ -93,61 +93,55 @@ typedef struct slock *simple_lock_t; struct lock { #ifdef vax /* - * Efficient VAX implementation -- see field description below. + * Efficient VAX implementation -- see field description below. */ - unsigned int read_count:16, - want_upgrade:1, - want_write:1, - waiting:1, - can_sleep:1, - :0; - - simple_lock_data_t interlock; -#else /* vax */ + unsigned int read_count:16, want_upgrade:1, want_write:1, waiting:1, can_sleep:1,:0; + + simple_lock_data_t interlock; +#else /* vax */ #ifdef ns32000 /* - * Efficient ns32000 implementation -- - * see field description below. + * Efficient ns32000 implementation -- see field description below. */ - simple_lock_data_t interlock; - unsigned int read_count:16, - want_upgrade:1, - want_write:1, - waiting:1, - can_sleep:1, - :0; - -#else /* ns32000 */ - /* Only the "interlock" field is used for hardware exclusion; - * other fields are modified with normal instructions after - * acquiring the interlock bit. + simple_lock_data_t interlock; + unsigned int read_count:16, want_upgrade:1, want_write:1, waiting:1, can_sleep:1,:0; + +#else /* ns32000 */ + /* + * Only the "interlock" field is used for hardware exclusion; other + * fields are modified with normal instructions after acquiring the + * interlock bit. 
+ */ + simple_lock_data_t + interlock; /* Interlock for remaining fields */ + boolean_t want_write; /* Writer is waiting, or locked for write */ + boolean_t want_upgrade; /* Read-to-write upgrade waiting */ + boolean_t waiting; /* Someone is sleeping on lock */ + boolean_t can_sleep; /* Can attempts to lock go to sleep */ + int read_count; /* Number of accepted readers */ +#endif /* ns32000 */ +#endif /* vax */ + char *thread; /* Thread that has lock, if recursive locking + * allowed */ + /* + * (should be thread_t, but but we then have mutually recursive + * definitions) */ - simple_lock_data_t - interlock; /* Interlock for remaining fields */ - boolean_t want_write; /* Writer is waiting, or locked for write */ - boolean_t want_upgrade; /* Read-to-write upgrade waiting */ - boolean_t waiting; /* Someone is sleeping on lock */ - boolean_t can_sleep; /* Can attempts to lock go to sleep */ - int read_count; /* Number of accepted readers */ -#endif /* ns32000 */ -#endif /* vax */ - char *thread; /* Thread that has lock, if recursive locking allowed */ - /* (should be thread_t, but but we then have mutually - recursive definitions) */ - int recursion_depth;/* Depth of recursion */ + int recursion_depth; /* Depth of recursion */ }; -typedef struct lock lock_data_t; -typedef struct lock *lock_t; +typedef struct lock lock_data_t; +typedef struct lock *lock_t; #if NCPUS > 1 __BEGIN_DECLS -void simple_lock __P((simple_lock_t)); -void simple_lock_init __P((simple_lock_t)); -boolean_t simple_lock_try __P((simple_lock_t)); -void simple_unlock __P((simple_lock_t)); +void simple_lock __P((simple_lock_t)); +void simple_lock_init __P((simple_lock_t)); +boolean_t simple_lock_try __P((simple_lock_t)); +void simple_unlock __P((simple_lock_t)); + __END_DECLS -#else /* No multiprocessor locking is necessary. */ +#else /* No multiprocessor locking is necessary. */ #define simple_lock(l) #define simple_lock_init(l) #define simple_lock_try(l) (1) /* Always succeeds. */ @@ -159,16 +153,17 @@ __END_DECLS #define lock_read_done(l) lock_done(l) #define lock_write_done(l) lock_done(l) -void lock_clear_recursive __P((lock_t)); -void lock_done __P((lock_t)); -void lock_init __P((lock_t, boolean_t)); -void lock_read __P((lock_t)); -boolean_t lock_read_to_write __P((lock_t)); -void lock_set_recursive __P((lock_t)); -void lock_sleepable __P((lock_t, boolean_t)); -boolean_t lock_try_read __P((lock_t)); -boolean_t lock_try_read_to_write __P((lock_t)); -boolean_t lock_try_write __P((lock_t)); -void lock_write __P((lock_t)); -void lock_write_to_read __P((lock_t)); -#endif /* !_LOCK_H_ */ +void lock_clear_recursive __P((lock_t)); +void lock_done __P((lock_t)); +void lock_init __P((lock_t, boolean_t)); +void lock_read __P((lock_t)); +boolean_t lock_read_to_write __P((lock_t)); +void lock_set_recursive __P((lock_t)); +void lock_sleepable __P((lock_t, boolean_t)); +boolean_t lock_try_read __P((lock_t)); +boolean_t lock_try_read_to_write __P((lock_t)); +boolean_t lock_try_write __P((lock_t)); +void lock_write __P((lock_t)); +void lock_write_to_read __P((lock_t)); + +#endif /* !_LOCK_H_ */ diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 3cf91ecd1d4b..b55519f2f20d 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Author: Avadis Tevanian, Jr. 
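The lock shown in kern_lock.c and lock.h above admits any number of readers, a single writer, and one pending read-to-write upgrade, and lock_done() always releases the strongest claim it finds. Below is a single-threaded sketch of those state rules covering only the non-blocking try paths; the interlock, spinning, sleeping and recursion handling are omitted, and every name in it is invented for illustration.

    #include <stdio.h>
    #include <stdbool.h>

    struct rwmodel {
        bool want_write;    /* a writer holds, or is committed to, the lock */
        bool want_upgrade;  /* a reader is being promoted to writer */
        int  read_count;    /* readers currently admitted */
    };

    static bool
    try_read(struct rwmodel *l)
    {
        if (l->want_write || l->want_upgrade)
            return false;
        l->read_count++;
        return true;
    }

    static bool
    try_write(struct rwmodel *l)
    {
        if (l->want_write || l->want_upgrade || l->read_count)
            return false;
        l->want_write = true;
        return true;
    }

    /* Mirrors lock_try_read_to_write(): refused only if another upgrade is
     * already pending; the kernel then sleeps until remaining readers drain,
     * which a single-threaded model never has to do. */
    static bool
    try_read_to_write(struct rwmodel *l)
    {
        if (l->want_upgrade)
            return false;
        l->want_upgrade = true;
        l->read_count--;        /* give up our own read hold */
        return true;
    }

    static void
    done(struct rwmodel *l)
    {
        /* release the strongest claim, in the same order as lock_done()
         * (recursion handling omitted) */
        if (l->read_count != 0)
            l->read_count--;
        else if (l->want_upgrade)
            l->want_upgrade = false;
        else
            l->want_write = false;
    }

    int main(void)
    {
        struct rwmodel l = { false, false, 0 };

        printf("read    -> %d\n", try_read(&l));                     /* 1 */
        printf("write   -> %d (reader present)\n", try_write(&l));   /* 0 */
        printf("upgrade -> %d\n", try_read_to_write(&l));            /* 1 */
        done(&l);                            /* drops the upgrade hold */
        printf("write   -> %d\n", try_write(&l));                    /* 1 */
        done(&l);
        return 0;
    }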
- * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: pmap.h,v 1.4 1994/09/02 04:12:26 davidg Exp $ + * $Id: pmap.h,v 1.5 1994/11/14 08:19:07 bde Exp $ */ /* @@ -79,47 +79,50 @@ * in the following structure. */ struct pmap_statistics { - long resident_count; /* # of pages mapped (total)*/ - long wired_count; /* # of pages wired */ + long resident_count; /* # of pages mapped (total) */ + long wired_count; /* # of pages wired */ }; -typedef struct pmap_statistics *pmap_statistics_t; +typedef struct pmap_statistics *pmap_statistics_t; #include <machine/pmap.h> #ifdef KERNEL __BEGIN_DECLS -void * pmap_bootstrap_alloc __P((int)); -#if 0 /* XXX */ -void pmap_bootstrap __P((/* machine dependent */)); +void *pmap_bootstrap_alloc __P((int)); + +#if 0 /* XXX */ +void pmap_bootstrap __P(( /* machine dependent */ )); + #endif -void pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t)); -void pmap_clear_modify __P((vm_offset_t pa)); -void pmap_clear_reference __P((vm_offset_t pa)); -void pmap_collect __P((pmap_t)); -void pmap_copy __P((pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t)); -void pmap_copy_page __P((vm_offset_t, vm_offset_t)); -pmap_t pmap_create __P((vm_size_t)); -void pmap_destroy __P((pmap_t)); -void pmap_enter __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); -vm_offset_t pmap_extract __P((pmap_t, vm_offset_t)); -void pmap_init __P((vm_offset_t, vm_offset_t)); -boolean_t pmap_is_modified __P((vm_offset_t pa)); -boolean_t pmap_is_referenced __P((vm_offset_t pa)); -void pmap_kenter __P((vm_offset_t, vm_offset_t)); -void pmap_kremove __P((vm_offset_t)); -vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); -void pmap_page_protect __P((vm_offset_t, vm_prot_t)); -void pmap_pageable __P((pmap_t, vm_offset_t, vm_offset_t, boolean_t)); -vm_offset_t pmap_phys_address __P((int)); -void pmap_pinit __P((pmap_t)); -void pmap_protect __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t)); -void pmap_qenter __P((vm_offset_t, vm_page_t *, int)); -void pmap_qremove __P((vm_offset_t, int)); -void pmap_reference __P((pmap_t)); -void pmap_release __P((pmap_t)); -void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t)); -void pmap_zero_page __P((vm_offset_t)); +void pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t)); +void pmap_clear_modify __P((vm_offset_t pa)); +void pmap_clear_reference __P((vm_offset_t pa)); +void pmap_collect __P((pmap_t)); +void pmap_copy __P((pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t)); +void pmap_copy_page __P((vm_offset_t, vm_offset_t)); +pmap_t pmap_create __P((vm_size_t)); +void pmap_destroy __P((pmap_t)); +void 
pmap_enter __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); +vm_offset_t pmap_extract __P((pmap_t, vm_offset_t)); +void pmap_init __P((vm_offset_t, vm_offset_t)); +boolean_t pmap_is_modified __P((vm_offset_t pa)); +boolean_t pmap_is_referenced __P((vm_offset_t pa)); +void pmap_kenter __P((vm_offset_t, vm_offset_t)); +void pmap_kremove __P((vm_offset_t)); +vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); +void pmap_page_protect __P((vm_offset_t, vm_prot_t)); +void pmap_pageable __P((pmap_t, vm_offset_t, vm_offset_t, boolean_t)); +vm_offset_t pmap_phys_address __P((int)); +void pmap_pinit __P((pmap_t)); +void pmap_protect __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t)); +void pmap_qenter __P((vm_offset_t, vm_page_t *, int)); +void pmap_qremove __P((vm_offset_t, int)); +void pmap_reference __P((pmap_t)); +void pmap_release __P((pmap_t)); +void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t)); +void pmap_zero_page __P((vm_offset_t)); + __END_DECLS #endif -#endif /* _PMAP_VM_ */ +#endif /* _PMAP_VM_ */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 89852c01ed1a..56492568962e 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -39,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.20 1994/12/22 05:18:12 davidg Exp $ + * $Id: swap_pager.c,v 1.21 1994/12/23 04:56:50 davidg Exp $ */ /* @@ -66,11 +66,11 @@ #include <vm/swap_pager.h> #ifndef NPENDINGIO -#define NPENDINGIO 16 +#define NPENDINGIO 10 #endif -int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int)); -int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *)); +int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int)); +int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *)); int nswiodone; extern int vm_pageout_rate_limit; @@ -79,25 +79,26 @@ extern int hz; int swap_pager_full; extern vm_map_t pager_map; extern int vm_swap_size; -int no_swap_space=1; +int no_swap_space = 1; struct rlist *swaplist; int nswaplist; +extern int vm_pio_needed; #define MAX_PAGEOUT_CLUSTER 8 TAILQ_HEAD(swpclean, swpagerclean); -typedef struct swpagerclean *swp_clean_t; +typedef struct swpagerclean *swp_clean_t; struct swpagerclean { - TAILQ_ENTRY(swpagerclean) spc_list; - int spc_flags; - struct buf *spc_bp; - sw_pager_t spc_swp; - vm_offset_t spc_kva; - int spc_count; - vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; -} swcleanlist [NPENDINGIO] ; + TAILQ_ENTRY(swpagerclean) spc_list; + int spc_flags; + struct buf *spc_bp; + sw_pager_t spc_swp; + vm_offset_t spc_kva; + int spc_count; + vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; +} swcleanlist[NPENDINGIO]; extern vm_map_t kernel_map; @@ -117,7 +118,7 @@ struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ int swap_pager_needflags; struct rlist *swapfrag; -struct pagerlst *swp_qs[]={ +struct pagerlst *swp_qs[] = { &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 }; @@ -141,12 +142,14 @@ void swap_pager_finish(); int dmmin, dmmax; extern int vm_page_count; -static inline void swapsizecheck() { - if( vm_swap_size < 128*btodb(PAGE_SIZE)) { - if( swap_pager_full) +static inline void +swapsizecheck() +{ + if (vm_swap_size < 128 * btodb(PAGE_SIZE)) { + if (swap_pager_full) printf("swap_pager: out of space\n"); swap_pager_full = 1; - } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) + } else if (vm_swap_size > 192 * btodb(PAGE_SIZE)) swap_pager_full = 0; } @@ -171,8 +174,8 @@ swap_pager_init() * Calculate the swap allocation 
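swapsizecheck() above uses two thresholds, declaring swap_pager_full once free swap drops below 128 pages' worth of blocks and clearing it only after it climbs back above 192, so the flag does not flap while free space hovers at the boundary. A minimal model of that hysteresis; the thresholds here are plain page counts, whereas the kernel scales them by btodb(PAGE_SIZE).

    #include <stdio.h>

    #define LOW  128    /* declare "swap full" below this many free pages' worth */
    #define HIGH 192    /* clear the condition only above this */

    static int swap_full;

    static void
    sizecheck(int free_pages)
    {
        if (free_pages < LOW)
            swap_full = 1;
        else if (free_pages > HIGH)
            swap_full = 0;
        /* between the two thresholds the previous state is kept */
    }

    int main(void)
    {
        int samples[] = { 500, 127, 150, 193, 100 };

        for (int i = 0; i < 5; i++) {
            sizecheck(samples[i]);
            printf("free=%3d -> swap_full=%d\n", samples[i], swap_full);
        }
        return 0;
    }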
constants. */ - dmmin = CLBYTES/DEV_BSIZE; - dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; + dmmin = CLBYTES / DEV_BSIZE; + dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2; } @@ -191,21 +194,30 @@ swap_pager_alloc(handle, size, prot, offset) register vm_pager_t pager; register sw_pager_t swp; int waitok; - int i,j; - + int i, j; + if (require_swap_init) { swp_clean_t spc; struct buf *bp; + +#if 0 + int desiredpendingio; + + desiredpendingio = cnt.v_page_count / 200 + 2; + if (desiredpendingio < npendingio) + npendingio = desiredpendingio; +#endif + /* - * kva's are allocated here so that we dont need to keep - * doing kmem_alloc pageables at runtime + * kva's are allocated here so that we dont need to keep doing + * kmem_alloc pageables at runtime */ - for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { - spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER); + for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) { + spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER); if (!spc->spc_kva) { break; } - spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); + spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT); if (!spc->spc_bp) { kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); break; @@ -214,53 +226,49 @@ swap_pager_alloc(handle, size, prot, offset) TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); } require_swap_init = 0; - if( size == 0) - return(NULL); + if (size == 0) + return (NULL); } - /* - * If this is a "named" anonymous region, look it up and - * return the appropriate pager if it exists. + * If this is a "named" anonymous region, look it up and return the + * appropriate pager if it exists. */ if (handle) { pager = vm_pager_lookup(&swap_pager_list, handle); if (pager != NULL) { /* - * Use vm_object_lookup to gain a reference - * to the object and also to remove from the - * object cache. + * Use vm_object_lookup to gain a reference to the + * object and also to remove from the object cache. */ if (vm_object_lookup(pager) == NULL) panic("swap_pager_alloc: bad object"); - return(pager); + return (pager); } } - /* - * Pager doesn't exist, allocate swap management resources - * and initialize. + * Pager doesn't exist, allocate swap management resources and + * initialize. */ - waitok = handle ? M_WAITOK : M_NOWAIT; - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); + waitok = handle ? 
M_WAITOK : M_NOWAIT; + pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok); if (pager == NULL) - return(NULL); - swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); + return (NULL); + swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok); if (swp == NULL) { - free((caddr_t)pager, M_VMPAGER); - return(NULL); + free((caddr_t) pager, M_VMPAGER); + return (NULL); } size = round_page(size); swp->sw_osize = size; - swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); + swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE); swp->sw_blocks = (sw_blk_t) - malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), - M_VMPGDATA, waitok); + malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks), + M_VMPGDATA, waitok); if (swp->sw_blocks == NULL) { - free((caddr_t)swp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); - return(NULL); + free((caddr_t) swp, M_VMPGDATA); + free((caddr_t) pager, M_VMPAGER); + return (NULL); } - for (i = 0; i < swp->sw_nblocks; i++) { swp->sw_blocks[i].swb_valid = 0; swp->sw_blocks[i].swb_locked = 0; @@ -276,8 +284,8 @@ swap_pager_alloc(handle, size, prot, offset) TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); /* * Consistant with other pagers: return with object - * referenced. Can't do this with handle == NULL - * since it might be the pageout daemon calling. + * referenced. Can't do this with handle == NULL since it + * might be the pageout daemon calling. */ object = vm_object_allocate(size); vm_object_enter(object, pager); @@ -289,9 +297,9 @@ swap_pager_alloc(handle, size, prot, offset) pager->pg_handle = handle; pager->pg_ops = &swappagerops; pager->pg_type = PG_SWAP; - pager->pg_data = (caddr_t)swp; + pager->pg_data = (caddr_t) swp; - return(pager); + return (pager); } /* @@ -311,14 +319,14 @@ swap_pager_diskaddr(swp, offset, valid) if (valid) *valid = 0; - ix = offset / (SWB_NPAGES*PAGE_SIZE); + ix = offset / (SWB_NPAGES * PAGE_SIZE); if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { - return(FALSE); + return (FALSE); } swb = &swp->sw_blocks[ix]; - ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; + ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE; if (valid) - *valid = swb->swb_valid & (1<<ix); + *valid = swb->swb_valid & (1 << ix); return &swb->swb_block[ix]; } @@ -334,13 +342,13 @@ swap_pager_setvalid(swp, offset, valid) { register sw_blk_t swb; int ix; - - ix = offset / (SWB_NPAGES*PAGE_SIZE); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) + + ix = offset / (SWB_NPAGES * PAGE_SIZE); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) return; swb = &swp->sw_blocks[ix]; - ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; + ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE; if (valid) swb->swb_valid |= (1 << ix); else @@ -353,9 +361,10 @@ swap_pager_setvalid(swp, offset, valid) * minimization policy. */ int -swap_pager_getswapspace( unsigned amount, unsigned *rtval) { +swap_pager_getswapspace(unsigned amount, unsigned *rtval) +{ vm_swap_size -= amount; - if( !rlist_alloc(&swaplist, amount, rtval)) { + if (!rlist_alloc(&swaplist, amount, rtval)) { vm_swap_size += amount; return 0; } else { @@ -369,9 +378,10 @@ swap_pager_getswapspace( unsigned amount, unsigned *rtval) { * minimization policy. 
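swap_pager_diskaddr() and swap_pager_setvalid() above find a page's swap slot with plain index arithmetic: the block index is offset / (SWB_NPAGES * PAGE_SIZE), the slot within the block is the remainder divided by PAGE_SIZE, and a per-block bitmask records which slots hold written data; sw_nblocks is the matching roundup. The standalone sketch below reproduces that addressing with assumed values SWB_NPAGES = 8 and a 4 KB page; the kernel does the size computation in DEV_BSIZE units via btodb, but the rounding is the same.

    #include <stdio.h>

    #define PAGE_SZ     4096
    #define SWB_NPAGES  8
    #define BLOCK_BYTES (SWB_NPAGES * PAGE_SZ)

    struct swblk {
        int          swb_block[SWB_NPAGES]; /* disk address per page slot */
        unsigned int swb_valid;             /* bit i set: slot i holds written data */
    };

    static int block_index(unsigned long off)  { return off / BLOCK_BYTES; }
    static int block_offset(unsigned long off) { return (off % BLOCK_BYTES) / PAGE_SZ; }

    int main(void)
    {
        unsigned long object_size = 100 * PAGE_SZ;
        /* same roundup as sw_nblocks: enough blocks to cover the object */
        unsigned long nblocks = (object_size + BLOCK_BYTES - 1) / BLOCK_BYTES;
        struct swblk blk = { {0}, 0 };
        unsigned long off = 11 * PAGE_SZ;   /* page 11 of the object */

        printf("%lu pages -> %lu swap blocks\n", object_size / PAGE_SZ, nblocks);
        printf("offset 0x%lx -> block %d, slot %d\n",
            off, block_index(off), block_offset(off));
        blk.swb_valid |= 1u << block_offset(off);   /* mark the slot written */
        printf("slot valid: %d\n", (blk.swb_valid >> block_offset(off)) & 1);
        return 0;
    }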
*/ void -swap_pager_freeswapspace( unsigned from, unsigned to) { +swap_pager_freeswapspace(unsigned from, unsigned to) +{ rlist_free(&swaplist, from, to); - vm_swap_size += (to-from)+1; + vm_swap_size += (to - from) + 1; swapsizecheck(); } /* @@ -390,9 +400,10 @@ _swap_pager_freespace(swp, start, size) for (i = start; i < round_page(start + size); i += PAGE_SIZE) { int valid; int *addr = swap_pager_diskaddr(swp, i, &valid); + if (addr && *addr != SWB_EMPTY) { - swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); - if( valid) { + swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1); + if (valid) { swap_pager_setvalid(swp, i, 0); } *addr = SWB_EMPTY; @@ -402,14 +413,14 @@ _swap_pager_freespace(swp, start, size) } void -swap_pager_freespace(pager, start, size) +swap_pager_freespace(pager, start, size) vm_pager_t pager; vm_offset_t start; vm_offset_t size; { _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); } - + /* * swap_pager_reclaim frees up over-allocated space from all pagers * this eliminates internal fragmentation due to allocation of space @@ -434,12 +445,12 @@ swap_pager_reclaim() static int reclaims[MAXRECLAIM]; static int in_reclaim; -/* - * allow only one process to be in the swap_pager_reclaim subroutine - */ + /* + * allow only one process to be in the swap_pager_reclaim subroutine + */ s = splbio(); if (in_reclaim) { - tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); + tsleep((caddr_t) & in_reclaim, PSWP, "swrclm", 0); splx(s); return; } @@ -459,11 +470,12 @@ swap_pager_reclaim() swp = (sw_pager_t) p->pg_data; for (i = 0; i < swp->sw_nblocks; i++) { sw_blk_t swb = &swp->sw_blocks[i]; - if( swb->swb_locked) + + if (swb->swb_locked) continue; for (j = 0; j < SWB_NPAGES; j++) { if (swb->swb_block[j] != SWB_EMPTY && - (swb->swb_valid & (1 << j)) == 0) { + (swb->swb_valid & (1 << j)) == 0) { reclaims[reclaimcount++] = swb->swb_block[j]; swb->swb_block[j] = SWB_EMPTY; if (reclaimcount >= MAXRECLAIM) @@ -474,22 +486,22 @@ swap_pager_reclaim() p = p->pg_list.tqe_next; } } - + rfinished: -/* - * free the blocks that have been added to the reclaim list - */ + /* + * free the blocks that have been added to the reclaim list + */ for (i = 0; i < reclaimcount; i++) { - swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); - wakeup((caddr_t) &in_reclaim); + swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1); + wakeup((caddr_t) & in_reclaim); } splx(s); in_reclaim = 0; - wakeup((caddr_t) &in_reclaim); + wakeup((caddr_t) & in_reclaim); } - + /* * swap_pager_copy copies blocks from one pager to another and @@ -508,18 +520,18 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) vm_offset_t i; int s; - if( vm_swap_size) + if (vm_swap_size) no_swap_space = 0; - if( no_swap_space) + if (no_swap_space) return; srcswp = (sw_pager_t) srcpager->pg_data; dstswp = (sw_pager_t) dstpager->pg_data; -/* - * remove the source pager from the swap_pager internal queue - */ + /* + * remove the source pager from the swap_pager internal queue + */ s = splbio(); if (srcswp->sw_flags & SW_NAMED) { TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); @@ -527,91 +539,97 @@ swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) } else { TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); } - + while (srcswp->sw_poip) { - tsleep((caddr_t)srcswp, PVM, "spgout", 0); + tsleep((caddr_t) srcswp, PVM, "spgout", 0); } splx(s); -/* - * clean all of the pages that are currently active and finished - */ + /* + * clean all of the 
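swap_pager_getswapspace() and swap_pager_freeswapspace() above keep the vm_swap_size counter in step with the resource-list allocator: the counter is debited before rlist_alloc() is tried and refunded if the allocation fails, so other code gets a cheap global view of free swap without consulting the extent map. A rough model of that discipline; reserve() and release() are crude invented stand-ins for rlist_alloc()/rlist_free(), not their real behaviour.

    #include <stdio.h>
    #include <stdbool.h>

    #define POOL 1024                       /* total swap, in disk blocks */

    static int free_space = POOL;           /* cheap counter, models vm_swap_size */
    static int list_free  = POOL;           /* crude stand-in for the extent map */

    static bool
    reserve(int amount, int *start)         /* stand-in for rlist_alloc() */
    {
        if (amount > list_free)
            return false;
        *start = POOL - list_free;          /* position is arbitrary in this model */
        list_free -= amount;
        return true;
    }

    static void
    release(int amount)                     /* stand-in for rlist_free() */
    {
        list_free += amount;
    }

    static bool
    getswapspace(int amount, int *start)
    {
        free_space -= amount;               /* charge the counter up front */
        if (!reserve(amount, start)) {
            free_space += amount;           /* refund on failure */
            return false;
        }
        return true;                        /* the kernel also runs swapsizecheck() here */
    }

    static void
    freeswapspace(int amount)
    {
        release(amount);
        free_space += amount;
    }

    int main(void)
    {
        int start;

        if (getswapspace(256, &start))
            printf("reserved 256 at %d, free counter %d\n", start, free_space);
        freeswapspace(256);
        printf("free counter after release: %d\n", free_space);
        return 0;
    }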
pages that are currently active and finished + */ (void) swap_pager_clean(); - + s = splbio(); -/* - * clear source block before destination object - * (release allocated space) - */ + /* + * clear source block before destination object + * (release allocated space) + */ for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { int valid; int *addr = swap_pager_diskaddr(srcswp, i, &valid); + if (addr && *addr != SWB_EMPTY) { - swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); + swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1); *addr = SWB_EMPTY; } } -/* - * transfer source to destination - */ + /* + * transfer source to destination + */ for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { int srcvalid, dstvalid; int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, - &srcvalid); + &srcvalid); int *dstaddrp; - /* - * see if the source has space allocated - */ - if (srcaddrp && *srcaddrp != SWB_EMPTY) { + /* - * if the source is valid and the dest has no space, then - * copy the allocation from the srouce to the dest. + * see if the source has space allocated */ + if (srcaddrp && *srcaddrp != SWB_EMPTY) { + /* + * if the source is valid and the dest has no space, + * then copy the allocation from the srouce to the + * dest. + */ if (srcvalid) { dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); /* - * if the dest already has a valid block, deallocate the - * source block without copying. + * if the dest already has a valid block, + * deallocate the source block without + * copying. */ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { - swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); + swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1); *dstaddrp = SWB_EMPTY; } if (dstaddrp && *dstaddrp == SWB_EMPTY) { *dstaddrp = *srcaddrp; *srcaddrp = SWB_EMPTY; swap_pager_setvalid(dstswp, i + dstoffset, 1); - } - } - /* - * if the source is not empty at this point, then deallocate the space. - */ + } + } + /* + * if the source is not empty at this point, then + * deallocate the space. + */ if (*srcaddrp != SWB_EMPTY) { - swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); + swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1); *srcaddrp = SWB_EMPTY; } } } -/* - * deallocate the rest of the source object - */ + /* + * deallocate the rest of the source object + */ for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { int valid; int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); + if (srcaddrp && *srcaddrp != SWB_EMPTY) { - swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); + swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1); *srcaddrp = SWB_EMPTY; } } - + splx(s); - free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); + free((caddr_t) srcswp->sw_blocks, M_VMPGDATA); srcswp->sw_blocks = 0; - free((caddr_t)srcswp, M_VMPGDATA); + free((caddr_t) srcswp, M_VMPGDATA); srcpager->pg_data = 0; - free((caddr_t)srcpager, M_VMPAGER); + free((caddr_t) srcpager, M_VMPAGER); return; } @@ -621,14 +639,14 @@ void swap_pager_dealloc(pager) vm_pager_t pager; { - register int i,j; + register int i, j; register sw_blk_t bp; register sw_pager_t swp; int s; /* - * Remove from list right away so lookups will fail if we - * block for pageout completion. + * Remove from list right away so lookups will fail if we block for + * pageout completion. 
*/ s = splbio(); swp = (sw_pager_t) pager->pg_data; @@ -639,15 +657,15 @@ swap_pager_dealloc(pager) TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); } /* - * Wait for all pageouts to finish and remove - * all entries from cleaning list. + * Wait for all pageouts to finish and remove all entries from + * cleaning list. */ while (swp->sw_poip) { - tsleep((caddr_t)swp, PVM, "swpout", 0); + tsleep((caddr_t) swp, PVM, "swpout", 0); } splx(s); - + (void) swap_pager_clean(); @@ -657,22 +675,22 @@ swap_pager_dealloc(pager) s = splbio(); for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { for (j = 0; j < SWB_NPAGES; j++) - if (bp->swb_block[j] != SWB_EMPTY) { - swap_pager_freeswapspace((unsigned)bp->swb_block[j], - (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); - bp->swb_block[j] = SWB_EMPTY; - } + if (bp->swb_block[j] != SWB_EMPTY) { + swap_pager_freeswapspace((unsigned) bp->swb_block[j], + (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1); + bp->swb_block[j] = SWB_EMPTY; + } } splx(s); /* * Free swap management resources */ - free((caddr_t)swp->sw_blocks, M_VMPGDATA); + free((caddr_t) swp->sw_blocks, M_VMPGDATA); swp->sw_blocks = 0; - free((caddr_t)swp, M_VMPGDATA); + free((caddr_t) swp, M_VMPGDATA); pager->pg_data = 0; - free((caddr_t)pager, M_VMPAGER); + free((caddr_t) pager, M_VMPAGER); } /* @@ -686,7 +704,7 @@ swap_pager_getmulti(pager, m, count, reqpage, sync) int reqpage; boolean_t sync; { - if( reqpage >= count) + if (reqpage >= count) panic("swap_pager_getmulti: reqpage >= count\n"); return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); } @@ -701,9 +719,9 @@ swap_pager_getpage(pager, m, sync) boolean_t sync; { vm_page_t marray[1]; - + marray[0] = m; - return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); + return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0); } int @@ -720,12 +738,11 @@ swap_pager_putmulti(pager, m, c, sync, rtvals) (void) swap_pager_clean(); return VM_PAGER_OK; } - flags = B_WRITE; if (!sync) flags |= B_ASYNC; - - return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); + + return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals); } /* @@ -746,36 +763,37 @@ swap_pager_putpage(pager, m, sync) (void) swap_pager_clean(); return VM_PAGER_OK; } - marray[0] = m; flags = B_WRITE; if (!sync) flags |= B_ASYNC; - - swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); + + swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals); return rtvals[0]; } static inline int -const swap_pager_block_index(swp, offset) +const +swap_pager_block_index(swp, offset) sw_pager_t swp; vm_offset_t offset; { - return (offset / (SWB_NPAGES*PAGE_SIZE)); + return (offset / (SWB_NPAGES * PAGE_SIZE)); } static inline int -const swap_pager_block_offset(swp, offset) +const +swap_pager_block_offset(swp, offset) sw_pager_t swp; vm_offset_t offset; -{ - return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); +{ + return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE); } /* * _swap_pager_haspage returns TRUE if the pager has data that has - * been written out. + * been written out. 
*/ static boolean_t _swap_pager_haspage(swp, offset) @@ -785,18 +803,17 @@ _swap_pager_haspage(swp, offset) register sw_blk_t swb; int ix; - ix = offset / (SWB_NPAGES*PAGE_SIZE); + ix = offset / (SWB_NPAGES * PAGE_SIZE); if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { - return(FALSE); + return (FALSE); } swb = &swp->sw_blocks[ix]; - ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; + ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE; if (swb->swb_block[ix] != SWB_EMPTY) { if (swb->swb_valid & (1 << ix)) return TRUE; } - - return(FALSE); + return (FALSE); } /* @@ -836,12 +853,13 @@ swap_pager_ridpages(m, count, reqpage) int reqpage; { int i; + for (i = 0; i < count; i++) if (i != reqpage) swap_pager_freepage(m[i]); } -int swapwritecount=0; +int swapwritecount = 0; /* * swap_pager_iodone1 is the completion routine for both reads and async writes @@ -852,7 +870,7 @@ swap_pager_iodone1(bp) { bp->b_flags |= B_DONE; bp->b_flags &= ~B_ASYNC; - wakeup((caddr_t)bp); + wakeup((caddr_t) bp); /* if ((bp->b_flags & B_READ) == 0) vwakeup(bp); @@ -884,32 +902,31 @@ swap_pager_input(swp, m, count, reqpage) object = m[reqpage]->object; paging_offset = object->paging_offset; /* - * First determine if the page exists in the pager if this is - * a sync read. This quickly handles cases where we are - * following shadow chains looking for the top level object - * with the page. + * First determine if the page exists in the pager if this is a sync + * read. This quickly handles cases where we are following shadow + * chains looking for the top level object with the page. */ if (swp->sw_blocks == NULL) { swap_pager_ridpages(m, count, reqpage); - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } - - for(i = 0; i < count; i++) { + for (i = 0; i < count; i++) { vm_offset_t foff = m[i]->offset + paging_offset; int ix = swap_pager_block_index(swp, foff); + if (ix >= swp->sw_nblocks) { int j; - if( i <= reqpage) { + + if (i <= reqpage) { swap_pager_ridpages(m, count, reqpage); - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } - for(j = i; j < count; j++) { + for (j = i; j < count; j++) { swap_pager_freepage(m[j]); } count = i; break; } - swb[i] = &swp->sw_blocks[ix]; off[i] = swap_pager_block_offset(swp, foff); reqaddr[i] = swb[i]->swb_block[off[i]]; @@ -918,12 +935,10 @@ swap_pager_input(swp, m, count, reqpage) /* make sure that our required input request is existant */ if (reqaddr[reqpage] == SWB_EMPTY || - (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { + (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { swap_pager_ridpages(m, count, reqpage); - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } - - reqdskregion = reqaddr[reqpage] / dmmax; /* @@ -932,15 +947,15 @@ swap_pager_input(swp, m, count, reqpage) failed = 0; first = 0; for (i = reqpage - 1; i >= 0; --i) { - if ( failed || (reqaddr[i] == SWB_EMPTY) || - (swb[i]->swb_valid & (1 << off[i])) == 0 || - (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || - ((reqaddr[i] / dmmax) != reqdskregion)) { - failed = 1; - swap_pager_freepage(m[i]); - if (first == 0) - first = i + 1; - } + if (failed || (reqaddr[i] == SWB_EMPTY) || + (swb[i]->swb_valid & (1 << off[i])) == 0 || + (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || + ((reqaddr[i] / dmmax) != reqdskregion)) { + failed = 1; + swap_pager_freepage(m[i]); + if (first == 0) + first = i + 1; + } } /* * search forwards for the last contiguous page to transfer @@ -948,45 +963,43 @@ swap_pager_input(swp, m, count, reqpage) failed = 0; last = count; 
for (i = reqpage + 1; i < count; i++) { - if ( failed || (reqaddr[i] == SWB_EMPTY) || - (swb[i]->swb_valid & (1 << off[i])) == 0 || - (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || - ((reqaddr[i] / dmmax) != reqdskregion)) { - failed = 1; - swap_pager_freepage(m[i]); - if (last == count) - last = i; - } + if (failed || (reqaddr[i] == SWB_EMPTY) || + (swb[i]->swb_valid & (1 << off[i])) == 0 || + (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || + ((reqaddr[i] / dmmax) != reqdskregion)) { + failed = 1; + swap_pager_freepage(m[i]); + if (last == count) + last = i; + } } count = last; if (first != 0) { for (i = first; i < count; i++) { - m[i-first] = m[i]; - reqaddr[i-first] = reqaddr[i]; - off[i-first] = off[i]; + m[i - first] = m[i]; + reqaddr[i - first] = reqaddr[i]; + off[i - first] = off[i]; } count -= first; reqpage -= first; } - ++swb[reqpage]->swb_locked; /* - * at this point: - * "m" is a pointer to the array of vm_page_t for paging I/O - * "count" is the number of vm_page_t entries represented by "m" - * "object" is the vm_object_t for I/O - * "reqpage" is the index into "m" for the page actually faulted + * at this point: "m" is a pointer to the array of vm_page_t for + * paging I/O "count" is the number of vm_page_t entries represented + * by "m" "object" is the vm_object_t for I/O "reqpage" is the index + * into "m" for the page actually faulted */ - + spc = NULL; /* we might not use an spc data structure */ if (count == 1) { /* - * if a kva has not been allocated, we can only do a one page transfer, - * so we free the other pages that might have been allocated by - * vm_fault. + * if a kva has not been allocated, we can only do a one page + * transfer, so we free the other pages that might have been + * allocated by vm_fault. 
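The two scans in swap_pager_input() above trim the readahead window to the pages whose swap addresses are exactly contiguous with the requested page and sit in the same dmmax region, freeing everything outside the run and then compacting the arrays. The simplified version below computes only the surviving window and omits the swb_valid check; EMPTY, BLKS_PER_PAGE and REGION_BLKS are assumed stand-ins for SWB_EMPTY, btodb(PAGE_SIZE) and dmmax.

    #include <stdio.h>

    #define EMPTY         (-1)
    #define BLKS_PER_PAGE 8     /* assumed: 4K page over 512-byte disk blocks */
    #define REGION_BLKS   128   /* assumed allocation region size (dmmax) */

    /* Computes the window [ *first, *last ) of pages around req that can be
     * read in one contiguous transfer. */
    static void
    clip_run(const int *addr, int count, int req, int *first, int *last)
    {
        *first = 0;
        *last = count;
        for (int i = req - 1; i >= 0; i--) {
            if (addr[i] == EMPTY ||
                addr[i] != addr[req] + (i - req) * BLKS_PER_PAGE ||
                addr[i] / REGION_BLKS != addr[req] / REGION_BLKS) {
                *first = i + 1;
                break;
            }
        }
        for (int i = req + 1; i < count; i++) {
            if (addr[i] == EMPTY ||
                addr[i] != addr[req] + (i - req) * BLKS_PER_PAGE ||
                addr[i] / REGION_BLKS != addr[req] / REGION_BLKS) {
                *last = i;
                break;
            }
        }
    }

    int main(void)
    {
        /* page 2 is the fault; pages 0..4 are candidate readahead */
        int addr[] = { 40, 56, 64, 72, 200 };   /* only 56,64,72 are contiguous */
        int first, last;

        clip_run(addr, 5, 2, &first, &last);
        printf("transfer pages [%d, %d)\n", first, last);   /* expect [1, 4) */
        return 0;
    }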
*/ swap_pager_ridpages(m, count, reqpage); m[0] = m[reqpage]; @@ -994,23 +1007,24 @@ swap_pager_input(swp, m, count, reqpage) count = 1; reqpage = 0; - /* - * get a swap pager clean data structure, block until we get it - */ + /* + * get a swap pager clean data structure, block until we get + * it + */ if (swap_pager_free.tqh_first == NULL) { s = splbio(); - if( curproc == pageproc) + if (curproc == pageproc) (void) swap_pager_clean(); else - wakeup((caddr_t) &vm_pages_needed); - while (swap_pager_free.tqh_first == NULL) { + wakeup((caddr_t) & vm_pages_needed); + while (swap_pager_free.tqh_first == NULL) { swap_pager_needflags |= SWAP_FREE_NEEDED; - tsleep((caddr_t)&swap_pager_free, - PVM, "swpfre", 0); - if( curproc == pageproc) + tsleep((caddr_t) & swap_pager_free, + PVM, "swpfre", 0); + if (curproc == pageproc) (void) swap_pager_clean(); else - wakeup((caddr_t) &vm_pages_needed); + wakeup((caddr_t) & vm_pages_needed); } splx(s); } @@ -1022,9 +1036,9 @@ swap_pager_input(swp, m, count, reqpage) bp->b_spc = spc; bp->b_vnbufs.le_next = NOLIST; } else { - /* - * Get a swap buffer header to perform the IO - */ + /* + * Get a swap buffer header to perform the IO + */ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; } @@ -1032,7 +1046,7 @@ swap_pager_input(swp, m, count, reqpage) /* * map our page(s) into kva for input */ - pmap_qenter( kva, m, count); + pmap_qenter(kva, m, count); bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = swap_pager_iodone1; @@ -1042,11 +1056,10 @@ swap_pager_input(swp, m, count, reqpage) crhold(bp->b_wcred); bp->b_un.b_addr = (caddr_t) kva; bp->b_blkno = reqaddr[0]; - bp->b_bcount = PAGE_SIZE*count; - bp->b_bufsize = PAGE_SIZE*count; - - bgetvp( swapdev_vp, bp); + bp->b_bcount = PAGE_SIZE * count; + bp->b_bufsize = PAGE_SIZE * count; + pbgetvp(swapdev_vp, bp); swp->sw_piip++; cnt.v_swapin++; @@ -1061,9 +1074,9 @@ swap_pager_input(swp, m, count, reqpage) */ s = splbio(); while ((bp->b_flags & B_DONE) == 0) { - tsleep((caddr_t)bp, PVM, "swread", 0); + tsleep((caddr_t) bp, PVM, "swread", 0); } - + if (bp->b_flags & B_ERROR) { printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", bp->b_blkno, bp->b_bcount, bp->b_error); @@ -1075,13 +1088,13 @@ swap_pager_input(swp, m, count, reqpage) --swp->sw_piip; if (swp->sw_piip == 0) wakeup((caddr_t) swp); - + + /* - * relpbuf does this, but we maintain our own buffer - * list also... + * relpbuf does this, but we maintain our own buffer list also... */ if (bp->b_vp) - brelvp(bp); + pbrelvp(bp); splx(s); --swb[reqpage]->swb_locked; @@ -1089,20 +1102,22 @@ swap_pager_input(swp, m, count, reqpage) /* * remove the mapping for kernel virtual */ - pmap_qremove( kva, count); + pmap_qremove(kva, count); if (spc) { + if (bp->b_flags & B_WANTED) + wakeup((caddr_t) bp); /* * if we have used an spc, we need to free it. 
*/ - if( bp->b_rcred != NOCRED) + if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); - if( bp->b_wcred != NOCRED) + if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); if (swap_pager_needflags & SWAP_FREE_NEEDED) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; - wakeup((caddr_t)&swap_pager_free); + wakeup((caddr_t) & swap_pager_free); } } else { /* @@ -1112,27 +1127,31 @@ swap_pager_input(swp, m, count, reqpage) /* * finish up input if everything is ok */ - if( rv == VM_PAGER_OK) { + if (rv == VM_PAGER_OK) { for (i = 0; i < count; i++) { pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); - m[i]->flags |= PG_CLEAN; - m[i]->flags &= ~PG_LAUNDRY; + m[i]->dirty = 0; if (i != reqpage) { /* - * whether or not to leave the page activated - * is up in the air, but we should put the page - * on a page queue somewhere. (it already is in - * the object). - * After some emperical results, it is best - * to deactivate the readahead pages. + * whether or not to leave the page + * activated is up in the air, but we + * should put the page on a page queue + * somewhere. (it already is in the + * object). After some emperical + * results, it is best to deactivate + * the readahead pages. */ - vm_page_deactivate(m[i]); - + if ((i == reqpage - 1) || (i == reqpage + 1)) + vm_page_activate(m[i]); + else + vm_page_deactivate(m[i]); + /* - * just in case someone was asking for this - * page we now tell them that it is ok to use + * just in case someone was asking for + * this page we now tell them that it + * is ok to use */ - m[i]->flags &= ~PG_FAKE; + m[i]->valid = VM_PAGE_BITS_ALL; PAGE_WAKEUP(m[i]); } } @@ -1144,15 +1163,15 @@ swap_pager_input(swp, m, count, reqpage) */ if (swap_pager_full) { for (i = 0; i < count; i++) { - m[i]->flags &= ~PG_CLEAN; + m[i]->dirty = VM_PAGE_BITS_ALL; } - _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); + _swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE); } } else { swap_pager_ridpages(m, count, reqpage); } } - return(rv); + return (rv); } int @@ -1175,29 +1194,24 @@ swap_pager_output(swp, m, count, flags, rtvals) int reqaddr[count]; int failed; -/* - if( count > 1) - printf("off: 0x%x, count: %d\n", m[0]->offset, count); -*/ - if( vm_swap_size) + if (vm_swap_size) no_swap_space = 0; - if( no_swap_space) { - for(i=0;i<count;i++) + if (no_swap_space) { + for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_FAIL; return VM_PAGER_FAIL; } - spc = NULL; object = m[0]->object; paging_offset = object->paging_offset; failed = 0; - for(j=0;j<count;j++) { + for (j = 0; j < count; j++) { foff = m[j]->offset + paging_offset; ix = swap_pager_block_index(swp, foff); swb[j] = 0; - if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { rtvals[j] = VM_PAGER_FAIL; failed = 1; continue; @@ -1206,22 +1220,23 @@ swap_pager_output(swp, m, count, flags, rtvals) } swb[j] = &swp->sw_blocks[ix]; ++swb[j]->swb_locked; - if( failed) { + if (failed) { rtvals[j] = VM_PAGER_FAIL; continue; } off = swap_pager_block_offset(swp, foff); reqaddr[j] = swb[j]->swb_block[off]; - if( reqaddr[j] == SWB_EMPTY) { + if (reqaddr[j] == SWB_EMPTY) { int blk; int tries; int ntoget; + tries = 0; s = splbio(); /* - * if any other pages have been allocated in this block, we - * only try to get one page. + * if any other pages have been allocated in this + * block, we only try to get one page. 
*/ for (i = 0; i < SWB_NPAGES; i++) { if (swb[j]->swb_block[i] != SWB_EMPTY) @@ -1231,17 +1246,16 @@ swap_pager_output(swp, m, count, flags, rtvals) ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; /* - * this code is alittle conservative, but works - * (the intent of this code is to allocate small chunks - * for small objects) + * this code is alittle conservative, but works (the + * intent of this code is to allocate small chunks for + * small objects) */ - if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { - ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; + if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) { + ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE; } - -retrygetspace: + retrygetspace: if (!swap_pager_full && ntoget > 1 && - swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { + swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { for (i = 0; i < ntoget; i++) { swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; @@ -1252,8 +1266,8 @@ retrygetspace: } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), &swb[j]->swb_block[off])) { /* - * if the allocation has failed, we try to reclaim space and - * retry. + * if the allocation has failed, we try to + * reclaim space and retry. */ if (++tries == 1) { swap_pager_reclaim(); @@ -1264,7 +1278,7 @@ retrygetspace: swap_pager_full = 1; } else { reqaddr[j] = swb[j]->swb_block[off]; - swb[j]->swb_valid &= ~(1<<off); + swb[j]->swb_valid &= ~(1 << off); } splx(s); } @@ -1275,58 +1289,54 @@ retrygetspace: */ failed = 0; for (i = 0; i < count; i++) { - if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || - (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || - (rtvals[i] != VM_PAGER_OK)) { + if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) || + (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || + (rtvals[i] != VM_PAGER_OK)) { failed = 1; - if( rtvals[i] == VM_PAGER_OK) + if (rtvals[i] == VM_PAGER_OK) rtvals[i] = VM_PAGER_AGAIN; } } - for(i = 0; i < count; i++) { - if( rtvals[i] != VM_PAGER_OK) { - if( swb[i]) + for (i = 0; i < count; i++) { + if (rtvals[i] != VM_PAGER_OK) { + if (swb[i]) --swb[i]->swb_locked; } } - for(i = 0; i < count; i++) - if( rtvals[i] != VM_PAGER_OK) + for (i = 0; i < count; i++) + if (rtvals[i] != VM_PAGER_OK) break; - if( i == 0) { + if (i == 0) { return VM_PAGER_AGAIN; } - count = i; - for(i=0;i<count;i++) { - if( reqaddr[i] == SWB_EMPTY) + for (i = 0; i < count; i++) { + if (reqaddr[i] == SWB_EMPTY) printf("I/O to empty block????\n"); } - + /* - */ - + * */ + /* - * For synchronous writes, we clean up - * all completed async pageouts. + * For synchronous writes, we clean up all completed async pageouts. */ if ((flags & B_ASYNC) == 0) { swap_pager_clean(); } - kva = 0; /* - * we allocate a new kva for transfers > 1 page - * but for transfers == 1 page, the swap_pager_free list contains - * entries that have pre-allocated kva's (for efficiency). - * NOTE -- we do not use the physical buffer pool or the - * preallocated associated kva's because of the potential for - * deadlock. This is very subtile -- but deadlocks or resource - * contention must be avoided on pageouts -- or your system will - * sleep (forever) !!! + * we allocate a new kva for transfers > 1 page but for transfers == 1 + * page, the swap_pager_free list contains entries that have + * pre-allocated kva's (for efficiency). NOTE -- we do not use the + * physical buffer pool or the preallocated associated kva's because + * of the potential for deadlock. 
This is very subtile -- but + * deadlocks or resource contention must be avoided on pageouts -- or + * your system will sleep (forever) !!! */ /* if ( count > 1) { @@ -1339,30 +1349,37 @@ retrygetspace: } return VM_PAGER_AGAIN; } - } + } */ /* * get a swap pager clean data structure, block until we get it */ - if (swap_pager_free.tqh_first == NULL) { + if (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { s = splbio(); - if( curproc == pageproc) + if (curproc == pageproc) { (void) swap_pager_clean(); - else - wakeup((caddr_t) &vm_pages_needed); - while (swap_pager_free.tqh_first == NULL) { +/* + splx(s); + return VM_PAGER_AGAIN; +*/ + } else + wakeup((caddr_t) & vm_pages_needed); + while (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (curproc == pageproc && + (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min) + wakeup((caddr_t) & cnt.v_free_count); + swap_pager_needflags |= SWAP_FREE_NEEDED; - tsleep((caddr_t)&swap_pager_free, - PVM, "swpfre", 0); - if( curproc == pageproc) + tsleep((caddr_t) & swap_pager_free, + PVM, "swpfre", 0); + if (curproc == pageproc) (void) swap_pager_clean(); else - wakeup((caddr_t) &vm_pages_needed); + wakeup((caddr_t) & vm_pages_needed); } splx(s); } - spc = swap_pager_free.tqh_first; TAILQ_REMOVE(&swap_pager_free, spc, spc_list); @@ -1376,7 +1393,7 @@ retrygetspace: /* * get the base I/O offset into the swap file */ - for(i=0;i<count;i++) { + for (i = 0; i < count; i++) { foff = m[i]->offset + paging_offset; off = swap_pager_block_offset(swp, foff); /* @@ -1400,27 +1417,27 @@ retrygetspace: bp->b_flags = B_BUSY; bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; - if( bp->b_rcred != NOCRED) + if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); - if( bp->b_wcred != NOCRED) + if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_data = (caddr_t) kva; bp->b_blkno = reqaddr[0]; - bgetvp( swapdev_vp, bp); + pbgetvp(swapdev_vp, bp); - bp->b_bcount = PAGE_SIZE*count; - bp->b_bufsize = PAGE_SIZE*count; + bp->b_bcount = PAGE_SIZE * count; + bp->b_bufsize = PAGE_SIZE * count; swapdev_vp->v_numoutput++; /* - * If this is an async write we set up additional buffer fields - * and place a "cleaning" entry on the inuse queue. + * If this is an async write we set up additional buffer fields and + * place a "cleaning" entry on the inuse queue. 
*/ s = splbio(); - if ( flags & B_ASYNC ) { + if (flags & B_ASYNC) { spc->spc_flags = 0; spc->spc_swp = swp; - for(i=0;i<count;i++) + for (i = 0; i < count; i++) spc->spc_m[i] = m[i]; spc->spc_count = count; /* @@ -1444,22 +1461,21 @@ retrygetspace: * perform the I/O */ VOP_STRATEGY(bp); - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { + if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) { if ((bp->b_flags & B_DONE) == B_DONE) { swap_pager_clean(); } splx(s); - for(i=0;i<count;i++) { + for (i = 0; i < count; i++) { rtvals[i] = VM_PAGER_PEND; } return VM_PAGER_PEND; } - /* * wait for the sync I/O to complete */ while ((bp->b_flags & B_DONE) == 0) { - tsleep((caddr_t)bp, PVM, "swwrt", 0); + tsleep((caddr_t) bp, PVM, "swwrt", 0); } if (bp->b_flags & B_ERROR) { printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n", @@ -1472,54 +1488,52 @@ retrygetspace: --swp->sw_poip; if (swp->sw_poip == 0) wakeup((caddr_t) swp); - + if (bp->b_vp) - brelvp(bp); + pbrelvp(bp); + if (bp->b_flags & B_WANTED) + wakeup((caddr_t) bp); splx(s); /* * remove the mapping for kernel virtual */ - pmap_qremove( kva, count); + pmap_qremove(kva, count); /* - * if we have written the page, then indicate that the page - * is clean. + * if we have written the page, then indicate that the page is clean. */ if (rv == VM_PAGER_OK) { - for(i=0;i<count;i++) { - if( rtvals[i] == VM_PAGER_OK) { - m[i]->flags |= PG_CLEAN; - m[i]->flags &= ~PG_LAUNDRY; + for (i = 0; i < count; i++) { + if (rtvals[i] == VM_PAGER_OK) { pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + m[i]->dirty = 0; /* - * optimization, if a page has been read during the - * pageout process, we activate it. + * optimization, if a page has been read + * during the pageout process, we activate it. */ - if ( (m[i]->flags & PG_ACTIVE) == 0 && - pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) + if ((m[i]->flags & PG_ACTIVE) == 0 && + ((m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) vm_page_activate(m[i]); } } } else { - for(i=0;i<count;i++) { + for (i = 0; i < count; i++) { rtvals[i] = rv; - m[i]->flags |= PG_LAUNDRY; } } - if( bp->b_rcred != NOCRED) + if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); - if( bp->b_wcred != NOCRED) + if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); if (swap_pager_needflags & SWAP_FREE_NEEDED) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; - wakeup((caddr_t)&swap_pager_free); + wakeup((caddr_t) & swap_pager_free); } - - return(rv); + return (rv); } boolean_t @@ -1534,11 +1548,11 @@ swap_pager_clean() for (;;) { s = splbio(); /* - * Look up and removal from done list must be done - * at splbio() to avoid conflicts with swap_pager_iodone. + * Look up and removal from done list must be done at splbio() + * to avoid conflicts with swap_pager_iodone. */ while ((spc = swap_pager_done.tqh_first) != 0) { - pmap_qremove( spc->spc_kva, spc->spc_count); + pmap_qremove(spc->spc_kva, spc->spc_count); swap_pager_finish(spc); TAILQ_REMOVE(&swap_pager_done, spc, spc_list); goto doclean; @@ -1552,8 +1566,8 @@ swap_pager_clean() break; /* - * The desired page was found to be busy earlier in - * the scan but has since completed. + * The desired page was found to be busy earlier in the scan + * but has since completed. 
*/ doclean: if (tspc && tspc == spc) { @@ -1563,13 +1577,13 @@ doclean: TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); if (swap_pager_needflags & SWAP_FREE_NEEDED) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; - wakeup((caddr_t)&swap_pager_free); + wakeup((caddr_t) & swap_pager_free); } ++cleandone; splx(s); } - return(tspc ? TRUE : FALSE); + return (tspc ? TRUE : FALSE); } void @@ -1579,32 +1593,33 @@ swap_pager_finish(spc) vm_object_t object = spc->spc_m[0]->object; int i; - if ((object->paging_in_progress -= spc->spc_count) == 0) + if ((object->paging_in_progress -= spc->spc_count) == 0) thread_wakeup((int) object); /* - * If no error mark as clean and inform the pmap system. - * If error, mark as dirty so we will try again. - * (XXX could get stuck doing this, should give up after awhile) + * If no error mark as clean and inform the pmap system. If error, + * mark as dirty so we will try again. (XXX could get stuck doing + * this, should give up after awhile) */ if (spc->spc_flags & SPC_ERROR) { - for(i=0;i<spc->spc_count;i++) { + for (i = 0; i < spc->spc_count; i++) { printf("swap_pager_finish: I/O error, clean of page %lx failed\n", - (u_long)VM_PAGE_TO_PHYS(spc->spc_m[i])); - spc->spc_m[i]->flags |= PG_LAUNDRY; + (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i])); } } else { - for(i=0;i<spc->spc_count;i++) { + for (i = 0; i < spc->spc_count; i++) { pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); - spc->spc_m[i]->flags |= PG_CLEAN; + spc->spc_m[i]->dirty = 0; + if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 && + ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i])))) + vm_page_activate(spc->spc_m[i]); } } - for(i=0;i<spc->spc_count;i++) { + for (i = 0; i < spc->spc_count; i++) { /* - * we wakeup any processes that are waiting on - * these pages. + * we wakeup any processes that are waiting on these pages. */ PAGE_WAKEUP(spc->spc_m[i]); } @@ -1631,42 +1646,41 @@ swap_pager_iodone(bp) spc->spc_flags |= SPC_ERROR; printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d", (bp->b_flags & B_READ) ? 
"pagein" : "pageout", - bp->b_error, (u_long)bp->b_blkno, bp->b_bcount); + bp->b_error, (u_long) bp->b_blkno, bp->b_bcount); } - /* if ((bp->b_flags & B_READ) == 0) vwakeup(bp); */ - - if (bp->b_vp) { - brelvp(bp); - } - if( bp->b_rcred != NOCRED) + + if (bp->b_vp) + pbrelvp(bp); + + if (bp->b_flags & B_WANTED) + wakeup((caddr_t) bp); + + if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); - if( bp->b_wcred != NOCRED) + if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); nswiodone += spc->spc_count; if (--spc->spc_swp->sw_poip == 0) { - wakeup((caddr_t)spc->spc_swp); + wakeup((caddr_t) spc->spc_swp); } - if ((swap_pager_needflags & SWAP_FREE_NEEDED) || - swap_pager_inuse.tqh_first == 0) { + swap_pager_inuse.tqh_first == 0) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; - wakeup((caddr_t)&swap_pager_free); - wakeup((caddr_t)&vm_pages_needed); + wakeup((caddr_t) & swap_pager_free); + wakeup((caddr_t) & vm_pages_needed); } - if (vm_pageout_pages_needed) { - wakeup((caddr_t)&vm_pageout_pages_needed); + wakeup((caddr_t) & vm_pageout_pages_needed); } - if ((swap_pager_inuse.tqh_first == NULL) || - (cnt.v_free_count < cnt.v_free_min && - nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { - wakeup((caddr_t)&vm_pages_needed); + ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && + nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { + wakeup((caddr_t) & vm_pages_needed); } splx(s); } @@ -1675,8 +1689,9 @@ swap_pager_iodone(bp) * return true if any swap control structures can be allocated */ int -swap_pager_ready() { - if( swap_pager_free.tqh_first) +swap_pager_ready() +{ + if (swap_pager_free.tqh_first) return 1; else return 0; diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 848039c9f961..c1bc8c82d002 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: swap_pager.h,v 1.2 1994/05/25 09:18:39 rgrimes Exp $ + * $Id: swap_pager.h,v 1.3 1994/10/09 01:52:06 phk Exp $ */ /* @@ -56,46 +56,46 @@ * space is recovered by the swap pager now... */ #define SWB_NPAGES 8 -struct swblock { +struct swblock { unsigned short swb_valid; /* bitmask for valid pages */ unsigned short swb_locked; /* block locked */ - int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */ + int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */ }; -typedef struct swblock *sw_blk_t; +typedef struct swblock *sw_blk_t; /* * Swap pager private data. 
*/ struct swpager { - vm_size_t sw_osize; /* size of object we are backing (bytes) */ - int sw_nblocks;/* number of blocks in list (sw_blk_t units) */ - sw_blk_t sw_blocks; /* pointer to list of swap blocks */ - short sw_flags; /* flags */ - short sw_poip; /* pageouts in progress */ - short sw_piip; /* pageins in progress */ + vm_size_t sw_osize; /* size of object we are backing (bytes) */ + int sw_nblocks; /* number of blocks in list (sw_blk_t units) */ + sw_blk_t sw_blocks; /* pointer to list of swap blocks */ + short sw_flags; /* flags */ + short sw_poip; /* pageouts in progress */ + short sw_piip; /* pageins in progress */ }; -typedef struct swpager *sw_pager_t; +typedef struct swpager *sw_pager_t; #define SW_WANTED 0x01 #define SW_NAMED 0x02 #ifdef KERNEL -void swap_pager_init(void); -vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t); -void swap_pager_dealloc(vm_pager_t); -boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t); -boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t); -boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); -boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); -int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); -void swap_pager_iodone(struct buf *); -boolean_t swap_pager_clean(); -void swap_pager_copy __P((vm_pager_t, vm_offset_t, vm_pager_t, vm_offset_t, vm_offset_t)); -void swap_pager_freespace __P((vm_pager_t, vm_offset_t, vm_offset_t)); +void swap_pager_init(void); +vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t); +void swap_pager_dealloc(vm_pager_t); +boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t); +boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t); +boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); +boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); +int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); +void swap_pager_iodone(struct buf *); +boolean_t swap_pager_clean(); +void swap_pager_copy __P((vm_pager_t, vm_offset_t, vm_pager_t, vm_offset_t, vm_offset_t)); +void swap_pager_freespace __P((vm_pager_t, vm_offset_t, vm_offset_t)); extern struct pagerops swappagerops; #endif -#endif /* _SWAP_PAGER_ */ +#endif /* _SWAP_PAGER_ */ diff --git a/sys/vm/vm.h b/sys/vm/vm.h index 9f51c9481ac9..a24ba321a569 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -31,13 +31,13 @@ * SUCH DAMAGE. * * @(#)vm.h 8.2 (Berkeley) 12/13/93 - * $Id$ + * $Id: vm.h,v 1.3 1994/08/02 07:55:16 davidg Exp $ */ #ifndef VM_H #define VM_H -typedef char vm_inherit_t; /* XXX: inheritance codes */ +typedef char vm_inherit_t; /* XXX: inheritance codes */ union vm_map_object; typedef union vm_map_object vm_map_object_t; @@ -52,7 +52,7 @@ struct vm_object; typedef struct vm_object *vm_object_t; struct vm_page; -typedef struct vm_page *vm_page_t; +typedef struct vm_page *vm_page_t; struct pager_struct; typedef struct pager_struct *vm_pager_t; @@ -75,20 +75,21 @@ typedef struct pager_struct *vm_pager_t; * Several fields are temporary (text, data stuff). 
*/ struct vmspace { - struct vm_map vm_map; /* VM address map */ - struct pmap vm_pmap; /* private physical map */ - int vm_refcnt; /* number of references */ - caddr_t vm_shm; /* SYS5 shared memory private data XXX */ + struct vm_map vm_map; /* VM address map */ + struct pmap vm_pmap; /* private physical map */ + int vm_refcnt; /* number of references */ + caddr_t vm_shm; /* SYS5 shared memory private data XXX */ /* we copy from vm_startcopy to the end of the structure on fork */ #define vm_startcopy vm_rssize - segsz_t vm_rssize; /* current resident set size in pages */ + segsz_t vm_rssize; /* current resident set size in pages */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ segsz_t vm_ssize; /* stack size (pages) */ - caddr_t vm_taddr; /* user virtual address of text XXX */ - caddr_t vm_daddr; /* user virtual address of data XXX */ + caddr_t vm_taddr; /* user virtual address of text XXX */ + caddr_t vm_daddr; /* user virtual address of data XXX */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ caddr_t vm_minsaddr; /* user VA at max stack growth */ }; -#endif /* VM_H */ + +#endif /* VM_H */ diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 74a80d5b1cda..e55a5f44accc 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 - * $Id: vm_extern.h,v 1.6 1994/09/27 18:00:26 davidg Exp $ + * $Id: vm_extern.h,v 1.7 1994/12/30 08:02:16 bde Exp $ */ #ifndef _VM_EXTERN_H_ @@ -46,13 +46,15 @@ struct mount; struct vnode; #ifdef KGDB -void chgkprot __P((caddr_t, int, int)); +void chgkprot __P((caddr_t, int, int)); + #endif /* * Try to get semi-meaningful wait messages into thread_sleep... 
*/ void thread_sleep_ __P((int, simple_lock_t, char *)); + #if __GNUC__ >= 2 #define thread_sleep(a,b,c) thread_sleep_((a), (b), __FUNCTION__) #else @@ -61,83 +63,77 @@ void thread_sleep_ __P((int, simple_lock_t, char *)); #ifdef KERNEL #ifdef TYPEDEF_FOR_UAP -int getpagesize __P((struct proc *p, void *, int *)); -int madvise __P((struct proc *, void *, int *)); -int mincore __P((struct proc *, void *, int *)); -int mprotect __P((struct proc *, void *, int *)); -int msync __P((struct proc *, void *, int *)); -int munmap __P((struct proc *, void *, int *)); -int obreak __P((struct proc *, void *, int *)); -int sbrk __P((struct proc *, void *, int *)); -int smmap __P((struct proc *, void *, int *)); -int sstk __P((struct proc *, void *, int *)); +int getpagesize __P((struct proc * p, void *, int *)); +int madvise __P((struct proc *, void *, int *)); +int mincore __P((struct proc *, void *, int *)); +int mprotect __P((struct proc *, void *, int *)); +int msync __P((struct proc *, void *, int *)); +int munmap __P((struct proc *, void *, int *)); +int obreak __P((struct proc *, void *, int *)); +int sbrk __P((struct proc *, void *, int *)); +int smmap __P((struct proc *, void *, int *)); +int sstk __P((struct proc *, void *, int *)); + #endif -void assert_wait __P((int, boolean_t)); -int grow __P((struct proc *, u_int)); -void iprintf __P((const char *, ...)); -int kernacc __P((caddr_t, int, int)); -int kinfo_loadavg __P((int, char *, int *, int, int *)); -int kinfo_meter __P((int, caddr_t, int *, int, int *)); -vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t)); -vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t)); -vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t)); -void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t)); -void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t)); -void kmem_init __P((vm_offset_t, vm_offset_t)); -vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t)); -vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *, - vm_size_t, boolean_t)); -void loadav __P((struct loadavg *)); -void munmapfd __P((struct proc *, int)); -int pager_cache __P((vm_object_t, boolean_t)); -void sched __P((void)); -int svm_allocate __P((struct proc *, void *, int *)); -int svm_deallocate __P((struct proc *, void *, int *)); -int svm_inherit __P((struct proc *, void *, int *)); -int svm_protect __P((struct proc *, void *, int *)); -void swapinit __P((void)); -int swapon __P((struct proc *, void *, int *)); -void swapout __P((struct proc *)); -void swapout_threads __P((void)); -int swfree __P((struct proc *, int)); -void swstrategy __P((struct buf *)); -void thread_block __P((char *)); -void thread_sleep __P((int, simple_lock_t, boolean_t)); -void thread_wakeup __P((int)); -int useracc __P((caddr_t, int, int)); -int vm_allocate __P((vm_map_t, - vm_offset_t *, vm_size_t, boolean_t)); -int vm_allocate_with_pager __P((vm_map_t, vm_offset_t *, - vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t)); -int vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t)); -int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t)); -void vm_fault_copy_entry __P((vm_map_t, - vm_map_t, vm_map_entry_t, vm_map_entry_t)); -void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t)); -int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t)); -int vm_fork __P((struct proc *, struct proc *, int)); -int vm_inherit __P((vm_map_t, - vm_offset_t, vm_size_t, vm_inherit_t)); -void vm_init_limits __P((struct proc *)); -void vm_mem_init __P((void)); -int vm_mmap __P((vm_map_t, 
vm_offset_t *, vm_size_t, - vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t)); -vm_offset_t vm_page_alloc_contig __P((vm_offset_t, vm_offset_t, - vm_offset_t, vm_offset_t)); -int vm_protect __P((vm_map_t, - vm_offset_t, vm_size_t, boolean_t, vm_prot_t)); -void vm_set_page_size __P((void)); -void vmmeter __P((void)); -struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int)); -struct vmspace *vmspace_fork __P((struct vmspace *)); -void vmspace_free __P((struct vmspace *)); -void vmtotal __P((struct vmtotal *)); -void vnode_pager_setsize __P((struct vnode *, u_long)); -void vnode_pager_umount __P((struct mount *)); -boolean_t vnode_pager_uncache __P((struct vnode *)); -void vslock __P((caddr_t, u_int)); -void vsunlock __P((caddr_t, u_int, int)); -#endif /* KERNEL */ +void assert_wait __P((int, boolean_t)); +int grow __P((struct proc *, u_int)); +void iprintf __P((const char *,...)); +int kernacc __P((caddr_t, int, int)); +int kinfo_loadavg __P((int, char *, int *, int, int *)); +int kinfo_meter __P((int, caddr_t, int *, int, int *)); +vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t)); +vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t)); +vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t)); +void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t)); +void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t)); +void kmem_init __P((vm_offset_t, vm_offset_t)); +vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t)); +vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t, boolean_t)); +void loadav __P((struct loadavg *)); +void munmapfd __P((struct proc *, int)); +int pager_cache __P((vm_object_t, boolean_t)); +void sched __P((void)); +int svm_allocate __P((struct proc *, void *, int *)); +int svm_deallocate __P((struct proc *, void *, int *)); +int svm_inherit __P((struct proc *, void *, int *)); +int svm_protect __P((struct proc *, void *, int *)); +void swapinit __P((void)); +int swapon __P((struct proc *, void *, int *)); +void swapout __P((struct proc *)); +void swapout_threads __P((void)); +int swfree __P((struct proc *, int)); +void swstrategy __P((struct buf *)); +void thread_block __P((char *)); +void thread_sleep __P((int, simple_lock_t, boolean_t)); +void thread_wakeup __P((int)); +int useracc __P((caddr_t, int, int)); +int vm_allocate __P((vm_map_t, vm_offset_t *, vm_size_t, boolean_t)); +int vm_allocate_with_pager __P((vm_map_t, vm_offset_t *, vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t)); +int vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t)); +int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t)); +void vm_fault_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); +void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t)); +int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t)); +int vm_fork __P((struct proc *, struct proc *, int)); +int vm_inherit __P((vm_map_t, vm_offset_t, vm_size_t, vm_inherit_t)); +void vm_init_limits __P((struct proc *)); +void vm_mem_init __P((void)); +int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t)); +vm_offset_t vm_page_alloc_contig __P((vm_offset_t, vm_offset_t, vm_offset_t, vm_offset_t)); +int vm_protect __P((vm_map_t, vm_offset_t, vm_size_t, boolean_t, vm_prot_t)); +void vm_set_page_size __P((void)); +void vmmeter __P((void)); +struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int)); +struct vmspace *vmspace_fork __P((struct vmspace *)); +void vmspace_free __P((struct vmspace *)); +void 
vmtotal __P((struct vmtotal *)); +void vnode_pager_setsize __P((struct vnode *, u_long)); +void vnode_pager_umount __P((struct mount *)); +boolean_t vnode_pager_uncache __P((struct vnode *)); +void vslock __P((caddr_t, u_int)); +void vsunlock __P((caddr_t, u_int, int)); + +#endif /* KERNEL */ -#endif /* !_VM_EXTERN_H_ */ +#endif /* !_VM_EXTERN_H_ */ diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 31a69c2a47d3..b8a404bf0518 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1994 John S. Dyson @@ -45,17 +45,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.12 1994/11/06 09:55:29 davidg Exp $ + * $Id: vm_fault.c,v 1.13 1994/11/13 22:48:53 davidg Exp $ */ /* @@ -85,7 +85,7 @@ #include <vm/vm_pageout.h> #include <vm/vm_kern.h> -int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int, vm_page_t *, int *)); +int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int, vm_page_t *, int *)); #define VM_FAULT_READ_AHEAD 4 #define VM_FAULT_READ_AHEAD_MIN 1 @@ -114,31 +114,31 @@ extern int vm_pageout_proc_limit; */ int vm_fault(map, vaddr, fault_type, change_wiring) - vm_map_t map; - vm_offset_t vaddr; - vm_prot_t fault_type; - boolean_t change_wiring; + vm_map_t map; + vm_offset_t vaddr; + vm_prot_t fault_type; + boolean_t change_wiring; { - vm_object_t first_object; - vm_offset_t first_offset; - vm_map_entry_t entry; - register vm_object_t object; - register vm_offset_t offset; - vm_page_t m; - vm_page_t first_m; - vm_prot_t prot; - int result; - boolean_t wired; - boolean_t su; - boolean_t lookup_still_valid; - boolean_t page_exists; - vm_page_t old_m; - vm_object_t next_object; - vm_page_t marray[VM_FAULT_READ]; - int spl; - int hardfault=0; - - cnt.v_vm_faults++; /* needs lock XXX */ + vm_object_t first_object; + vm_offset_t first_offset; + vm_map_entry_t entry; + register vm_object_t object; + register vm_offset_t offset; + vm_page_t m; + vm_page_t first_m; + vm_prot_t prot; + int result; + boolean_t wired; + boolean_t su; + boolean_t lookup_still_valid; + boolean_t page_exists; + vm_page_t old_m; + vm_object_t next_object; + vm_page_t marray[VM_FAULT_READ]; + int spl; + int hardfault = 0; + + cnt.v_vm_faults++; /* needs lock XXX */ /* * Recovery actions */ @@ -185,17 +185,16 @@ vm_fault(map, vaddr, fault_type, change_wiring) } - RetryFault: ; +RetryFault:; /* - * Find the backing store object 
and offset into - * it to begin the search. + * Find the backing store object and offset into it to begin the + * search. */ - if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, - &first_object, &first_offset, - &prot, &wired, &su)) != KERN_SUCCESS) { - return(result); + if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, &first_object, + &first_offset, &prot, &wired, &su)) != KERN_SUCCESS) { + return (result); } lookup_still_valid = TRUE; @@ -204,12 +203,11 @@ vm_fault(map, vaddr, fault_type, change_wiring) first_m = NULL; - /* - * Make a reference to this object to - * prevent its disposal while we are messing with - * it. Once we have the reference, the map is free - * to be diddled. Since objects reference their - * shadows (and copies), they will stay around as well. + /* + * Make a reference to this object to prevent its disposal while we + * are messing with it. Once we have the reference, the map is free + * to be diddled. Since objects reference their shadows (and copies), + * they will stay around as well. */ vm_object_lock(first_object); @@ -218,132 +216,111 @@ vm_fault(map, vaddr, fault_type, change_wiring) first_object->paging_in_progress++; /* - * INVARIANTS (through entire routine): - * - * 1) At all times, we must either have the object - * lock or a busy page in some object to prevent - * some other thread from trying to bring in - * the same page. - * - * Note that we cannot hold any locks during the - * pager access or when waiting for memory, so - * we use a busy page then. - * - * Note also that we aren't as concerned about - * more than one thead attempting to pager_data_unlock - * the same page at once, so we don't hold the page - * as busy then, but do record the highest unlock - * value so far. [Unlock requests may also be delivered - * out of order.] - * - * 2) Once we have a busy page, we must remove it from - * the pageout queues, so that the pageout daemon - * will not grab it away. - * - * 3) To prevent another thread from racing us down the - * shadow chain and entering a new page in the top - * object before we do, we must keep a busy page in - * the top object while following the shadow chain. - * - * 4) We must increment paging_in_progress on any object - * for which we have a busy page, to prevent - * vm_object_collapse from removing the busy page - * without our noticing. + * INVARIANTS (through entire routine): + * + * 1) At all times, we must either have the object lock or a busy + * page in some object to prevent some other thread from trying to + * bring in the same page. + * + * Note that we cannot hold any locks during the pager access or when + * waiting for memory, so we use a busy page then. + * + * Note also that we aren't as concerned about more than one thead + * attempting to pager_data_unlock the same page at once, so we don't + * hold the page as busy then, but do record the highest unlock value + * so far. [Unlock requests may also be delivered out of order.] + * + * 2) Once we have a busy page, we must remove it from the pageout + * queues, so that the pageout daemon will not grab it away. + * + * 3) To prevent another thread from racing us down the shadow chain + * and entering a new page in the top object before we do, we must + * keep a busy page in the top object while following the shadow + * chain. + * + * 4) We must increment paging_in_progress on any object for which + * we have a busy page, to prevent vm_object_collapse from removing + * the busy page without our noticing. 
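 * [Editorial annotation, not part of the commit: in the code that follows,
 *  invariant (1) is what the PG_BUSY/m->busy sleep loop enforces, (2) is
 *  handled by vm_page_unqueue() once the resident page is found, (3) is why
 *  first_m stays busy while the shadow chain is walked, and (4) is why
 *  paging_in_progress is bumped on first_object above and dropped again as
 *  each intermediate object is left behind.]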
*/ /* - * Search for the page at object/offset. + * Search for the page at object/offset. */ object = first_object; offset = first_offset; /* - * See whether this page is resident + * See whether this page is resident */ while (TRUE) { m = vm_page_lookup(object, offset); if (m != NULL) { /* - * If the page is being brought in, - * wait for it and then retry. + * If the page is being brought in, wait for it and + * then retry. */ - if (m->flags & (PG_BUSY|PG_VMIO)) { + if ((m->flags & PG_BUSY) || m->busy) { int s; + UNLOCK_THINGS; s = splhigh(); - if (m->flags & (PG_BUSY|PG_VMIO)) { - m->flags |= PG_WANTED; + if ((m->flags & PG_BUSY) || m->busy) { + m->flags |= PG_WANTED | PG_REFERENCED; cnt.v_intrans++; - tsleep((caddr_t)m,PSWP,"vmpfw",0); + tsleep((caddr_t) m, PSWP, "vmpfw", 0); } splx(s); vm_object_deallocate(first_object); goto RetryFault; } - + if ((m->flags & PG_CACHE) && + (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { + UNLOCK_AND_DEALLOCATE; + VM_WAIT; + goto RetryFault; + } /* - * Remove the page from the pageout daemon's - * reach while we play with it. + * Remove the page from the pageout daemon's reach + * while we play with it. */ vm_page_lock_queues(); - spl = splhigh(); - if (m->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); - m->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - cnt.v_reactivated++; - } - - if (m->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - m->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } - splx(spl); + vm_page_unqueue(m); vm_page_unlock_queues(); /* - * Mark page busy for other threads. + * Mark page busy for other threads. */ m->flags |= PG_BUSY; + if (m->object != kernel_object && m->object != kmem_object && + m->valid && + ((m->valid & vm_page_bits(0, PAGE_SIZE)) + != vm_page_bits(0, PAGE_SIZE))) { + goto readrest; + } break; } - - if (((object->pager != NULL) && - (!change_wiring || wired)) + if (((object->pager != NULL) && (!change_wiring || wired)) || (object == first_object)) { -#if 0 - if (curproc && (vaddr < VM_MAXUSER_ADDRESS) && - (curproc->p_rlimit[RLIMIT_RSS].rlim_max < - curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) { - UNLOCK_AND_DEALLOCATE; - vm_fault_free_pages(curproc); - goto RetryFault; - } -#endif - - if (swap_pager_full && !object->shadow && (!object->pager || + if (swap_pager_full && !object->shadow && (!object->pager || (object->pager && object->pager->pg_type == PG_SWAP && - !vm_pager_has_page(object->pager, offset+object->paging_offset)))) { - if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ { - printf("Process %lu killed by vm_fault -- out of swap\n", (u_long)curproc->p_pid); + !vm_pager_has_page(object->pager, offset + object->paging_offset)))) { + if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) { /* XXX */ + printf("Process %lu killed by vm_fault -- out of swap\n", (u_long) curproc->p_pid); psignal(curproc, SIGKILL); curproc->p_estcpu = 0; curproc->p_nice = PRIO_MIN; resetpriority(curproc); } } - /* - * Allocate a new page for this object/offset - * pair. + * Allocate a new page for this object/offset pair. */ - m = vm_page_alloc(object, offset); + m = vm_page_alloc(object, offset, 0); if (m == NULL) { UNLOCK_AND_DEALLOCATE; @@ -351,88 +328,90 @@ vm_fault(map, vaddr, fault_type, change_wiring) goto RetryFault; } } - +readrest: if (object->pager != NULL && (!change_wiring || wired)) { int rv; int faultcount; int reqpage; /* - * Now that we have a busy page, we can - * release the object lock. 
+ * Now that we have a busy page, we can release the + * object lock. */ vm_object_unlock(object); /* - * now we find out if any other pages should - * be paged in at this time - * this routine checks to see if the pages surrounding this fault - * reside in the same object as the page for this fault. If - * they do, then they are faulted in also into the - * object. The array "marray" returned contains an array of - * vm_page_t structs where one of them is the vm_page_t passed to - * the routine. The reqpage return value is the index into the - * marray for the vm_page_t passed to the routine. + * now we find out if any other pages should be paged + * in at this time this routine checks to see if the + * pages surrounding this fault reside in the same + * object as the page for this fault. If they do, + * then they are faulted in also into the object. The + * array "marray" returned contains an array of + * vm_page_t structs where one of them is the + * vm_page_t passed to the routine. The reqpage + * return value is the index into the marray for the + * vm_page_t passed to the routine. */ faultcount = vm_fault_additional_pages( - first_object, first_offset, - m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, - marray, &reqpage); + first_object, first_offset, + m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, + marray, &reqpage); /* - * Call the pager to retrieve the data, if any, - * after releasing the lock on the map. + * Call the pager to retrieve the data, if any, after + * releasing the lock on the map. */ UNLOCK_MAP; rv = faultcount ? vm_pager_get_pages(object->pager, - marray, faultcount, reqpage, TRUE): VM_PAGER_FAIL; + marray, faultcount, reqpage, TRUE) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* - * Found the page. - * Leave it busy while we play with it. + * Found the page. Leave it busy while we play + * with it. */ vm_object_lock(object); /* - * Relookup in case pager changed page. - * Pager is responsible for disposition - * of old page if moved. + * Relookup in case pager changed page. Pager + * is responsible for disposition of old page + * if moved. */ m = vm_page_lookup(object, offset); - - m->flags &= ~PG_FAKE; + if (!m) { + printf("vm_fault: error fetching offset: %lx (fc: %d, rq: %d)\n", + offset, faultcount, reqpage); + } + m->valid = VM_PAGE_BITS_ALL; pmap_clear_modify(VM_PAGE_TO_PHYS(m)); hardfault++; break; } - /* - * Remove the bogus page (which does not - * exist at this object/offset); before - * doing so, we must get back our object - * lock to preserve our invariant. - * - * Also wake up any other thread that may want - * to bring in this page. - * - * If this is the top-level object, we must - * leave the busy page to prevent another - * thread from rushing past us, and inserting - * the page in that object at the same time - * that we are. + * Remove the bogus page (which does not exist at this + * object/offset); before doing so, we must get back + * our object lock to preserve our invariant. + * + * Also wake up any other thread that may want to bring + * in this page. + * + * If this is the top-level object, we must leave the + * busy page to prevent another thread from rushing + * past us, and inserting the page in that object at + * the same time that we are. 
*/ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", - curproc->p_pid); + curproc->p_pid); vm_object_lock(object); /* * Data outside the range of the pager or an I/O error */ /* - * XXX - the check for kernel_map is a kludge to work around - * having the machine panic on a kernel space fault w/ I/O error. + * XXX - the check for kernel_map is a kludge to work + * around having the machine panic on a kernel space + * fault w/ I/O error. */ if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { FREE_PAGE(m); @@ -447,25 +426,24 @@ vm_fault(map, vaddr, fault_type, change_wiring) */ } } - /* - * We get here if the object has no pager (or unwiring) - * or the pager doesn't have the page. + * We get here if the object has no pager (or unwiring) or the + * pager doesn't have the page. */ if (object == first_object) first_m = m; /* - * Move on to the next object. Lock the next - * object before unlocking the current one. + * Move on to the next object. Lock the next object before + * unlocking the current one. */ offset += object->shadow_offset; next_object = object->shadow; if (next_object == NULL) { /* - * If there's no object left, fill the page - * in the top object with zeros. + * If there's no object left, fill the page in the top + * object with zeros. */ if (object != first_object) { object->paging_in_progress--; @@ -481,11 +459,10 @@ vm_fault(map, vaddr, fault_type, change_wiring) first_m = NULL; vm_page_zero_fill(m); + m->valid = VM_PAGE_BITS_ALL; cnt.v_zfod++; - m->flags &= ~PG_FAKE; break; - } - else { + } else { vm_object_lock(next_object); if (object != first_object) { object->paging_in_progress--; @@ -498,80 +475,73 @@ vm_fault(map, vaddr, fault_type, change_wiring) } } - if ((m->flags & (PG_ACTIVE|PG_INACTIVE) != 0) || - (m->flags & PG_BUSY) == 0) + if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE) != 0) || + (m->flags & PG_BUSY) == 0) panic("vm_fault: absent or active or inactive or not busy after main loop"); /* - * PAGE HAS BEEN FOUND. - * [Loop invariant still holds -- the object lock - * is held.] + * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock + * is held.] */ old_m = m; /* save page that would be copied */ /* - * If the page is being written, but isn't - * already owned by the top-level object, - * we have to copy it into a new page owned - * by the top-level object. + * If the page is being written, but isn't already owned by the + * top-level object, we have to copy it into a new page owned by the + * top-level object. */ if (object != first_object) { - /* - * We only really need to copy if we - * want to write it. + /* + * We only really need to copy if we want to write it. */ - if (fault_type & VM_PROT_WRITE) { + if (fault_type & VM_PROT_WRITE) { /* - * If we try to collapse first_object at this - * point, we may deadlock when we try to get - * the lock on an intermediate object (since we - * have the bottom object locked). We can't - * unlock the bottom object, because the page - * we found may move (by collapse) if we do. - * - * Instead, we first copy the page. Then, when - * we have no more use for the bottom object, - * we unlock it and try to collapse. - * - * Note that we copy the page even if we didn't - * need to... that's the breaks. + * If we try to collapse first_object at this point, + * we may deadlock when we try to get the lock on an + * intermediate object (since we have the bottom + * object locked). 
We can't unlock the bottom object, + * because the page we found may move (by collapse) if + * we do. + * + * Instead, we first copy the page. Then, when we have + * no more use for the bottom object, we unlock it and + * try to collapse. + * + * Note that we copy the page even if we didn't need + * to... that's the breaks. */ - /* - * We already have an empty page in - * first_object - use it. + /* + * We already have an empty page in first_object - use + * it. */ vm_page_copy(m, first_m); - first_m->flags &= ~PG_FAKE; + first_m->valid = VM_PAGE_BITS_ALL; /* - * If another map is truly sharing this - * page with us, we have to flush all - * uses of the original page, since we - * can't distinguish those which want the - * original from those which need the - * new copy. - * - * XXX If we know that only one map has - * access to this page, then we could - * avoid the pmap_page_protect() call. + * If another map is truly sharing this page with us, + * we have to flush all uses of the original page, + * since we can't distinguish those which want the + * original from those which need the new copy. + * + * XXX If we know that only one map has access to this + * page, then we could avoid the pmap_page_protect() + * call. */ vm_page_lock_queues(); vm_page_activate(m); pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - if ((m->flags & PG_CLEAN) == 0) - m->flags |= PG_LAUNDRY; vm_page_unlock_queues(); /* - * We no longer need the old page or object. + * We no longer need the old page or object. */ PAGE_WAKEUP(m); object->paging_in_progress--; @@ -580,7 +550,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_object_unlock(object); /* - * Only use the new page below... + * Only use the new page below... */ cnt.v_cow_faults++; @@ -589,49 +559,46 @@ vm_fault(map, vaddr, fault_type, change_wiring) offset = first_offset; /* - * Now that we've gotten the copy out of the - * way, let's try to collapse the top object. + * Now that we've gotten the copy out of the way, + * let's try to collapse the top object. */ vm_object_lock(object); /* - * But we have to play ugly games with - * paging_in_progress to do that... + * But we have to play ugly games with + * paging_in_progress to do that... */ object->paging_in_progress--; if (object->paging_in_progress == 0) wakeup((caddr_t) object); vm_object_collapse(object); object->paging_in_progress++; - } - else { - prot &= ~VM_PROT_WRITE; + } else { + prot &= ~VM_PROT_WRITE; m->flags |= PG_COPYONWRITE; } } - - if (m->flags & (PG_ACTIVE|PG_INACTIVE)) + if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) panic("vm_fault: active or inactive before copy object handling"); /* - * If the page is being written, but hasn't been - * copied to the copy-object, we have to copy it there. + * If the page is being written, but hasn't been copied to the + * copy-object, we have to copy it there. */ - RetryCopy: +RetryCopy: if (first_object->copy != NULL) { vm_object_t copy_object = first_object->copy; vm_offset_t copy_offset; vm_page_t copy_m; /* - * We only need to copy if we want to write it. + * We only need to copy if we want to write it. */ if ((fault_type & VM_PROT_WRITE) == 0) { prot &= ~VM_PROT_WRITE; m->flags |= PG_COPYONWRITE; - } - else { + } else { /* - * Try to get the lock on the copy_object. + * Try to get the lock on the copy_object. 
*/ if (!vm_object_lock_try(copy_object)) { vm_object_unlock(object); @@ -639,64 +606,59 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_object_lock(object); goto RetryCopy; } - /* - * Make another reference to the copy-object, - * to keep it from disappearing during the - * copy. + * Make another reference to the copy-object, to keep + * it from disappearing during the copy. */ copy_object->ref_count++; /* - * Does the page exist in the copy? + * Does the page exist in the copy? */ copy_offset = first_offset - - copy_object->shadow_offset; + - copy_object->shadow_offset; copy_m = vm_page_lookup(copy_object, copy_offset); page_exists = (copy_m != NULL); if (page_exists) { - if (copy_m->flags & (PG_BUSY|PG_VMIO)) { + if ((copy_m->flags & PG_BUSY) || copy_m->busy) { /* - * If the page is being brought - * in, wait for it and then retry. + * If the page is being brought in, + * wait for it and then retry. */ RELEASE_PAGE(m); copy_object->ref_count--; vm_object_unlock(copy_object); UNLOCK_THINGS; spl = splhigh(); - if( copy_m->flags & (PG_BUSY|PG_VMIO)) { - copy_m->flags |= PG_WANTED; - tsleep((caddr_t)copy_m,PSWP,"vmpfwc",0); + if ((copy_m->flags & PG_BUSY) || copy_m->busy) { + copy_m->flags |= PG_WANTED | PG_REFERENCED; + tsleep((caddr_t) copy_m, PSWP, "vmpfwc", 0); } splx(spl); vm_object_deallocate(first_object); goto RetryFault; } } - /* - * If the page is not in memory (in the object) - * and the object has a pager, we have to check - * if the pager has the data in secondary - * storage. + * If the page is not in memory (in the object) and + * the object has a pager, we have to check if the + * pager has the data in secondary storage. */ if (!page_exists) { /* - * If we don't allocate a (blank) page - * here... another thread could try - * to page it in, allocate a page, and - * then block on the busy page in its - * shadow (first_object). Then we'd - * trip over the busy page after we - * found that the copy_object's pager - * doesn't have the page... + * If we don't allocate a (blank) page here... + * another thread could try to page it in, + * allocate a page, and then block on the busy + * page in its shadow (first_object). Then + * we'd trip over the busy page after we found + * that the copy_object's pager doesn't have + * the page... */ - copy_m = vm_page_alloc(copy_object, copy_offset); + copy_m = vm_page_alloc(copy_object, copy_offset, 0); if (copy_m == NULL) { /* - * Wait for a page, then retry. + * Wait for a page, then retry. */ RELEASE_PAGE(m); copy_object->ref_count--; @@ -705,15 +667,14 @@ vm_fault(map, vaddr, fault_type, change_wiring) VM_WAIT; goto RetryFault; } - - if (copy_object->pager != NULL) { + if (copy_object->pager != NULL) { vm_object_unlock(object); vm_object_unlock(copy_object); UNLOCK_MAP; page_exists = vm_pager_has_page( - copy_object->pager, - (copy_offset + copy_object->paging_offset)); + copy_object->pager, + (copy_offset + copy_object->paging_offset)); vm_object_lock(copy_object); @@ -730,12 +691,12 @@ vm_fault(map, vaddr, fault_type, change_wiring) if (copy_object->shadow != object || copy_object->ref_count == 1) { /* - * Gaah... start over! + * Gaah... start over! 
*/ FREE_PAGE(copy_m); vm_object_unlock(copy_object); vm_object_deallocate(copy_object); - /* may block */ + /* may block */ vm_object_lock(object); goto RetryCopy; } @@ -743,7 +704,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) if (page_exists) { /* - * We didn't need the page + * We didn't need the page */ FREE_PAGE(copy_m); } @@ -751,121 +712,108 @@ vm_fault(map, vaddr, fault_type, change_wiring) } if (!page_exists) { /* - * Must copy page into copy-object. + * Must copy page into copy-object. */ vm_page_copy(m, copy_m); - copy_m->flags &= ~PG_FAKE; + copy_m->valid = VM_PAGE_BITS_ALL; /* - * Things to remember: - * 1. The copied page must be marked 'dirty' - * so it will be paged out to the copy - * object. - * 2. If the old page was in use by any users - * of the copy-object, it must be removed - * from all pmaps. (We can't know which - * pmaps use it.) + * Things to remember: 1. The copied page must + * be marked 'dirty' so it will be paged out + * to the copy object. 2. If the old page was + * in use by any users of the copy-object, it + * must be removed from all pmaps. (We can't + * know which pmaps use it.) */ vm_page_lock_queues(); vm_page_activate(old_m); - pmap_page_protect(VM_PAGE_TO_PHYS(old_m), - VM_PROT_NONE); - if ((old_m->flags & PG_CLEAN) == 0) - old_m->flags |= PG_LAUNDRY; - copy_m->flags &= ~PG_CLEAN; + VM_PROT_NONE); + copy_m->dirty = VM_PAGE_BITS_ALL; vm_page_activate(copy_m); vm_page_unlock_queues(); PAGE_WAKEUP(copy_m); } /* - * The reference count on copy_object must be - * at least 2: one for our extra reference, - * and at least one from the outside world - * (we checked that when we last locked - * copy_object). + * The reference count on copy_object must be at least + * 2: one for our extra reference, and at least one + * from the outside world (we checked that when we + * last locked copy_object). */ copy_object->ref_count--; vm_object_unlock(copy_object); m->flags &= ~PG_COPYONWRITE; } } - - if (m->flags & (PG_ACTIVE | PG_INACTIVE)) + if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) panic("vm_fault: active or inactive before retrying lookup"); /* - * We must verify that the maps have not changed - * since our last lookup. + * We must verify that the maps have not changed since our last + * lookup. */ if (!lookup_still_valid) { - vm_object_t retry_object; - vm_offset_t retry_offset; - vm_prot_t retry_prot; + vm_object_t retry_object; + vm_offset_t retry_offset; + vm_prot_t retry_prot; /* - * Since map entries may be pageable, make sure we can - * take a page fault on them. + * Since map entries may be pageable, make sure we can take a + * page fault on them. */ vm_object_unlock(object); /* - * To avoid trying to write_lock the map while another - * thread has it read_locked (in vm_map_pageable), we - * do not try for write permission. If the page is - * still writable, we will get write permission. If it - * is not, or has been marked needs_copy, we enter the - * mapping without write permission, and will merely - * take another fault. + * To avoid trying to write_lock the map while another thread + * has it read_locked (in vm_map_pageable), we do not try for + * write permission. If the page is still writable, we will + * get write permission. If it is not, or has been marked + * needs_copy, we enter the mapping without write permission, + * and will merely take another fault. 
*/ - result = vm_map_lookup(&map, vaddr, - fault_type & ~VM_PROT_WRITE, &entry, - &retry_object, &retry_offset, &retry_prot, - &wired, &su); + result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, + &entry, &retry_object, &retry_offset, &retry_prot, &wired, &su); vm_object_lock(object); /* - * If we don't need the page any longer, put it on the - * active list (the easiest thing to do here). If no - * one needs it, pageout will grab it eventually. + * If we don't need the page any longer, put it on the active + * list (the easiest thing to do here). If no one needs it, + * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; - return(result); + return (result); } - lookup_still_valid = TRUE; if ((retry_object != first_object) || - (retry_offset != first_offset)) { + (retry_offset != first_offset)) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; goto RetryFault; } - /* - * Check whether the protection has changed or the object - * has been copied while we left the map unlocked. - * Changing from read to write permission is OK - we leave - * the page write-protected, and catch the write fault. - * Changing from write to read permission means that we - * can't mark the page write-enabled after all. + * Check whether the protection has changed or the object has + * been copied while we left the map unlocked. Changing from + * read to write permission is OK - we leave the page + * write-protected, and catch the write fault. Changing from + * write to read permission means that we can't mark the page + * write-enabled after all. */ prot &= retry_prot; if (m->flags & PG_COPYONWRITE) prot &= ~VM_PROT_WRITE; } - /* - * (the various bits we're fiddling with here are locked by - * the object's lock) + * (the various bits we're fiddling with here are locked by the + * object's lock) */ /* XXX This distorts the meaning of the copy_on_write bit */ @@ -874,28 +822,27 @@ vm_fault(map, vaddr, fault_type, change_wiring) m->flags &= ~PG_COPYONWRITE; /* - * It's critically important that a wired-down page be faulted - * only once in each map for which it is wired. + * It's critically important that a wired-down page be faulted only + * once in each map for which it is wired. */ - if (m->flags & (PG_ACTIVE | PG_INACTIVE)) + if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) panic("vm_fault: active or inactive before pmap_enter"); vm_object_unlock(object); /* - * Put this page into the physical map. - * We had to do the unlock above because pmap_enter - * may cause other faults. We don't put the - * page back on the active queue until later so - * that the page-out daemon won't find us (yet). + * Put this page into the physical map. We had to do the unlock above + * because pmap_enter may cause other faults. We don't put the page + * back on the active queue until later so that the page-out daemon + * won't find us (yet). */ pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); /* - * If the page is not wired down, then put it where the - * pageout daemon can find it. + * If the page is not wired down, then put it where the pageout daemon + * can find it. 
*/ vm_object_lock(object); vm_page_lock_queues(); @@ -904,29 +851,27 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_page_wire(m); else vm_page_unwire(m); - } - else { + } else { vm_page_activate(m); } - if( curproc && curproc->p_stats) { + if (curproc && curproc->p_stats) { if (hardfault) { curproc->p_stats->p_ru.ru_majflt++; } else { curproc->p_stats->p_ru.ru_minflt++; } } - vm_page_unlock_queues(); /* - * Unlock everything, and return + * Unlock everything, and return */ PAGE_WAKEUP(m); UNLOCK_AND_DEALLOCATE; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } @@ -937,27 +882,26 @@ vm_fault(map, vaddr, fault_type, change_wiring) */ int vm_fault_wire(map, start, end) - vm_map_t map; - vm_offset_t start, end; + vm_map_t map; + vm_offset_t start, end; { - register vm_offset_t va; - register pmap_t pmap; + register vm_offset_t va; + register pmap_t pmap; int rv; pmap = vm_map_pmap(map); /* - * Inform the physical mapping system that the - * range of addresses may not fault, so that - * page tables and such can be locked down as well. + * Inform the physical mapping system that the range of addresses may + * not fault, so that page tables and such can be locked down as well. */ pmap_pageable(pmap, start, end, FALSE); /* - * We simulate a fault to get the page and enter it - * in the physical map. + * We simulate a fault to get the page and enter it in the physical + * map. */ for (va = start; va < end; va += PAGE_SIZE) { @@ -965,10 +909,10 @@ vm_fault_wire(map, start, end) if (rv) { if (va != start) vm_fault_unwire(map, start, va); - return(rv); + return (rv); } } - return(KERN_SUCCESS); + return (KERN_SUCCESS); } @@ -979,18 +923,18 @@ vm_fault_wire(map, start, end) */ void vm_fault_unwire(map, start, end) - vm_map_t map; - vm_offset_t start, end; + vm_map_t map; + vm_offset_t start, end; { - register vm_offset_t va, pa; - register pmap_t pmap; + register vm_offset_t va, pa; + register pmap_t pmap; pmap = vm_map_pmap(map); /* - * Since the pages are wired down, we must be able to - * get their mappings from the physical map system. + * Since the pages are wired down, we must be able to get their + * mappings from the physical map system. */ vm_page_lock_queues(); @@ -1006,9 +950,8 @@ vm_fault_unwire(map, start, end) vm_page_unlock_queues(); /* - * Inform the physical mapping system that the range - * of addresses may fault, so that page tables and - * such may be unwired themselves. + * Inform the physical mapping system that the range of addresses may + * fault, so that page tables and such may be unwired themselves. */ pmap_pageable(pmap, start, end, TRUE); @@ -1029,55 +972,54 @@ vm_fault_unwire(map, start, end) void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) - vm_map_t dst_map; - vm_map_t src_map; - vm_map_entry_t dst_entry; - vm_map_entry_t src_entry; + vm_map_t dst_map; + vm_map_t src_map; + vm_map_entry_t dst_entry; + vm_map_entry_t src_entry; { - vm_object_t dst_object; - vm_object_t src_object; - vm_offset_t dst_offset; - vm_offset_t src_offset; - vm_prot_t prot; - vm_offset_t vaddr; - vm_page_t dst_m; - vm_page_t src_m; + vm_object_t dst_object; + vm_object_t src_object; + vm_offset_t dst_offset; + vm_offset_t src_offset; + vm_prot_t prot; + vm_offset_t vaddr; + vm_page_t dst_m; + vm_page_t src_m; #ifdef lint src_map++; -#endif lint +#endif /* lint */ src_object = src_entry->object.vm_object; src_offset = src_entry->offset; /* - * Create the top-level object for the destination entry. - * (Doesn't actually shadow anything - we copy the pages - * directly.) 
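/*
 * Illustrative sketch only: the wire-with-rollback loop of vm_fault_wire()
 * above, reduced to plain C.  wire_one()/unwire_one() are stand-ins for
 * vm_fault(..., TRUE)/vm_fault_unwire() and are invented for the example.
 */
#define XPAGE_SIZE 4096UL

static int
wire_one(unsigned long va)
{
        (void)va;
        return (0);             /* stub: pretend the simulated fault succeeds */
}

static void
unwire_one(unsigned long va)
{
        (void)va;
}

static int
wire_range(unsigned long start, unsigned long end)
{
        unsigned long va;
        int rv;

        for (va = start; va < end; va += XPAGE_SIZE) {
                rv = wire_one(va);
                if (rv) {
                        /* undo only the pages wired before the failure */
                        while (va > start) {
                                va -= XPAGE_SIZE;
                                unwire_one(va);
                        }
                        return (rv);
                }
        }
        return (0);
}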
+ * Create the top-level object for the destination entry. (Doesn't + * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate( - (vm_size_t) (dst_entry->end - dst_entry->start)); + (vm_size_t) (dst_entry->end - dst_entry->start)); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; - prot = dst_entry->max_protection; + prot = dst_entry->max_protection; /* - * Loop through all of the pages in the entry's range, copying - * each one from the source object (it should be there) to the - * destination object. + * Loop through all of the pages in the entry's range, copying each + * one from the source object (it should be there) to the destination + * object. */ for (vaddr = dst_entry->start, dst_offset = 0; - vaddr < dst_entry->end; - vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { + vaddr < dst_entry->end; + vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { /* - * Allocate a page in the destination object + * Allocate a page in the destination object */ vm_object_lock(dst_object); do { - dst_m = vm_page_alloc(dst_object, dst_offset); + dst_m = vm_page_alloc(dst_object, dst_offset, 0); if (dst_m == NULL) { vm_object_unlock(dst_object); VM_WAIT; @@ -1086,9 +1028,9 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) } while (dst_m == NULL); /* - * Find the page in the source object, and copy it in. - * (Because the source is wired down, the page will be - * in memory.) + * Find the page in the source object, and copy it in. + * (Because the source is wired down, the page will be in + * memory.) */ vm_object_lock(src_object); src_m = vm_page_lookup(src_object, dst_offset + src_offset); @@ -1098,16 +1040,16 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) vm_page_copy(src_m, dst_m); /* - * Enter it in the pmap... + * Enter it in the pmap... */ vm_object_unlock(src_object); vm_object_unlock(dst_object); pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), - prot, FALSE); + prot, FALSE); /* - * Mark it no longer busy, and put it on the active list. + * Mark it no longer busy, and put it on the active list. */ vm_object_lock(dst_object); vm_page_lock_queues(); @@ -1122,7 +1064,7 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) /* * looks page up in shadow chain */ - + int vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) vm_object_t object; @@ -1137,16 +1079,14 @@ vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) *rtobject = 0; *rtoffset = 0; - - while (!(m=vm_page_lookup(object, offset))) { + while (!(m = vm_page_lookup(object, offset))) { if (object->pager) { - if (vm_pager_has_page(object->pager, object->paging_offset+offset)) { + if (vm_pager_has_page(object->pager, object->paging_offset + offset)) { *rtobject = object; *rtoffset = offset; return 1; } } - if (!object->shadow) return 0; else { @@ -1202,36 +1142,19 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra * if the requested page is not available, then give up now */ - if (!vm_pager_has_page(object->pager, object->paging_offset+offset)) + if (!vm_pager_has_page(object->pager, object->paging_offset + offset)) return 0; /* - * if there is no getmulti routine for this pager, then just allow - * one page to be read. - */ -/* - if (!object->pager->pg_ops->pgo_getpages) { - *reqpage = 0; - marray[0] = m; - return 1; - } -*/ - - /* * try to do any readahead that we might have free pages for. 
*/ rahead = raheada; - if (rahead > (cnt.v_free_count - cnt.v_free_reserved)) { - rahead = cnt.v_free_count - cnt.v_free_reserved; - rbehind = 0; + if ((rahead + rbehind) > ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { + rahead = ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved) / 2; + rbehind = rahead; + if (!rahead) + wakeup((caddr_t) & vm_pages_needed); } - - if (cnt.v_free_count < cnt.v_free_min) { - if (rahead > VM_FAULT_READ_AHEAD_MIN) - rahead = VM_FAULT_READ_AHEAD_MIN; - rbehind = 0; - } - /* * if we don't have any free pages, then just read one page. */ @@ -1240,23 +1163,22 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra marray[0] = m; return 1; } - /* - * scan backward for the read behind pages -- - * in memory or on disk not in same object + * scan backward for the read behind pages -- in memory or on disk not + * in same object */ toffset = offset - NBPG; - if( toffset < offset) { - if( rbehind*NBPG > offset) + if (toffset < offset) { + if (rbehind * NBPG > offset) rbehind = offset / NBPG; - startoffset = offset - rbehind*NBPG; + startoffset = offset - rbehind * NBPG; while (toffset >= startoffset) { if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || rtm != 0 || rtobject != object) { startoffset = toffset + NBPG; break; } - if( toffset == 0) + if (toffset == 0) break; toffset -= NBPG; } @@ -1265,11 +1187,11 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra } /* - * scan forward for the read ahead pages -- - * in memory or on disk not in same object + * scan forward for the read ahead pages -- in memory or on disk not + * in same object */ toffset = offset + NBPG; - endoffset = offset + (rahead+1)*NBPG; + endoffset = offset + (rahead + 1) * NBPG; while (toffset < object->size && toffset < endoffset) { if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || rtm != 0 || rtobject != object) { @@ -1284,16 +1206,16 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra /* calculate the page offset of the required page */ treqpage = (offset - startoffset) / NBPG; - + /* see if we have space (again) */ - if (cnt.v_free_count >= cnt.v_free_reserved + size) { + if ((cnt.v_free_count + cnt.v_cache_count) > (cnt.v_free_reserved + size)) { bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t)); /* * get our pages and don't block for them */ for (i = 0; i < size; i++) { if (i != treqpage) - rtm = vm_page_alloc(object, startoffset + i * NBPG); + rtm = vm_page_alloc(object, startoffset + i * NBPG, 0); else rtm = m; marray[i] = rtm; @@ -1305,8 +1227,8 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra } /* - * if we could not get our block of pages, then - * free the readahead/readbehind pages. + * if we could not get our block of pages, then free the + * readahead/readbehind pages. 
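/*
 * Illustrative sketch only: the cluster-sizing arithmetic used above.  The
 * requested read-behind/read-ahead window is shrunk when free + cache pages
 * run short of the reserve, and read-behind is never allowed to reach in
 * front of offset 0.  Plain userland arithmetic; the names mirror the diff
 * but nothing here is a kernel interface.
 */
#include <stdio.h>

#define XNBPG 4096L

static void
clamp_cluster(long freecnt, long cachecnt, long reserved, long offset,
    long *rbehind, long *rahead)
{
        long spare = (freecnt + cachecnt) - reserved;

        if (*rahead + *rbehind > spare) {
                *rahead = spare / 2;
                *rbehind = *rahead;
        }
        /* cannot read behind the start of the object */
        if (*rbehind * XNBPG > offset)
                *rbehind = offset / XNBPG;
        /* (the fault code falls back to a single page if this collapses to 0) */
}

int
main(void)
{
        long rbehind = 8, rahead = 16;

        clamp_cluster(20, 10, 10, 2 * XNBPG, &rbehind, &rahead);
        printf("rbehind %ld rahead %ld\n", rbehind, rahead);    /* 2 and 10 */
        return (0);
}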
*/ if (i < size) { for (i = 0; i < size; i++) { @@ -1316,8 +1238,7 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra *reqpage = 0; marray[0] = m; return 1; - } - + } *reqpage = treqpage; return size; } @@ -1325,4 +1246,3 @@ vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marra marray[0] = m; return 1; } - diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index dd8eeaaf6bda..b31e412059c0 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -38,17 +38,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -59,7 +59,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_glue.c,v 1.9 1994/11/13 12:47:07 davidg Exp $ + * $Id: vm_glue.c,v 1.10 1994/12/18 06:31:31 davidg Exp $ */ #include <sys/param.h> @@ -81,8 +81,9 @@ #include <machine/cpu.h> extern char kstack[]; -int avefree = 0; /* XXX */ -int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ +int avefree = 0; /* XXX */ +int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ + /* vm_map_t upages_map; */ int @@ -95,9 +96,9 @@ kernacc(addr, len, rw) vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; saddr = trunc_page(addr); - eaddr = round_page(addr+len); + eaddr = round_page(addr + len); rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); - return(rv == TRUE); + return (rv == TRUE); } int @@ -111,20 +112,19 @@ useracc(addr, len, rw) /* * XXX - check separately to disallow access to user area and user * page tables - they are in the map. - * - * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was - * once only used (as an end address) in trap.c. Use it as an end - * address here too. This bogusness has spread. I just fixed - * where it was used as a max in vm_mmap.c. + * + * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was once + * only used (as an end address) in trap.c. Use it as an end address + * here too. This bogusness has spread. I just fixed where it was + * used as a max in vm_mmap.c. */ if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS || (vm_offset_t) addr + len < (vm_offset_t) addr) { return (FALSE); } - rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, - trunc_page(addr), round_page(addr+len), prot); - return(rv == TRUE); + trunc_page(addr), round_page(addr + len), prot); + return (rv == TRUE); } #ifdef KGDB @@ -140,29 +140,29 @@ chgkprot(addr, len, rw) vm_prot_t prot = rw == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE; vm_map_protect(kernel_map, trunc_page(addr), - round_page(addr+len), prot, FALSE); + round_page(addr + len), prot, FALSE); } #endif void vslock(addr, len) - caddr_t addr; - u_int len; + caddr_t addr; + u_int len; { vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), - round_page(addr+len), FALSE); + round_page(addr + len), FALSE); } void vsunlock(addr, len, dirtied) - caddr_t addr; - u_int len; + caddr_t addr; + u_int len; int dirtied; { #ifdef lint dirtied++; -#endif lint - vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), - round_page(addr+len), TRUE); +#endif /* lint */ + vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), + round_page(addr + len), TRUE); } /* @@ -186,15 +186,17 @@ vm_fork(p1, p2, isvfork) int i; struct vm_map *vp; - while( cnt.v_free_count < cnt.v_free_min) + while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { VM_WAIT; + } /* * avoid copying any of the parent's pagetables or other per-process - * objects that reside in the map by marking all of them non-inheritable + * objects that reside in the map by marking all of them + * non-inheritable */ - (void)vm_map_inherit(&p1->p_vmspace->vm_map, - UPT_MIN_ADDRESS - UPAGES * NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); + (void) vm_map_inherit(&p1->p_vmspace->vm_map, + UPT_MIN_ADDRESS - UPAGES * NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); p2->p_vmspace = vmspace_fork(p1->p_vmspace); #ifdef SYSVSHM @@ -203,7 +205,8 @@ vm_fork(p1, p2, isvfork) #endif /* - * Allocate a wired-down (for now) pcb and kernel stack for the process + * Allocate a wired-down (for now) pcb and kernel stack for the + * process */ addr = (vm_offset_t) kstack; @@ -211,56 +214,57 @@ vm_fork(p1, p2, isvfork) vp = &p2->p_vmspace->vm_map; /* ream out old pagetables and kernel stack */ - (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); + (void) vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); /* get new pagetables and kernel stack */ - (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); + (void) vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); /* force in the page table encompassing the UPAGES */ - ptaddr = trunc_page((u_int)vtopte(addr)); + ptaddr = trunc_page((u_int) vtopte(addr)); vm_map_pageable(vp, ptaddr, ptaddr + NBPG, FALSE); /* and force in (demand-zero) the UPAGES */ vm_map_pageable(vp, addr, addr + UPAGES * NBPG, FALSE); /* get a kernel virtual address for the UPAGES for this proc */ - up = (struct user *)kmem_alloc_pageable(kernel_map, UPAGES * NBPG); + up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * NBPG); /* and force-map the upages into the kernel pmap */ for (i = 0; i < UPAGES; i++) - pmap_enter(vm_map_pmap(kernel_map), - ((vm_offset_t) up) + NBPG * i, - pmap_extract(vp->pmap, addr + NBPG * i), - VM_PROT_READ|VM_PROT_WRITE, 1); + pmap_enter(vm_map_pmap(u_map), + ((vm_offset_t) up) + NBPG * i, + pmap_extract(vp->pmap, addr + NBPG * i), + VM_PROT_READ | VM_PROT_WRITE, 1); - /* and allow the UPAGES page table entry to be paged (at the vm system level) */ + /* + * and allow the UPAGES page table entry to be paged (at the vm system + * level) + */ vm_map_pageable(vp, ptaddr, ptaddr + NBPG, TRUE); p2->p_addr = up; /* - * p_stats and p_sigacts currently point at fields - * in the user struct but not at &u, instead at p_addr. - * Copy p_sigacts and parts of p_stats; zero the rest - * of p_stats (statistics). + * p_stats and p_sigacts currently point at fields in the user struct + * but not at &u, instead at p_addr. 
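/*
 * Illustrative sketch only: the page rounding that kernacc()/useracc()/
 * vslock() above apply to [addr, addr + len), plus the wrap-around test
 * useracc() performs before trusting addr + len.  The 4K page size and all
 * names are assumptions of the example.
 */
#include <stdio.h>

typedef unsigned long xvm_offset_t;

#define XPG_SIZE 4096UL
#define XPG_MASK (XPG_SIZE - 1)
#define xtrunc_page(x) ((xvm_offset_t)(x) & ~XPG_MASK)
#define xround_page(x) (((xvm_offset_t)(x) + XPG_MASK) & ~XPG_MASK)

/* Returns 1 and the page-aligned bounds if the range is sane, else 0. */
static int
range_ok(xvm_offset_t addr, xvm_offset_t len, xvm_offset_t limit,
    xvm_offset_t *start, xvm_offset_t *end)
{
        if (addr + len > limit || addr + len < addr)    /* past limit, or wraps */
                return (0);
        *start = xtrunc_page(addr);
        *end = xround_page(addr + len);
        return (1);
}

int
main(void)
{
        xvm_offset_t s, e;

        if (range_ok(0x1234, 0x100, 0xf0000000UL, &s, &e))
                printf("check [%#lx, %#lx)\n", s, e);   /* [0x1000, 0x2000) */
        return (0);
}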
Copy p_sigacts and parts of + * p_stats; zero the rest of p_stats (statistics). */ p2->p_stats = &up->u_stats; p2->p_sigacts = &up->u_sigacts; up->u_sigacts = *p1->p_sigacts; bzero(&up->u_stats.pstat_startzero, - (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - - (caddr_t)&up->u_stats.pstat_startzero)); + (unsigned) ((caddr_t) & up->u_stats.pstat_endzero - + (caddr_t) & up->u_stats.pstat_startzero)); bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, - ((caddr_t)&up->u_stats.pstat_endcopy - - (caddr_t)&up->u_stats.pstat_startcopy)); + ((caddr_t) & up->u_stats.pstat_endcopy - + (caddr_t) & up->u_stats.pstat_startcopy)); + - /* - * cpu_fork will copy and update the kernel stack and pcb, - * and make the child ready to run. It marks the child - * so that it can return differently than the parent. - * It returns twice, once in the parent process and - * once in the child. + * cpu_fork will copy and update the kernel stack and pcb, and make + * the child ready to run. It marks the child so that it can return + * differently than the parent. It returns twice, once in the parent + * process and once in the child. */ return (cpu_fork(p1, p2)); } @@ -276,27 +280,26 @@ vm_init_limits(p) int rss_limit; /* - * Set up the initial limits on process VM. - * Set the maximum resident set size to be half - * of (reasonably) available memory. Since this - * is a soft limit, it comes into effect only - * when the system is out of memory - half of - * main memory helps to favor smaller processes, + * Set up the initial limits on process VM. Set the maximum resident + * set size to be half of (reasonably) available memory. Since this + * is a soft limit, it comes into effect only when the system is out + * of memory - half of main memory helps to favor smaller processes, * and reduces thrashing of the object cache. 
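/*
 * Illustrative sketch only: the "zero one span of the stats struct, copy the
 * rest from the parent" trick used for p_stats above.  The struct layout and
 * every field name are invented; the real code bounds the spans with the
 * pstat_startzero/endzero and pstat_startcopy/endcopy markers.
 */
#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct xstats {
        long ru_minflt;         /* zeroed span starts here */
        long ru_majflt;
        long ru_nswap;
        long start_copy;        /* copied span starts here and runs to the end */
        long prio_offset;
        long max_rss;
};

static void
xstats_fork(struct xstats *child, const struct xstats *parent)
{
        /* zero [ru_minflt, start_copy) */
        memset(&child->ru_minflt, 0,
            offsetof(struct xstats, start_copy) - offsetof(struct xstats, ru_minflt));
        /* copy [start_copy, end of struct) from the parent */
        memcpy(&child->start_copy, &parent->start_copy,
            sizeof(struct xstats) - offsetof(struct xstats, start_copy));
}

int
main(void)
{
        struct xstats parent = { 5, 7, 1, 42, -4, 8192 };
        struct xstats child;

        xstats_fork(&child, &parent);
        printf("%ld %ld %ld\n", child.ru_majflt, child.start_copy, child.max_rss);
        /* prints "0 42 8192" */
        return (0);
}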
*/ - p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; - p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; - p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; - p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; - /* limit the limit to no less than 2MB */ + p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; + p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; + p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; + p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; + /* limit the limit to no less than 2MB */ rss_limit = max(cnt.v_free_count / 2, 512); p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; } #ifdef DEBUG -int enableswap = 1; -int swapdebug = 0; +int enableswap = 1; +int swapdebug = 0; + #define SDB_FOLLOW 1 #define SDB_SWAPIN 2 #define SDB_SWAPOUT 4 @@ -304,7 +307,7 @@ int swapdebug = 0; void faultin(p) -struct proc *p; + struct proc *p; { vm_offset_t i; vm_offset_t ptaddr; @@ -317,22 +320,23 @@ struct proc *p; map = &p->p_vmspace->vm_map; /* force the page table encompassing the kernel stack (upages) */ - ptaddr = trunc_page((u_int)vtopte(kstack)); + ptaddr = trunc_page((u_int) vtopte(kstack)); vm_map_pageable(map, ptaddr, ptaddr + NBPG, FALSE); /* wire in the UPAGES */ vm_map_pageable(map, (vm_offset_t) kstack, - (vm_offset_t) kstack + UPAGES * NBPG, FALSE); + (vm_offset_t) kstack + UPAGES * NBPG, FALSE); /* and map them nicely into the kernel pmap */ for (i = 0; i < UPAGES; i++) { vm_offset_t off = i * NBPG; vm_offset_t pa = (vm_offset_t) - pmap_extract(&p->p_vmspace->vm_pmap, - (vm_offset_t) kstack + off); - pmap_enter(vm_map_pmap(kernel_map), - ((vm_offset_t)p->p_addr) + off, - pa, VM_PROT_READ|VM_PROT_WRITE, 1); + pmap_extract(&p->p_vmspace->vm_pmap, + (vm_offset_t) kstack + off); + + pmap_enter(vm_map_pmap(u_map), + ((vm_offset_t) p->p_addr) + off, + pa, VM_PROT_READ | VM_PROT_WRITE, 1); } /* and let the page table pages go (at least above pmap level) */ @@ -343,18 +347,15 @@ struct proc *p; if (p->p_stat == SRUN) setrunqueue(p); - p->p_flag |= P_INMEM; + p->p_flag |= P_INMEM; /* undo the effect of setting SLOCK above */ --p->p_lock; splx(s); } - } - -int swapinreq; -int percentactive; + /* * This swapin algorithm attempts to swap-in processes only if there * is enough space for them. Of course, if a process waits for a long @@ -367,95 +368,45 @@ scheduler() register int pri; struct proc *pp; int ppri; - int lastidle, lastrun; - int curidle, currun; - int forceload; - int percent; - int ntries; - - lastidle = 0; - lastrun = 0; loop: - ntries = 0; - - curidle = cp_time[CP_IDLE]; - currun = cp_time[CP_USER] + cp_time[CP_SYS] + cp_time[CP_NICE]; - percent = (100*(currun-lastrun)) / ( 1 + (currun-lastrun) + (curidle-lastidle)); - lastrun = currun; - lastidle = curidle; - if( percent > 100) - percent = 100; - percentactive = percent; - - if( percentactive < 25) - forceload = 1; - else - forceload = 0; - -loop1: + while ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_reserved + UPAGES + 2)) { + VM_WAIT; + tsleep((caddr_t) & proc0, PVM, "schedm", 0); + } + pp = NULL; ppri = INT_MIN; - for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { - if (p->p_stat == SRUN && (p->p_flag & (P_INMEM|P_SWAPPING)) == 0) { + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { + if (p->p_stat == SRUN && (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) { int mempri; + pri = p->p_swtime + p->p_slptime - p->p_nice * 8; mempri = pri > 0 ? 
pri : 0; - /* + /* * if this process is higher priority and there is - * enough space, then select this process instead - * of the previous selection. + * enough space, then select this process instead of + * the previous selection. */ - if (pri > ppri && - (((cnt.v_free_count + (mempri * (4*PAGE_SIZE) / PAGE_SIZE) >= (p->p_vmspace->vm_swrss)) || (ntries > 0 && forceload)))) { + if (pri > ppri) { pp = p; ppri = pri; } } } - if ((pp == NULL) && (ntries == 0) && forceload) { - ++ntries; - goto loop1; - } - /* * Nothing to do, back to sleep */ if ((p = pp) == NULL) { - tsleep((caddr_t)&proc0, PVM, "sched", 0); + tsleep((caddr_t) & proc0, PVM, "sched", 0); goto loop; } - /* * We would like to bring someone in. (only if there is space). */ -/* - printf("swapin: %d, free: %d, res: %d, min: %d\n", - p->p_pid, cnt.v_free_count, cnt.v_free_reserved, cnt.v_free_min); -*/ - (void) splhigh(); - if ((forceload && (cnt.v_free_count > (cnt.v_free_reserved + UPAGES + 1))) || - (cnt.v_free_count >= cnt.v_free_min)) { - spl0(); - faultin(p); - p->p_swtime = 0; - goto loop; - } - /* - * log the memory shortage - */ - swapinreq += p->p_vmspace->vm_swrss; - /* - * Not enough memory, jab the pageout daemon and wait til the - * coast is clear. - */ - if( cnt.v_free_count < cnt.v_free_min) { - VM_WAIT; - } else { - tsleep((caddr_t)&proc0, PVM, "sched", 0); - } - (void) spl0(); + faultin(p); + p->p_swtime = 0; goto loop; } @@ -464,6 +415,7 @@ loop1: ((p)->p_flag & (P_TRACED|P_NOSWAP|P_SYSTEM|P_INMEM|P_WEXIT|P_PHYSIO|P_SWAPPING)) == P_INMEM) extern int vm_pageout_free_min; + /* * Swapout is driven by the pageout daemon. Very simple, we find eligible * procs and unwire their u-areas. We try to always "swap" at least one @@ -480,98 +432,58 @@ swapout_threads() int outpri, outpri2; int tpri; int didswap = 0; - int swapneeded = swapinreq; extern int maxslp; - int runnablenow; - runnablenow = 0; outp = outp2 = NULL; outpri = outpri2 = INT_MIN; - for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { +retry: + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { if (!swappable(p)) continue; switch (p->p_stat) { - case SRUN: - ++runnablenow; - /* - * count the process as being in a runnable state - */ - if ((tpri = p->p_swtime + p->p_nice * 8) > outpri2) { - outp2 = p; - outpri2 = tpri; - } + default: continue; - + case SSLEEP: case SSTOP: /* - * do not swapout a realtime process - */ - if (p->p_rtprio.type == RTP_PRIO_REALTIME) - continue; + * do not swapout a realtime process + */ + if (p->p_rtprio.type == RTP_PRIO_REALTIME) + continue; + + /* + * do not swapout a process waiting on a critical + * event of some kind + */ + if ((p->p_priority & 0x7f) < PSOCK) + continue; /* - * do not swapout a process that is waiting for VM datastructures - * there is a possible deadlock. + * do not swapout a process that is waiting for VM + * datastructures there is a possible deadlock. */ - if (!lock_try_write( &p->p_vmspace->vm_map.lock)) { + if (!lock_try_write(&p->p_vmspace->vm_map.lock)) { continue; } - vm_map_unlock( &p->p_vmspace->vm_map); + vm_map_unlock(&p->p_vmspace->vm_map); /* - * If the process has been asleep for awhile and had most - * of its pages taken away already, swap it out. + * If the process has been asleep for awhile and had + * most of its pages taken away already, swap it out. 
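/*
 * Illustrative sketch only: the candidate selection the reworked scheduler()
 * above performs, picking the runnable, swapped-out process with the largest
 * swtime + slptime - nice * 8.  struct toyproc is a toy subset of struct proc.
 */
#include <stdio.h>
#include <limits.h>

struct toyproc {
        int eligible;           /* SRUN and neither P_INMEM nor P_SWAPPING */
        int swtime;             /* seconds swapped out */
        int slptime;            /* seconds asleep */
        int nice;
};

static int
pick_swapin(const struct toyproc *p, int n)
{
        int i, best = -1, bestpri = INT_MIN;

        for (i = 0; i < n; i++) {
                int pri;

                if (!p[i].eligible)
                        continue;
                pri = p[i].swtime + p[i].slptime - p[i].nice * 8;
                if (pri > bestpri) {
                        best = i;
                        bestpri = pri;
                }
        }
        return (best);          /* -1: nothing to do, go back to sleep */
}

int
main(void)
{
        struct toyproc procs[] = {
                { 1, 10, 2, 0 },        /* pri 12 */
                { 0, 99, 9, 0 },        /* not eligible */
                { 1, 30, 5, 4 },        /* pri 3  */
        };

        printf("swap in proc %d\n", pick_swapin(procs, 3));     /* prints 0 */
        return (0);
}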
*/ if (p->p_slptime > maxslp) { swapout(p); didswap++; - } else if ((tpri = p->p_slptime + p->p_nice * 8) > outpri && - (p->p_vmspace->vm_pmap.pm_stats.resident_count <= 6)) { - outp = p; - outpri = tpri ; + goto retry; } - continue; } } /* - * We swapout only if there are more than two runnable processes or if - * another process needs some space to swapin. - */ - if ((swapinreq || ((percentactive > 90) && (runnablenow > 2))) && - (((cnt.v_free_count + cnt.v_inactive_count) <= (cnt.v_free_target + cnt.v_inactive_target)) || - (cnt.v_free_count < cnt.v_free_min))) { - if ((p = outp) == 0) { - p = outp2; - } - - /* - * Only swapout processes that have already had most - * of their pages taken away. - */ - if (p && (p->p_vmspace->vm_pmap.pm_stats.resident_count <= 6)) { - swapout(p); - didswap = 1; - } - } - - /* - * if we previously had found a process to swapout, and we need to swapout - * more then try again. - */ -#if 0 - if( p && swapinreq) - goto swapmore; -#endif - - /* * If we swapped something out, and another process needed memory, * then wakeup the sched process. */ - if (didswap) { - if (swapneeded) - wakeup((caddr_t)&proc0); - swapinreq = 0; - } + if (didswap) + wakeup((caddr_t) & proc0); } void @@ -585,11 +497,7 @@ swapout(p) * remember the process resident count */ p->p_vmspace->vm_swrss = - p->p_vmspace->vm_pmap.pm_stats.resident_count; - /* - * and decrement the amount of needed space - */ - swapinreq -= min(swapinreq, p->p_vmspace->vm_pmap.pm_stats.resident_count); + p->p_vmspace->vm_pmap.pm_stats.resident_count; (void) splhigh(); p->p_flag &= ~P_INMEM; @@ -598,12 +506,14 @@ swapout(p) (void) spl0(); p->p_flag |= P_SWAPPING; -/* let the upages be paged */ - pmap_remove(vm_map_pmap(kernel_map), - (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + UPAGES * NBPG); + /* + * let the upages be paged + */ + pmap_remove(vm_map_pmap(u_map), + (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + UPAGES * NBPG); vm_map_pageable(map, (vm_offset_t) kstack, - (vm_offset_t) kstack + UPAGES * NBPG, TRUE); + (vm_offset_t) kstack + UPAGES * NBPG, TRUE); p->p_flag &= ~P_SWAPPING; p->p_swtime = 0; @@ -630,7 +540,7 @@ void thread_block(char *msg) { if (curproc->p_thread) - tsleep((caddr_t)curproc->p_thread, PVM, msg, 0); + tsleep((caddr_t) curproc->p_thread, PVM, msg, 0); } @@ -644,7 +554,7 @@ thread_sleep_(event, lock, wmesg) curproc->p_thread = event; simple_unlock(lock); if (curproc->p_thread) { - tsleep((caddr_t)event, PVM, wmesg, 0); + tsleep((caddr_t) event, PVM, wmesg, 0); } } @@ -653,7 +563,7 @@ void thread_wakeup(event) int event; { - wakeup((caddr_t)event); + wakeup((caddr_t) event); } #endif @@ -663,16 +573,17 @@ thread_wakeup(event) int indent = 0; -#include <machine/stdarg.h> /* see subr_prf.c */ +#include <machine/stdarg.h> /* see subr_prf.c */ /*ARGSUSED2*/ void #if __STDC__ -iprintf(const char *fmt, ...) +iprintf(const char *fmt,...) #else -iprintf(fmt /* , va_alist */) +iprintf(fmt /* , va_alist */ ) char *fmt; - /* va_dcl */ + + /* va_dcl */ #endif { register int i; diff --git a/sys/vm/vm_inherit.h b/sys/vm/vm_inherit.h index b4691b4ac562..ee212e19bdc5 100644 --- a/sys/vm/vm_inherit.h +++ b/sys/vm/vm_inherit.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: vm_inherit.h,v 1.2 1994/08/02 07:55:20 davidg Exp $ */ /* @@ -82,4 +82,4 @@ #define VM_INHERIT_DEFAULT VM_INHERIT_COPY -#endif /* _VM_INHERIT_ */ +#endif /* _VM_INHERIT_ */ diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 34ae5735790d..6406c1ed645c 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -1,5 +1,4 @@ - -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -41,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -62,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_init.c,v 1.3 1994/08/02 07:55:21 davidg Exp $ + * $Id: vm_init.c,v 1.4 1994/10/09 01:52:09 phk Exp $ */ /* @@ -86,13 +85,12 @@ void vm_mem_init() { - extern vm_offset_t avail_start, avail_end; - extern vm_offset_t virtual_avail, virtual_end; + extern vm_offset_t avail_start, avail_end; + extern vm_offset_t virtual_avail, virtual_end; /* - * Initializes resident memory structures. - * From here on, all physical memory is accounted for, - * and we use only virtual addresses. + * Initializes resident memory structures. From here on, all physical + * memory is accounted for, and we use only virtual addresses. */ vm_set_page_size(); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 072c90685207..d59bbb8fd5f2 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_kern.c,v 1.6 1994/08/07 14:53:26 davidg Exp $ + * $Id: vm_kern.c,v 1.7 1994/08/18 22:36:02 wollman Exp $ */ /* @@ -78,14 +78,16 @@ #include <vm/vm_pageout.h> #include <vm/vm_kern.h> -vm_map_t buffer_map; -vm_map_t kernel_map; -vm_map_t kmem_map; -vm_map_t mb_map; -vm_map_t io_map; -vm_map_t clean_map; -vm_map_t pager_map; -vm_map_t phys_map; +vm_map_t buffer_map; +vm_map_t kernel_map; +vm_map_t kmem_map; +vm_map_t mb_map; +vm_map_t io_map; +vm_map_t clean_map; +vm_map_t pager_map; +vm_map_t phys_map; +vm_map_t exec_map; +vm_map_t u_map; /* * kmem_alloc_pageable: @@ -94,12 +96,13 @@ vm_map_t phys_map; * map must be "kernel_map" below. */ -vm_offset_t kmem_alloc_pageable(map, size) - vm_map_t map; - register vm_size_t size; +vm_offset_t +kmem_alloc_pageable(map, size) + vm_map_t map; + register vm_size_t size; { - vm_offset_t addr; - register int result; + vm_offset_t addr; + register int result; #if 0 if (map != kernel_map) @@ -110,38 +113,37 @@ vm_offset_t kmem_alloc_pageable(map, size) addr = vm_map_min(map); result = vm_map_find(map, NULL, (vm_offset_t) 0, - &addr, size, TRUE); + &addr, size, TRUE); if (result != KERN_SUCCESS) { - return(0); + return (0); } - - return(addr); + return (addr); } /* * Allocate wired-down memory in the kernel's address map * or a submap. */ -vm_offset_t kmem_alloc(map, size) - register vm_map_t map; - register vm_size_t size; +vm_offset_t +kmem_alloc(map, size) + register vm_map_t map; + register vm_size_t size; { - vm_offset_t addr; - register vm_offset_t offset; - vm_offset_t i; + vm_offset_t addr; + register vm_offset_t offset; + vm_offset_t i; size = round_page(size); /* - * Use the kernel object for wired-down kernel pages. - * Assume that no region of the kernel object is - * referenced more than once. + * Use the kernel object for wired-down kernel pages. Assume that no + * region of the kernel object is referenced more than once. */ /* - * Locate sufficient space in the map. This will give us the - * final virtual address for the new memory, and thus will tell - * us the offset within the kernel map. + * Locate sufficient space in the map. This will give us the final + * virtual address for the new memory, and thus will tell us the + * offset within the kernel map. */ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr)) { @@ -154,56 +156,50 @@ vm_offset_t kmem_alloc(map, size) vm_map_unlock(map); /* - * Guarantee that there are pages already in this object - * before calling vm_map_pageable. 
This is to prevent the - * following scenario: - * - * 1) Threads have swapped out, so that there is a - * pager for the kernel_object. - * 2) The kmsg zone is empty, and so we are kmem_allocing - * a new page for it. - * 3) vm_map_pageable calls vm_fault; there is no page, - * but there is a pager, so we call - * pager_data_request. But the kmsg zone is empty, - * so we must kmem_alloc. - * 4) goto 1 - * 5) Even if the kmsg zone is not empty: when we get - * the data back from the pager, it will be (very - * stale) non-zero data. kmem_alloc is defined to - * return zero-filled memory. - * - * We're intentionally not activating the pages we allocate - * to prevent a race with page-out. vm_map_pageable will wire - * the pages. + * Guarantee that there are pages already in this object before + * calling vm_map_pageable. This is to prevent the following + * scenario: + * + * 1) Threads have swapped out, so that there is a pager for the + * kernel_object. 2) The kmsg zone is empty, and so we are + * kmem_allocing a new page for it. 3) vm_map_pageable calls vm_fault; + * there is no page, but there is a pager, so we call + * pager_data_request. But the kmsg zone is empty, so we must + * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when + * we get the data back from the pager, it will be (very stale) + * non-zero data. kmem_alloc is defined to return zero-filled memory. + * + * We're intentionally not activating the pages we allocate to prevent a + * race with page-out. vm_map_pageable will wire the pages. */ vm_object_lock(kernel_object); - for (i = 0 ; i < size; i+= PAGE_SIZE) { - vm_page_t mem; + for (i = 0; i < size; i += PAGE_SIZE) { + vm_page_t mem; - while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) { + while ((mem = vm_page_alloc(kernel_object, offset + i, 0)) == NULL) { vm_object_unlock(kernel_object); VM_WAIT; vm_object_lock(kernel_object); } vm_page_zero_fill(mem); mem->flags &= ~PG_BUSY; + mem->valid |= VM_PAGE_BITS_ALL; } vm_object_unlock(kernel_object); - + /* - * And finally, mark the data as non-pageable. + * And finally, mark the data as non-pageable. */ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); /* - * Try to coalesce the map + * Try to coalesce the map */ - vm_map_simplify(map, addr); - return(addr); + return (addr); } /* @@ -213,10 +209,11 @@ vm_offset_t kmem_alloc(map, size) * with kmem_alloc, and return the physical pages * associated with that region. 
*/ -void kmem_free(map, addr, size) - vm_map_t map; - register vm_offset_t addr; - vm_size_t size; +void +kmem_free(map, addr, size) + vm_map_t map; + register vm_offset_t addr; + vm_size_t size; { (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); } @@ -234,20 +231,21 @@ void kmem_free(map, addr, size) * min, max Returned endpoints of map * pageable Can the region be paged */ -vm_map_t kmem_suballoc(parent, min, max, size, pageable) - register vm_map_t parent; - vm_offset_t *min, *max; - register vm_size_t size; - boolean_t pageable; +vm_map_t +kmem_suballoc(parent, min, max, size, pageable) + register vm_map_t parent; + vm_offset_t *min, *max; + register vm_size_t size; + boolean_t pageable; { - register int ret; - vm_map_t result; + register int ret; + vm_map_t result; size = round_page(size); *min = (vm_offset_t) vm_map_min(parent); ret = vm_map_find(parent, NULL, (vm_offset_t) 0, - min, size, TRUE); + min, size, TRUE); if (ret != KERN_SUCCESS) { printf("kmem_suballoc: bad status return of %d.\n", ret); panic("kmem_suballoc"); @@ -259,7 +257,7 @@ vm_map_t kmem_suballoc(parent, min, max, size, pageable) panic("kmem_suballoc: cannot create submap"); if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); - return(result); + return (result); } /* @@ -280,14 +278,14 @@ vm_map_t kmem_suballoc(parent, min, max, size, pageable) */ vm_offset_t kmem_malloc(map, size, canwait) - register vm_map_t map; - register vm_size_t size; - boolean_t canwait; + register vm_map_t map; + register vm_size_t size; + boolean_t canwait; { - register vm_offset_t offset, i; - vm_map_entry_t entry; - vm_offset_t addr; - vm_page_t m; + register vm_offset_t offset, i; + vm_map_entry_t entry; + vm_offset_t addr; + vm_page_t m; if (map != kmem_map && map != mb_map) panic("kern_malloc_alloc: map != {kmem,mb}_map"); @@ -296,15 +294,15 @@ kmem_malloc(map, size, canwait) addr = vm_map_min(map); /* - * Locate sufficient space in the map. This will give us the - * final virtual address for the new memory, and thus will tell - * us the offset within the kernel map. + * Locate sufficient space in the map. This will give us the final + * virtual address for the new memory, and thus will tell us the + * offset within the kernel map. */ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr)) { vm_map_unlock(map); #if 0 - if (canwait) /* XXX should wait */ + if (canwait) /* XXX should wait */ panic("kmem_malloc: %s too small", map == kmem_map ? "kmem_map" : "mb_map"); #endif @@ -317,29 +315,28 @@ kmem_malloc(map, size, canwait) vm_map_insert(map, kmem_object, offset, addr, addr + size); /* - * If we can wait, just mark the range as wired - * (will fault pages as necessary). + * If we can wait, just mark the range as wired (will fault pages as + * necessary). */ if (canwait) { vm_map_unlock(map); (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, - FALSE); + FALSE); vm_map_simplify(map, addr); - return(addr); + return (addr); } - /* * If we cannot wait then we must allocate all memory up front, * pulling it off the active queue to prevent pageout. */ vm_object_lock(kmem_object); for (i = 0; i < size; i += PAGE_SIZE) { - m = vm_page_alloc(kmem_object, offset + i); + m = vm_page_alloc(kmem_object, offset + i, 1); /* - * Ran out of space, free everything up and return. - * Don't need to lock page queues here as we know - * that the pages we got aren't on any queues. + * Ran out of space, free everything up and return. 
Don't need + * to lock page queues here as we know that the pages we got + * aren't on any queues. */ if (m == NULL) { while (i != 0) { @@ -350,20 +347,21 @@ kmem_malloc(map, size, canwait) vm_object_unlock(kmem_object); vm_map_delete(map, addr, addr + size); vm_map_unlock(map); - return(0); + return (0); } #if 0 vm_page_zero_fill(m); #endif m->flags &= ~PG_BUSY; + m->valid |= VM_PAGE_BITS_ALL; } vm_object_unlock(kmem_object); /* - * Mark map entry as non-pageable. - * Assert: vm_map_insert() will never be able to extend the previous - * entry so there will be a new entry exactly corresponding to this - * address range and it will have wired_count == 0. + * Mark map entry as non-pageable. Assert: vm_map_insert() will never + * be able to extend the previous entry so there will be a new entry + * exactly corresponding to this address range and it will have + * wired_count == 0. */ if (!vm_map_lookup_entry(map, addr, &entry) || entry->start != addr || entry->end != addr + size || @@ -372,20 +370,20 @@ kmem_malloc(map, size, canwait) entry->wired_count++; /* - * Loop thru pages, entering them in the pmap. - * (We cannot add them to the wired count without - * wrapping the vm_page_queue_lock in splimp...) + * Loop thru pages, entering them in the pmap. (We cannot add them to + * the wired count without wrapping the vm_page_queue_lock in + * splimp...) */ for (i = 0; i < size; i += PAGE_SIZE) { vm_object_lock(kmem_object); m = vm_page_lookup(kmem_object, offset + i); vm_object_unlock(kmem_object); - pmap_kenter( addr + i, VM_PAGE_TO_PHYS(m)); + pmap_kenter(addr + i, VM_PAGE_TO_PHYS(m)); } vm_map_unlock(map); vm_map_simplify(map, addr); - return(addr); + return (addr); } /* @@ -395,18 +393,19 @@ kmem_malloc(map, size, canwait) * has no room, the caller sleeps waiting for more memory in the submap. * */ -vm_offset_t kmem_alloc_wait(map, size) - vm_map_t map; - vm_size_t size; +vm_offset_t +kmem_alloc_wait(map, size) + vm_map_t map; + vm_size_t size; { - vm_offset_t addr; + vm_offset_t addr; size = round_page(size); for (;;) { /* - * To make this work for more than one map, - * use the map's lock to lock out sleepers/wakers. + * To make this work for more than one map, use the map's lock + * to lock out sleepers/wakers. */ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr) == 0) @@ -416,11 +415,11 @@ vm_offset_t kmem_alloc_wait(map, size) vm_map_unlock(map); return (0); } - assert_wait((int)map, TRUE); + assert_wait((int) map, TRUE); vm_map_unlock(map); thread_block("kmaw"); } - vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size); + vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size); vm_map_unlock(map); return (addr); } @@ -431,14 +430,15 @@ vm_offset_t kmem_alloc_wait(map, size) * Returns memory to a submap of the kernel, and wakes up any threads * waiting for memory in that map. */ -void kmem_free_wakeup(map, addr, size) - vm_map_t map; - vm_offset_t addr; - vm_size_t size; +void +kmem_free_wakeup(map, addr, size) + vm_map_t map; + vm_offset_t addr; + vm_size_t size; { vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); - thread_wakeup((int)map); + thread_wakeup((int) map); vm_map_unlock(map); } @@ -448,7 +448,8 @@ void kmem_free_wakeup(map, addr, size) * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and * the range between `start' and `end' as free. 
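/*
 * Illustrative sketch only: the shape of kmem_alloc_wait() above -- fail at
 * once if the request can never fit, otherwise sleep until space is freed
 * (kmem_free_wakeup() provides the wakeup) and retry.  The arena is a toy
 * bump allocator and pthreads stand in for assert_wait()/thread_block()/
 * thread_wakeup(); every name is invented for the example.
 */
#include <pthread.h>
#include <stddef.h>

struct xarena {
        pthread_mutex_t lock;
        pthread_cond_t  freed;
        size_t          used, size;
};

/* Returns an offset into the arena, or -1 if the request can never fit. */
static long
xarena_alloc_wait(struct xarena *a, size_t len)
{
        long off;

        pthread_mutex_lock(&a->lock);
        for (;;) {
                if (len > a->size) {            /* larger than the whole map */
                        off = -1;
                        break;
                }
                if (a->used + len <= a->size) { /* fits right now */
                        off = (long)a->used;
                        a->used += len;
                        break;
                }
                /* no room: sleep until xarena_free() signals, then retry */
                pthread_cond_wait(&a->freed, &a->lock);
        }
        pthread_mutex_unlock(&a->lock);
        return (off);
}

static void
xarena_free(struct xarena *a, size_t len)
{
        pthread_mutex_lock(&a->lock);
        a->used -= len;                         /* toy bookkeeping only */
        pthread_cond_broadcast(&a->freed);      /* cf. thread_wakeup(map) */
        pthread_mutex_unlock(&a->lock);
}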
*/ -void kmem_init(start, end) +void +kmem_init(start, end) vm_offset_t start, end; { register vm_map_t m; @@ -457,7 +458,7 @@ void kmem_init(start, end) vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; - (void) vm_map_insert(m, NULL, (vm_offset_t)0, + (void) vm_map_insert(m, NULL, (vm_offset_t) 0, VM_MIN_KERNEL_ADDRESS, start); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h index 087dd378bb64..627c70d2a6f1 100644 --- a/sys/vm/vm_kern.h +++ b/sys/vm/vm_kern.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,20 +61,24 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_kern.h,v 1.3 1994/08/02 07:55:23 davidg Exp $ + * $Id: vm_kern.h,v 1.4 1994/08/18 22:36:03 wollman Exp $ */ #ifndef _VM_VM_KERN_H_ #define _VM_VM_KERN_H_ 1 /* Kernel memory management definitions. */ -extern vm_map_t buffer_map; -extern vm_map_t kernel_map; -extern vm_map_t kmem_map; -extern vm_map_t mb_map; -extern vm_map_t io_map; -extern vm_map_t clean_map; -extern vm_map_t pager_map; -extern vm_map_t phys_map; +extern vm_map_t buffer_map; +extern vm_map_t kernel_map; +extern vm_map_t kmem_map; +extern vm_map_t mb_map; +extern vm_map_t io_map; +extern vm_map_t clean_map; +extern vm_map_t pager_map; +extern vm_map_t phys_map; +extern vm_map_t exec_map; +extern vm_map_t u_map; + +extern vm_offset_t kernel_vm_end; -#endif /* _VM_VM_KERN_H_ */ +#endif /* _VM_VM_KERN_H_ */ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 1e2416757ac3..94b064af984c 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.c,v 1.8 1994/12/18 10:28:40 davidg Exp $ + * $Id: vm_map.c,v 1.9 1994/12/18 13:58:41 davidg Exp $ */ /* @@ -135,19 +135,20 @@ * maps and requires map entries. */ -vm_offset_t kentry_data; -vm_size_t kentry_data_size; -vm_map_entry_t kentry_free; -vm_map_t kmap_free; +vm_offset_t kentry_data; +vm_size_t kentry_data_size; +vm_map_entry_t kentry_free; +vm_map_t kmap_free; -int kentry_count; -static vm_offset_t mapvm_start=0, mapvm=0, mapvmmax; -static int mapvmpgcnt=0; +int kentry_count; +static vm_offset_t mapvm_start = 0, mapvm = 0, mapvmmax; +static int mapvmpgcnt = 0; -static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); -static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); +static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); +static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); -void vm_map_startup() +void +vm_map_startup() { register int i; register vm_map_entry_t mep; @@ -166,8 +167,8 @@ void vm_map_startup() mp++->header.next = NULL; /* - * Form a free list of statically allocated kernel map entries - * with the rest. + * Form a free list of statically allocated kernel map entries with + * the rest. */ kentry_free = mep = (vm_map_entry_t) mp; kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; @@ -189,8 +190,10 @@ vmspace_alloc(min, max, pageable) int pageable; { register struct vmspace *vm; + if (mapvmpgcnt == 0 && mapvm == 0) { int s; + mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; s = splhigh(); mapvm_start = mapvm = kmem_alloc_pageable(kmem_map, mapvmpgcnt * PAGE_SIZE); @@ -199,12 +202,11 @@ vmspace_alloc(min, max, pageable) if (!mapvm) mapvmpgcnt = 0; } - MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); - bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); + bzero(vm, (caddr_t) & vm->vm_startcopy - (caddr_t) vm); vm_map_init(&vm->vm_map, min, max, pageable); pmap_pinit(&vm->vm_pmap); - vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ + vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ vm->vm_refcnt = 1; return (vm); } @@ -217,8 +219,8 @@ vmspace_free(vm) if (--vm->vm_refcnt == 0) { /* * Lock the map, to wait out all other references to it. - * Delete all of the mappings and pages they hold, - * then call the pmap module to reclaim anything left. + * Delete all of the mappings and pages they hold, then call + * the pmap module to reclaim anything left. */ vm_map_lock(&vm->vm_map); (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, @@ -235,12 +237,13 @@ vmspace_free(vm) * the given physical map structure, and having * the given lower and upper address bounds. 
*/ -vm_map_t vm_map_create(pmap, min, max, pageable) - pmap_t pmap; - vm_offset_t min, max; - boolean_t pageable; +vm_map_t +vm_map_create(pmap, min, max, pageable) + pmap_t pmap; + vm_offset_t min, max; + boolean_t pageable; { - register vm_map_t result; + register vm_map_t result; if (kmem_map == NULL) { result = kmap_free; @@ -249,11 +252,11 @@ vm_map_t vm_map_create(pmap, min, max, pageable) panic("vm_map_create: out of maps"); } else MALLOC(result, vm_map_t, sizeof(struct vm_map), - M_VMMAP, M_WAITOK); + M_VMMAP, M_WAITOK); vm_map_init(result, min, max, pageable); result->pmap = pmap; - return(result); + return (result); } /* @@ -264,8 +267,8 @@ vm_map_t vm_map_create(pmap, min, max, pageable) void vm_map_init(map, min, max, pageable) register struct vm_map *map; - vm_offset_t min, max; - boolean_t pageable; + vm_offset_t min, max; + boolean_t pageable; { map->header.next = map->header.prev = &map->header; map->nentries = 0; @@ -294,10 +297,11 @@ static int mappoolcnt; vm_map_entry_t vm_map_entry_create(map) - vm_map_t map; + vm_map_t map; { - vm_map_entry_t entry; + vm_map_entry_t entry; int i; + #define KENTRY_LOW_WATER 64 #define MAPENTRY_LOW_WATER 128 @@ -307,15 +311,17 @@ vm_map_entry_create(map) if (kentry_count < KENTRY_LOW_WATER) { if (mapvmpgcnt && mapvm) { vm_page_t m; - m = vm_page_alloc(kmem_object, - mapvm-vm_map_min(kmem_map)); + + m = vm_page_alloc(kmem_object, + mapvm - vm_map_min(kmem_map), 0); if (m) { int newentries; - newentries = (NBPG/sizeof (struct vm_map_entry)); + + newentries = (NBPG / sizeof(struct vm_map_entry)); vm_page_wire(m); m->flags &= ~PG_BUSY; pmap_enter(vm_map_pmap(kmem_map), mapvm, - VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); + VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); entry = (vm_map_entry_t) mapvm; mapvm += NBPG; @@ -328,7 +334,6 @@ vm_map_entry_create(map) } } } - if (map == kernel_map || map == kmem_map || map == pager_map) { entry = kentry_free; @@ -337,14 +342,12 @@ vm_map_entry_create(map) --kentry_count; return entry; } - entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; return entry; } - } else { entry = mappool; if (entry) { @@ -352,14 +355,13 @@ vm_map_entry_create(map) --mappoolcnt; return entry; } - MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), - M_VMMAPENT, M_WAITOK); + M_VMMAPENT, M_WAITOK); } if (entry == NULL) panic("vm_map_entry_create: out of map entries"); - return(entry); + return (entry); } /* @@ -369,12 +371,12 @@ vm_map_entry_create(map) */ void vm_map_entry_dispose(map, entry) - vm_map_t map; - vm_map_entry_t entry; + vm_map_t map; + vm_map_entry_t entry; { - if ( (kentry_count < KENTRY_LOW_WATER) || - ((vm_offset_t)entry >= kentry_data && (vm_offset_t)entry < (kentry_data + kentry_data_size)) || - ((vm_offset_t)entry >= mapvm_start && (vm_offset_t)entry < mapvmmax)) { + if ((kentry_count < KENTRY_LOW_WATER) || + ((vm_offset_t) entry >= kentry_data && (vm_offset_t) entry < (kentry_data + kentry_data_size)) || + ((vm_offset_t) entry >= mapvm_start && (vm_offset_t) entry < mapvmmax)) { entry->next = kentry_free; kentry_free = entry; ++kentry_count; @@ -386,7 +388,6 @@ vm_map_entry_dispose(map, entry) ++mappoolcnt; return; } - FREE(entry, M_VMMAPENT); } } @@ -417,8 +418,9 @@ vm_map_entry_dispose(map, entry) * Creates another valid reference to the given map. * */ -void vm_map_reference(map) - register vm_map_t map; +void +vm_map_reference(map) + register vm_map_t map; { if (map == NULL) return; @@ -435,10 +437,11 @@ void vm_map_reference(map) * destroying it if no references remain. 
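/*
 * Illustrative sketch only: the replenishment step in vm_map_entry_create()
 * above, where one fresh page is carved into as many fixed-size map entries
 * as fit and pushed onto a free list.  malloc() stands in for pulling a
 * wired page out of kmem_object; the entry layout and names are invented.
 */
#include <stdio.h>
#include <stdlib.h>

#define XNBPG 4096

struct xentry {
        struct xentry *next;
        unsigned long start, end;       /* payload, unused while on the free list */
};

static struct xentry *xentry_free;      /* head of the free list */
static int xentry_count;

static void
xentry_replenish(void)
{
        char *page = malloc(XNBPG);     /* stand-in for one wired kernel page */
        int i, n = (int)(XNBPG / sizeof(struct xentry));

        if (page == NULL)
                return;
        for (i = 0; i < n; i++) {
                struct xentry *e = (struct xentry *)(page + i * sizeof(struct xentry));

                e->next = xentry_free;  /* push onto the free list */
                xentry_free = e;
                xentry_count++;
        }
}

int
main(void)
{
        xentry_replenish();
        printf("%d entries carved from one page\n", xentry_count);
        return (0);
}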
* The map should not be locked. */ -void vm_map_deallocate(map) - register vm_map_t map; +void +vm_map_deallocate(map) + register vm_map_t map; { - register int c; + register int c; if (map == NULL) return; @@ -450,10 +453,8 @@ void vm_map_deallocate(map) if (c > 0) { return; } - /* - * Lock the map, to wait out all other references - * to it. + * Lock the map, to wait out all other references to it. */ vm_map_lock(map); @@ -476,47 +477,45 @@ void vm_map_deallocate(map) */ int vm_map_insert(map, object, offset, start, end) - vm_map_t map; - vm_object_t object; - vm_offset_t offset; - vm_offset_t start; - vm_offset_t end; + vm_map_t map; + vm_object_t object; + vm_offset_t offset; + vm_offset_t start; + vm_offset_t end; { - register vm_map_entry_t new_entry; - register vm_map_entry_t prev_entry; - vm_map_entry_t temp_entry; + register vm_map_entry_t new_entry; + register vm_map_entry_t prev_entry; + vm_map_entry_t temp_entry; /* - * Check that the start and end points are not bogus. + * Check that the start and end points are not bogus. */ if ((start < map->min_offset) || (end > map->max_offset) || - (start >= end)) - return(KERN_INVALID_ADDRESS); + (start >= end)) + return (KERN_INVALID_ADDRESS); /* - * Find the entry prior to the proposed - * starting address; if it's part of an - * existing entry, this range is bogus. + * Find the entry prior to the proposed starting address; if it's part + * of an existing entry, this range is bogus. */ if (vm_map_lookup_entry(map, start, &temp_entry)) - return(KERN_NO_SPACE); + return (KERN_NO_SPACE); prev_entry = temp_entry; /* - * Assert that the next entry doesn't overlap the - * end point. + * Assert that the next entry doesn't overlap the end point. */ if ((prev_entry->next != &map->header) && - (prev_entry->next->start < end)) - return(KERN_NO_SPACE); + (prev_entry->next->start < end)) + return (KERN_NO_SPACE); /* - * See if we can avoid creating a new entry by - * extending one of our neighbors. + * See if we can avoid creating a new entry by extending one of our + * neighbors. */ if (object == NULL) { @@ -531,26 +530,25 @@ vm_map_insert(map, object, offset, start, end) (prev_entry->wired_count == 0)) { if (vm_object_coalesce(prev_entry->object.vm_object, - NULL, - prev_entry->offset, - (vm_offset_t) 0, - (vm_size_t)(prev_entry->end - - prev_entry->start), - (vm_size_t)(end - prev_entry->end))) { + NULL, + prev_entry->offset, + (vm_offset_t) 0, + (vm_size_t) (prev_entry->end + - prev_entry->start), + (vm_size_t) (end - prev_entry->end))) { /* - * Coalesced the two objects - can extend - * the previous map entry to include the - * new range. + * Coalesced the two objects - can extend the + * previous map entry to include the new + * range. 
*/ map->size += (end - prev_entry->end); prev_entry->end = end; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } } } - /* - * Create a new entry + * Create a new entry */ new_entry = vm_map_entry_create(map); @@ -571,22 +569,21 @@ vm_map_insert(map, object, offset, start, end) new_entry->max_protection = VM_PROT_DEFAULT; new_entry->wired_count = 0; } - /* - * Insert the new entry into the list + * Insert the new entry into the list */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; /* - * Update the free space hint + * Update the free space hint */ if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) map->first_free = new_entry; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -610,17 +607,17 @@ vm_map_insert(map, object, offset, start, end) * result indicates whether the address is * actually contained in the map. */ -boolean_t vm_map_lookup_entry(map, address, entry) - register vm_map_t map; - register vm_offset_t address; - vm_map_entry_t *entry; /* OUT */ +boolean_t +vm_map_lookup_entry(map, address, entry) + register vm_map_t map; + register vm_offset_t address; + vm_map_entry_t *entry; /* OUT */ { - register vm_map_entry_t cur; - register vm_map_entry_t last; + register vm_map_entry_t cur; + register vm_map_entry_t last; /* - * Start looking either from the head of the - * list, or from the hint. + * Start looking either from the head of the list, or from the hint. */ simple_lock(&map->hint_lock); @@ -631,46 +628,43 @@ boolean_t vm_map_lookup_entry(map, address, entry) cur = cur->next; if (address >= cur->start) { - /* - * Go from hint to end of list. - * - * But first, make a quick check to see if - * we are already looking at the entry we - * want (which is usually the case). - * Note also that we don't need to save the hint - * here... it is the same hint (unless we are - * at the header, in which case the hint didn't - * buy us anything anyway). + /* + * Go from hint to end of list. + * + * But first, make a quick check to see if we are already looking + * at the entry we want (which is usually the case). Note also + * that we don't need to save the hint here... it is the same + * hint (unless we are at the header, in which case the hint + * didn't buy us anything anyway). */ last = &map->header; if ((cur != last) && (cur->end > address)) { *entry = cur; - return(TRUE); + return (TRUE); } - } - else { - /* - * Go from start to hint, *inclusively* + } else { + /* + * Go from start to hint, *inclusively* */ last = cur->next; cur = map->header.next; } /* - * Search linearly + * Search linearly */ while (cur != last) { if (cur->end > address) { if (address >= cur->start) { - /* - * Save this lookup for future - * hints, and return + /* + * Save this lookup for future hints, and + * return */ *entry = cur; SAVE_HINT(map, cur); - return(TRUE); + return (TRUE); } break; } @@ -678,7 +672,7 @@ boolean_t vm_map_lookup_entry(map, address, entry) } *entry = cur->prev; SAVE_HINT(map, *entry); - return(FALSE); + return (FALSE); } /* @@ -701,22 +695,23 @@ vm_map_findspace(map, start, length, addr) return (1); /* - * Look for the first possible address; if there's already - * something at this address, we have to start after it. + * Look for the first possible address; if there's already something + * at this address, we have to start after it. 
*/ if (start == map->min_offset) { if ((entry = map->first_free) != &map->header) start = entry->end; } else { vm_map_entry_t tmp; + if (vm_map_lookup_entry(map, start, &tmp)) start = tmp->end; entry = tmp; } /* - * Look through the rest of the map, trying to fit a new region in - * the gap between existing regions, or after the very last region. + * Look through the rest of the map, trying to fit a new region in the + * gap between existing regions, or after the very last region. */ for (;; start = (entry = next)->end) { /* @@ -735,6 +730,8 @@ vm_map_findspace(map, start, length, addr) } SAVE_HINT(map, entry); *addr = start; + if (map == kernel_map && round_page(start + length) > kernel_vm_end) + pmap_growkernel(round_page(start + length)); return (0); } @@ -747,15 +744,15 @@ vm_map_findspace(map, start, length, addr) */ int vm_map_find(map, object, offset, addr, length, find_space) - vm_map_t map; - vm_object_t object; - vm_offset_t offset; - vm_offset_t *addr; /* IN/OUT */ - vm_size_t length; - boolean_t find_space; + vm_map_t map; + vm_object_t object; + vm_offset_t offset; + vm_offset_t *addr; /* IN/OUT */ + vm_size_t length; + boolean_t find_space; { - register vm_offset_t start; - int result, s = 0; + register vm_offset_t start; + int result, s = 0; start = *addr; vm_map_lock(map); @@ -788,59 +785,56 @@ vm_map_find(map, object, offset, addr, length, find_space) * removing extra sharing maps * [XXX maybe later] merging with a neighbor */ -void vm_map_simplify_entry(map, entry) - vm_map_t map; - vm_map_entry_t entry; +void +vm_map_simplify_entry(map, entry) + vm_map_t map; + vm_map_entry_t entry; { #ifdef lint map++; #endif /* - * If this entry corresponds to a sharing map, then - * see if we can remove the level of indirection. - * If it's not a sharing map, then it points to - * a VM object, so see if we can merge with either - * of our neighbors. + * If this entry corresponds to a sharing map, then see if we can + * remove the level of indirection. If it's not a sharing map, then it + * points to a VM object, so see if we can merge with either of our + * neighbors. */ if (entry->is_sub_map) return; if (entry->is_a_map) { #if 0 - vm_map_t my_share_map; - int count; + vm_map_t my_share_map; + int count; - my_share_map = entry->object.share_map; + my_share_map = entry->object.share_map; simple_lock(&my_share_map->ref_lock); count = my_share_map->ref_count; simple_unlock(&my_share_map->ref_lock); - + if (count == 1) { - /* Can move the region from - * entry->start to entry->end (+ entry->offset) - * in my_share_map into place of entry. - * Later. + /* + * Can move the region from entry->start to entry->end + * (+ entry->offset) in my_share_map into place of + * entry. Later. */ } #endif - } - else { + } else { /* - * Try to merge with our neighbors. - * - * Conditions for merge are: - * - * 1. entries are adjacent. - * 2. both entries point to objects - * with null pagers. - * - * If a merge is possible, we replace the two - * entries with a single entry, then merge - * the two objects into a single object. - * - * Now, all that is left to do is write the - * code! + * Try to merge with our neighbors. + * + * Conditions for merge are: + * + * 1. entries are adjacent. 2. both entries point to objects + * with null pagers. + * + * If a merge is possible, we replace the two entries with a + * single entry, then merge the two objects into a single + * object. + * + * Now, all that is left to do is write the code! 
*/ } } @@ -862,25 +856,24 @@ void vm_map_simplify_entry(map, entry) * This routine is called only when it is known that * the entry must be split. */ -static void _vm_map_clip_start(map, entry, start) - register vm_map_t map; - register vm_map_entry_t entry; - register vm_offset_t start; +static void +_vm_map_clip_start(map, entry, start) + register vm_map_t map; + register vm_map_entry_t entry; + register vm_offset_t start; { - register vm_map_entry_t new_entry; + register vm_map_entry_t new_entry; /* - * See if we can simplify this entry first + * See if we can simplify this entry first */ - + /* vm_map_simplify_entry(map, entry); */ /* - * Split off the front portion -- - * note that we must insert the new - * entry BEFORE this one, so that - * this entry has the specified starting - * address. + * Split off the front portion -- note that we must insert the new + * entry BEFORE this one, so that this entry has the specified + * starting address. */ new_entry = vm_map_entry_create(map); @@ -893,7 +886,7 @@ static void _vm_map_clip_start(map, entry, start) vm_map_entry_link(map, entry->prev, new_entry); if (entry->is_a_map || entry->is_sub_map) - vm_map_reference(new_entry->object.share_map); + vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } @@ -916,16 +909,16 @@ static void _vm_map_clip_start(map, entry, start) * This routine is called only when it is known that * the entry must be split. */ -static void _vm_map_clip_end(map, entry, end) - register vm_map_t map; - register vm_map_entry_t entry; - register vm_offset_t end; +static void +_vm_map_clip_end(map, entry, end) + register vm_map_t map; + register vm_map_entry_t entry; + register vm_offset_t end; { - register vm_map_entry_t new_entry; + register vm_map_entry_t new_entry; /* - * Create a new entry and insert it - * AFTER the specified entry + * Create a new entry and insert it AFTER the specified entry */ new_entry = vm_map_entry_create(map); @@ -937,7 +930,7 @@ static void _vm_map_clip_end(map, entry, end) vm_map_entry_link(map, entry, new_entry); if (entry->is_a_map || entry->is_sub_map) - vm_map_reference(new_entry->object.share_map); + vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } @@ -978,13 +971,13 @@ static void _vm_map_clip_end(map, entry, end) */ int vm_map_submap(map, start, end, submap) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - vm_map_t submap; + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + vm_map_t submap; { - vm_map_entry_t entry; - register int result = KERN_INVALID_ARGUMENT; + vm_map_entry_t entry; + register int result = KERN_INVALID_ARGUMENT; vm_map_lock(map); @@ -992,8 +985,7 @@ vm_map_submap(map, start, end, submap) if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); - } - else + } else entry = entry->next; vm_map_clip_end(map, entry, end); @@ -1009,7 +1001,7 @@ vm_map_submap(map, start, end, submap) } vm_map_unlock(map); - return(result); + return (result); } /* @@ -1022,14 +1014,14 @@ vm_map_submap(map, start, end, submap) */ int vm_map_protect(map, start, end, new_prot, set_max) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register vm_prot_t new_prot; - register boolean_t set_max; + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_prot_t new_prot; + register boolean_t set_max; { - register 
vm_map_entry_t current; - vm_map_entry_t entry; + register vm_map_entry_t current; + vm_map_entry_t entry; vm_map_lock(map); @@ -1037,50 +1029,47 @@ vm_map_protect(map, start, end, new_prot, set_max) if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); - } - else + } else entry = entry->next; /* - * Make a first pass to check for protection - * violations. + * Make a first pass to check for protection violations. */ current = entry; while ((current != &map->header) && (current->start < end)) { if (current->is_sub_map) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); - return(KERN_PROTECTION_FAILURE); + return (KERN_PROTECTION_FAILURE); } - current = current->next; } /* - * Go back and fix up protections. - * [Note that clipping is not necessary the second time.] + * Go back and fix up protections. [Note that clipping is not + * necessary the second time.] */ current = entry; while ((current != &map->header) && (current->start < end)) { - vm_prot_t old_prot; + vm_prot_t old_prot; vm_map_clip_end(map, current, end); old_prot = current->protection; if (set_max) current->protection = - (current->max_protection = new_prot) & - old_prot; + (current->max_protection = new_prot) & + old_prot; else current->protection = new_prot; /* - * Update physical map if necessary. - * Worry about copy-on-write here -- CHECK THIS XXX + * Update physical map if necessary. Worry about copy-on-write + * here -- CHECK THIS XXX */ if (current->protection != old_prot) { @@ -1090,40 +1079,39 @@ vm_map_protect(map, start, end, new_prot, set_max) #define max(a,b) ((a) > (b) ? (a) : (b)) if (current->is_a_map) { - vm_map_entry_t share_entry; - vm_offset_t share_end; + vm_map_entry_t share_entry; + vm_offset_t share_end; vm_map_lock(current->object.share_map); (void) vm_map_lookup_entry( - current->object.share_map, - current->offset, - &share_entry); + current->object.share_map, + current->offset, + &share_entry); share_end = current->offset + - (current->end - current->start); + (current->end - current->start); while ((share_entry != ¤t->object.share_map->header) && - (share_entry->start < share_end)) { + (share_entry->start < share_end)) { pmap_protect(map->pmap, - (max(share_entry->start, - current->offset) - - current->offset + - current->start), - min(share_entry->end, - share_end) - + (max(share_entry->start, + current->offset) - current->offset + - current->start, - current->protection & - MASK(share_entry)); + current->start), + min(share_entry->end, + share_end) - + current->offset + + current->start, + current->protection & + MASK(share_entry)); share_entry = share_entry->next; } vm_map_unlock(current->object.share_map); - } - else - pmap_protect(map->pmap, current->start, - current->end, - current->protection & MASK(entry)); + } else + pmap_protect(map->pmap, current->start, + current->end, + current->protection & MASK(entry)); #undef max #undef MASK } @@ -1131,7 +1119,7 @@ vm_map_protect(map, start, end, new_prot, set_max) } vm_map_unlock(map); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1144,13 +1132,13 @@ vm_map_protect(map, start, end, new_prot, set_max) */ int vm_map_inherit(map, start, end, new_inheritance) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register vm_inherit_t new_inheritance; + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_inherit_t new_inheritance; { - register 
vm_map_entry_t entry; - vm_map_entry_t temp_entry; + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; switch (new_inheritance) { case VM_INHERIT_NONE: @@ -1158,7 +1146,7 @@ vm_map_inherit(map, start, end, new_inheritance) case VM_INHERIT_SHARE: break; default: - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } vm_map_lock(map); @@ -1168,8 +1156,7 @@ vm_map_inherit(map, start, end, new_inheritance) if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; vm_map_clip_start(map, entry, start); - } - else + } else entry = temp_entry->next; while ((entry != &map->header) && (entry->start < end)) { @@ -1181,7 +1168,7 @@ vm_map_inherit(map, start, end, new_inheritance) } vm_map_unlock(map); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1197,37 +1184,37 @@ vm_map_inherit(map, start, end, new_inheritance) */ int vm_map_pageable(map, start, end, new_pageable) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register boolean_t new_pageable; + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register boolean_t new_pageable; { - register vm_map_entry_t entry; - vm_map_entry_t start_entry; - register vm_offset_t failed = 0; - int rv; + register vm_map_entry_t entry; + vm_map_entry_t start_entry; + register vm_offset_t failed = 0; + int rv; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); /* - * Only one pageability change may take place at one - * time, since vm_fault assumes it will be called - * only once for each wiring/unwiring. Therefore, we - * have to make sure we're actually changing the pageability - * for the entire region. We do so before making any changes. + * Only one pageability change may take place at one time, since + * vm_fault assumes it will be called only once for each + * wiring/unwiring. Therefore, we have to make sure we're actually + * changing the pageability for the entire region. We do so before + * making any changes. */ if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); + return (KERN_INVALID_ADDRESS); } entry = start_entry; /* - * Actions are rather different for wiring and unwiring, - * so we have two separate cases. + * Actions are rather different for wiring and unwiring, so we have + * two separate cases. */ if (new_pageable) { @@ -1235,192 +1222,185 @@ vm_map_pageable(map, start, end, new_pageable) vm_map_clip_start(map, entry, start); /* - * Unwiring. First ensure that the range to be - * unwired is really wired down and that there - * are no holes. + * Unwiring. First ensure that the range to be unwired is + * really wired down and that there are no holes. */ while ((entry != &map->header) && (entry->start < end)) { - if (entry->wired_count == 0 || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - vm_map_unlock(map); - return(KERN_INVALID_ARGUMENT); - } - entry = entry->next; + if (entry->wired_count == 0 || + (entry->end < end && + (entry->next == &map->header || + entry->next->start > entry->end))) { + vm_map_unlock(map); + return (KERN_INVALID_ARGUMENT); + } + entry = entry->next; } /* - * Now decrement the wiring count for each region. - * If a region becomes completely unwired, - * unwire its physical pages and mappings. + * Now decrement the wiring count for each region. If a region + * becomes completely unwired, unwire its physical pages and + * mappings. 
*/ lock_set_recursive(&map->lock); entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, entry, end); - entry->wired_count--; - if (entry->wired_count == 0) - vm_fault_unwire(map, entry->start, entry->end); + entry->wired_count--; + if (entry->wired_count == 0) + vm_fault_unwire(map, entry->start, entry->end); - entry = entry->next; + entry = entry->next; } lock_clear_recursive(&map->lock); - } - - else { + } else { /* - * Wiring. We must do this in two passes: - * - * 1. Holding the write lock, we create any shadow - * or zero-fill objects that need to be created. - * Then we clip each map entry to the region to be - * wired and increment its wiring count. We - * create objects before clipping the map entries - * to avoid object proliferation. - * - * 2. We downgrade to a read lock, and call - * vm_fault_wire to fault in the pages for any - * newly wired area (wired_count is 1). - * - * Downgrading to a read lock for vm_fault_wire avoids - * a possible deadlock with another thread that may have - * faulted on one of the pages to be wired (it would mark - * the page busy, blocking us, then in turn block on the - * map lock that we hold). Because of problems in the - * recursive lock package, we cannot upgrade to a write - * lock in vm_map_lookup. Thus, any actions that require - * the write lock must be done beforehand. Because we - * keep the read lock on the map, the copy-on-write status - * of the entries we modify here cannot change. + * Wiring. We must do this in two passes: + * + * 1. Holding the write lock, we create any shadow or zero-fill + * objects that need to be created. Then we clip each map + * entry to the region to be wired and increment its wiring + * count. We create objects before clipping the map entries + * to avoid object proliferation. + * + * 2. We downgrade to a read lock, and call vm_fault_wire to + * fault in the pages for any newly wired area (wired_count is + * 1). + * + * Downgrading to a read lock for vm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one + * of the pages to be wired (it would mark the page busy, + * blocking us, then in turn block on the map lock that we + * hold). Because of problems in the recursive lock package, + * we cannot upgrade to a write lock in vm_map_lookup. Thus, + * any actions that require the write lock must be done + * beforehand. Because we keep the read lock on the map, the + * copy-on-write status of the entries we modify here cannot + * change. */ /* - * Pass 1. + * Pass 1. */ while ((entry != &map->header) && (entry->start < end)) { - if (entry->wired_count == 0) { + if (entry->wired_count == 0) { - /* - * Perform actions of vm_map_lookup that need - * the write lock on the map: create a shadow - * object for a copy-on-write region, or an - * object for a zero-fill region. - * - * We don't have to do this for entries that - * point to sharing maps, because we won't hold - * the lock on the sharing map. - */ - if (!entry->is_a_map) { - if (entry->needs_copy && - ((entry->protection & VM_PROT_WRITE) != 0)) { + /* + * Perform actions of vm_map_lookup that need + * the write lock on the map: create a shadow + * object for a copy-on-write region, or an + * object for a zero-fill region. + * + * We don't have to do this for entries that + * point to sharing maps, because we won't + * hold the lock on the sharing map. 
+ */ + if (!entry->is_a_map) { + if (entry->needs_copy && + ((entry->protection & VM_PROT_WRITE) != 0)) { - vm_object_shadow(&entry->object.vm_object, - &entry->offset, - (vm_size_t)(entry->end + vm_object_shadow(&entry->object.vm_object, + &entry->offset, + (vm_size_t) (entry->end - entry->start)); - entry->needs_copy = FALSE; - } - else if (entry->object.vm_object == NULL) { - entry->object.vm_object = - vm_object_allocate((vm_size_t)(entry->end - - entry->start)); - entry->offset = (vm_offset_t)0; - } + entry->needs_copy = FALSE; + } else if (entry->object.vm_object == NULL) { + entry->object.vm_object = + vm_object_allocate((vm_size_t) (entry->end + - entry->start)); + entry->offset = (vm_offset_t) 0; + } + } } - } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); - entry->wired_count++; - - /* - * Check for holes - */ - if (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end)) { + vm_map_clip_start(map, entry, start); + vm_map_clip_end(map, entry, end); + entry->wired_count++; + /* - * Found one. Object creation actions - * do not need to be undone, but the - * wired counts need to be restored. + * Check for holes */ - while (entry != &map->header && entry->end > start) { - entry->wired_count--; - entry = entry->prev; + if (entry->end < end && + (entry->next == &map->header || + entry->next->start > entry->end)) { + /* + * Found one. Object creation actions do not + * need to be undone, but the wired counts + * need to be restored. + */ + while (entry != &map->header && entry->end > start) { + entry->wired_count--; + entry = entry->prev; + } + vm_map_unlock(map); + return (KERN_INVALID_ARGUMENT); } - vm_map_unlock(map); - return(KERN_INVALID_ARGUMENT); - } - entry = entry->next; + entry = entry->next; } /* - * Pass 2. + * Pass 2. */ /* * HACK HACK HACK HACK - * - * If we are wiring in the kernel map or a submap of it, - * unlock the map to avoid deadlocks. We trust that the - * kernel threads are well-behaved, and therefore will - * not do anything destructive to this region of the map - * while we have it unlocked. We cannot trust user threads - * to do the same. - * + * + * If we are wiring in the kernel map or a submap of it, unlock + * the map to avoid deadlocks. We trust that the kernel + * threads are well-behaved, and therefore will not do + * anything destructive to this region of the map while we + * have it unlocked. We cannot trust user threads to do the + * same. + * * HACK HACK HACK HACK */ if (vm_map_pmap(map) == kernel_pmap) { - vm_map_unlock(map); /* trust me ... */ - } - else { - lock_set_recursive(&map->lock); - lock_write_to_read(&map->lock); + vm_map_unlock(map); /* trust me ... */ + } else { + lock_set_recursive(&map->lock); + lock_write_to_read(&map->lock); } rv = 0; entry = start_entry; while (entry != &map->header && entry->start < end) { - /* - * If vm_fault_wire fails for any page we need to - * undo what has been done. We decrement the wiring - * count for those pages which have not yet been - * wired (now) and unwire those that have (later). - * - * XXX this violates the locking protocol on the map, - * needs to be fixed. - */ - if (rv) - entry->wired_count--; - else if (entry->wired_count == 1) { - rv = vm_fault_wire(map, entry->start, entry->end); - if (rv) { - failed = entry->start; - entry->wired_count--; + /* + * If vm_fault_wire fails for any page we need to undo + * what has been done. 
We decrement the wiring count + * for those pages which have not yet been wired (now) + * and unwire those that have (later). + * + * XXX this violates the locking protocol on the map, + * needs to be fixed. + */ + if (rv) + entry->wired_count--; + else if (entry->wired_count == 1) { + rv = vm_fault_wire(map, entry->start, entry->end); + if (rv) { + failed = entry->start; + entry->wired_count--; + } } - } - entry = entry->next; + entry = entry->next; } if (vm_map_pmap(map) == kernel_pmap) { - vm_map_lock(map); - } - else { - lock_clear_recursive(&map->lock); + vm_map_lock(map); + } else { + lock_clear_recursive(&map->lock); } if (rv) { - vm_map_unlock(map); - (void) vm_map_pageable(map, start, failed, TRUE); - return(rv); + vm_map_unlock(map); + (void) vm_map_pageable(map, start, failed, TRUE); + return (rv); } } vm_map_unlock(map); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1434,11 +1414,11 @@ vm_map_pageable(map, start, end, new_pageable) */ int vm_map_clean(map, start, end, syncio, invalidate) - vm_map_t map; - vm_offset_t start; - vm_offset_t end; - boolean_t syncio; - boolean_t invalidate; + vm_map_t map; + vm_offset_t start; + vm_offset_t end; + boolean_t syncio; + boolean_t invalidate; { register vm_map_entry_t current; vm_map_entry_t entry; @@ -1450,22 +1430,21 @@ vm_map_clean(map, start, end, syncio, invalidate) VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &entry)) { vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + return (KERN_INVALID_ADDRESS); } - /* * Make a first pass to check for holes. */ for (current = entry; current->start < end; current = current->next) { if (current->is_sub_map) { vm_map_unlock_read(map); - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } if (end > current->end && (current->next == &map->header || - current->end != current->next->start)) { + current->end != current->next->start)) { vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + return (KERN_INVALID_ADDRESS); } } @@ -1496,24 +1475,24 @@ vm_map_clean(map, start, end, syncio, invalidate) vm_object_lock(object); } /* - * Flush pages if writing is allowed. - * XXX should we continue on an error? + * Flush pages if writing is allowed. XXX should we continue + * on an error? */ if ((current->protection & VM_PROT_WRITE) && - !vm_object_page_clean(object, offset, offset+size, - syncio, FALSE)) { + !vm_object_page_clean(object, offset, offset + size, + syncio, FALSE)) { vm_object_unlock(object); vm_map_unlock_read(map); - return(KERN_FAILURE); + return (KERN_FAILURE); } if (invalidate) - vm_object_page_remove(object, offset, offset+size); + vm_object_page_remove(object, offset, offset + size); vm_object_unlock(object); start += size; } vm_map_unlock_read(map); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1524,9 +1503,10 @@ vm_map_clean(map, start, end, syncio, invalidate) * The map in question should be locked. * [This is the reason for this routine's existence.] */ -void vm_map_entry_unwire(map, entry) - vm_map_t map; - register vm_map_entry_t entry; +void +vm_map_entry_unwire(map, entry) + vm_map_t map; + register vm_map_entry_t entry; { vm_fault_unwire(map, entry->start, entry->end); entry->wired_count = 0; @@ -1536,21 +1516,22 @@ void vm_map_entry_unwire(map, entry) * vm_map_entry_delete: [ internal use only ] * * Deallocate the given entry from the target map. 
- */ -void vm_map_entry_delete(map, entry) - register vm_map_t map; - register vm_map_entry_t entry; + */ +void +vm_map_entry_delete(map, entry) + register vm_map_t map; + register vm_map_entry_t entry; { if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); - + vm_map_entry_unlink(map, entry); map->size -= entry->end - entry->start; if (entry->is_a_map || entry->is_sub_map) vm_map_deallocate(entry->object.share_map); else - vm_object_deallocate(entry->object.vm_object); + vm_object_deallocate(entry->object.vm_object); vm_map_entry_dispose(map, entry); } @@ -1566,15 +1547,15 @@ void vm_map_entry_delete(map, entry) */ int vm_map_delete(map, start, end) - register vm_map_t map; - vm_offset_t start; - register vm_offset_t end; + register vm_map_t map; + vm_offset_t start; + register vm_offset_t end; { - register vm_map_entry_t entry; - vm_map_entry_t first_entry; + register vm_map_entry_t entry; + vm_map_entry_t first_entry; /* - * Find the start of the region, and clip it + * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) @@ -1584,28 +1565,28 @@ vm_map_delete(map, start, end) vm_map_clip_start(map, entry, start); /* - * Fix the lookup hint now, rather than each - * time though the loop. + * Fix the lookup hint now, rather than each time though the + * loop. */ SAVE_HINT(map, entry->prev); } /* - * Save the free space hint + * Save the free space hint */ if (map->first_free->start >= start) map->first_free = entry->prev; /* - * Step through all entries in this region + * Step through all entries in this region */ while ((entry != &map->header) && (entry->start < end)) { - vm_map_entry_t next; - register vm_offset_t s, e; - register vm_object_t object; + vm_map_entry_t next; + register vm_offset_t s, e; + register vm_object_t object; vm_map_clip_end(map, entry, end); @@ -1614,9 +1595,8 @@ vm_map_delete(map, start, end) e = entry->end; /* - * Unwire before removing addresses from the pmap; - * otherwise, unwiring will put the entries back in - * the pmap. + * Unwire before removing addresses from the pmap; otherwise, + * unwiring will put the entries back in the pmap. */ object = entry->object.vm_object; @@ -1624,34 +1604,32 @@ vm_map_delete(map, start, end) vm_map_entry_unwire(map, entry); /* - * If this is a sharing map, we must remove - * *all* references to this data, since we can't - * find all of the physical maps which are sharing - * it. + * If this is a sharing map, we must remove *all* references + * to this data, since we can't find all of the physical maps + * which are sharing it. */ if (object == kernel_object || object == kmem_object) vm_object_page_remove(object, entry->offset, - entry->offset + (e - s)); + entry->offset + (e - s)); else if (!map->is_main_map) vm_object_pmap_remove(object, - entry->offset, - entry->offset + (e - s)); + entry->offset, + entry->offset + (e - s)); else pmap_remove(map->pmap, s, e); /* - * Delete the entry (which may delete the object) - * only after removing all pmap entries pointing - * to its pages. (Otherwise, its page frames may - * be reallocated, and any modify bits will be - * set in the wrong object!) + * Delete the entry (which may delete the object) only after + * removing all pmap entries pointing to its pages. + * (Otherwise, its page frames may be reallocated, and any + * modify bits will be set in the wrong object!) 
*/ vm_map_entry_delete(map, entry); entry = next; } - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1662,11 +1640,11 @@ vm_map_delete(map, start, end) */ int vm_map_remove(map, start, end) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; { - register int result, s = 0; + register int result, s = 0; if (map == kmem_map) s = splhigh(); @@ -1679,7 +1657,7 @@ vm_map_remove(map, start, end) if (map == kmem_map) splx(s); - return(result); + return (result); } /* @@ -1689,48 +1667,45 @@ vm_map_remove(map, start, end) * privilege on the entire address region given. * The entire region must be allocated. */ -boolean_t vm_map_check_protection(map, start, end, protection) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register vm_prot_t protection; +boolean_t +vm_map_check_protection(map, start, end, protection) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_prot_t protection; { - register vm_map_entry_t entry; - vm_map_entry_t tmp_entry; + register vm_map_entry_t entry; + vm_map_entry_t tmp_entry; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); + return (FALSE); } - entry = tmp_entry; while (start < end) { if (entry == &map->header) { - return(FALSE); + return (FALSE); } - /* - * No holes allowed! + * No holes allowed! */ if (start < entry->start) { - return(FALSE); + return (FALSE); } - /* * Check protection associated with entry. */ if ((entry->protection & protection) != protection) { - return(FALSE); + return (FALSE); } - /* go to next entry */ start = entry->end; entry = entry->next; } - return(TRUE); + return (TRUE); } /* @@ -1739,11 +1714,12 @@ boolean_t vm_map_check_protection(map, start, end, protection) * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. */ -void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) - vm_map_t src_map, dst_map; - register vm_map_entry_t src_entry, dst_entry; +void +vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) + vm_map_t src_map, dst_map; + register vm_map_entry_t src_entry, dst_entry; { - vm_object_t temp_object; + vm_object_t temp_object; if (src_entry->is_sub_map || dst_entry->is_sub_map) return; @@ -1753,110 +1729,101 @@ void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) printf("vm_map_copy_entry: copying over permanent data!\n"); /* - * If our destination map was wired down, - * unwire it now. + * If our destination map was wired down, unwire it now. */ if (dst_entry->wired_count != 0) vm_map_entry_unwire(dst_map, dst_entry); /* - * If we're dealing with a sharing map, we - * must remove the destination pages from - * all maps (since we cannot know which maps - * this sharing map belongs in). + * If we're dealing with a sharing map, we must remove the destination + * pages from all maps (since we cannot know which maps this sharing + * map belongs in). */ if (dst_map->is_main_map) pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end); else vm_object_pmap_remove(dst_entry->object.vm_object, - dst_entry->offset, - dst_entry->offset + - (dst_entry->end - dst_entry->start)); + dst_entry->offset, + dst_entry->offset + + (dst_entry->end - dst_entry->start)); if (src_entry->wired_count == 0) { - boolean_t src_needs_copy; + boolean_t src_needs_copy; /* - * If the source entry is marked needs_copy, - * it is already write-protected. 
+ * If the source entry is marked needs_copy, it is already + * write-protected. */ if (!src_entry->needs_copy) { - boolean_t su; + boolean_t su; /* - * If the source entry has only one mapping, - * we can just protect the virtual address - * range. + * If the source entry has only one mapping, we can + * just protect the virtual address range. */ if (!(su = src_map->is_main_map)) { simple_lock(&src_map->ref_lock); su = (src_map->ref_count == 1); simple_unlock(&src_map->ref_lock); } - if (su) { pmap_protect(src_map->pmap, - src_entry->start, - src_entry->end, - src_entry->protection & ~VM_PROT_WRITE); - } - else { + src_entry->start, + src_entry->end, + src_entry->protection & ~VM_PROT_WRITE); + } else { vm_object_pmap_copy(src_entry->object.vm_object, - src_entry->offset, - src_entry->offset + (src_entry->end - -src_entry->start)); + src_entry->offset, + src_entry->offset + (src_entry->end + - src_entry->start)); } } - /* - * Make a copy of the object. + * Make a copy of the object. */ temp_object = dst_entry->object.vm_object; vm_object_copy(src_entry->object.vm_object, - src_entry->offset, - (vm_size_t)(src_entry->end - - src_entry->start), - &dst_entry->object.vm_object, - &dst_entry->offset, - &src_needs_copy); + src_entry->offset, + (vm_size_t) (src_entry->end - + src_entry->start), + &dst_entry->object.vm_object, + &dst_entry->offset, + &src_needs_copy); /* - * If we didn't get a copy-object now, mark the - * source map entry so that a shadow will be created - * to hold its changed pages. + * If we didn't get a copy-object now, mark the source map + * entry so that a shadow will be created to hold its changed + * pages. */ if (src_needs_copy) src_entry->needs_copy = TRUE; /* - * The destination always needs to have a shadow - * created. + * The destination always needs to have a shadow created. */ dst_entry->needs_copy = TRUE; /* - * Mark the entries copy-on-write, so that write-enabling - * the entry won't make copy-on-write pages writable. + * Mark the entries copy-on-write, so that write-enabling the + * entry won't make copy-on-write pages writable. */ src_entry->copy_on_write = TRUE; dst_entry->copy_on_write = TRUE; /* - * Get rid of the old object. + * Get rid of the old object. */ vm_object_deallocate(temp_object); pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, - dst_entry->end - dst_entry->start, src_entry->start); - } - else { + dst_entry->end - dst_entry->start, src_entry->start); + } else { /* - * Of course, wired down pages can't be set copy-on-write. - * Cause wired pages to be copied into the new - * map by simulating faults (the new pages are - * pageable) + * Of course, wired down pages can't be set copy-on-write. 
+ * Cause wired pages to be copied into the new map by + * simulating faults (the new pages are pageable) */ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); } @@ -1878,40 +1845,40 @@ void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) */ int vm_map_copy(dst_map, src_map, - dst_addr, len, src_addr, - dst_alloc, src_destroy) - vm_map_t dst_map; - vm_map_t src_map; - vm_offset_t dst_addr; - vm_size_t len; - vm_offset_t src_addr; - boolean_t dst_alloc; - boolean_t src_destroy; + dst_addr, len, src_addr, + dst_alloc, src_destroy) + vm_map_t dst_map; + vm_map_t src_map; + vm_offset_t dst_addr; + vm_size_t len; + vm_offset_t src_addr; + boolean_t dst_alloc; + boolean_t src_destroy; { register - vm_map_entry_t src_entry; + vm_map_entry_t src_entry; register - vm_map_entry_t dst_entry; - vm_map_entry_t tmp_entry; - vm_offset_t src_start; - vm_offset_t src_end; - vm_offset_t dst_start; - vm_offset_t dst_end; - vm_offset_t src_clip; - vm_offset_t dst_clip; - int result; - boolean_t old_src_destroy; + vm_map_entry_t dst_entry; + vm_map_entry_t tmp_entry; + vm_offset_t src_start; + vm_offset_t src_end; + vm_offset_t dst_start; + vm_offset_t dst_end; + vm_offset_t src_clip; + vm_offset_t dst_clip; + int result; + boolean_t old_src_destroy; /* - * XXX While we figure out why src_destroy screws up, - * we'll do it by explicitly vm_map_delete'ing at the end. + * XXX While we figure out why src_destroy screws up, we'll do it by + * explicitly vm_map_delete'ing at the end. */ old_src_destroy = src_destroy; src_destroy = FALSE; /* - * Compute start and end of region in both maps + * Compute start and end of region in both maps */ src_start = src_addr; @@ -1920,68 +1887,60 @@ vm_map_copy(dst_map, src_map, dst_end = dst_start + len; /* - * Check that the region can exist in both source - * and destination. + * Check that the region can exist in both source and destination. */ if ((dst_end < dst_start) || (src_end < src_start)) - return(KERN_NO_SPACE); + return (KERN_NO_SPACE); /* - * Lock the maps in question -- we avoid deadlock - * by ordering lock acquisition by map value + * Lock the maps in question -- we avoid deadlock by ordering lock + * acquisition by map value */ if (src_map == dst_map) { vm_map_lock(src_map); - } - else if ((int) src_map < (int) dst_map) { - vm_map_lock(src_map); + } else if ((int) src_map < (int) dst_map) { + vm_map_lock(src_map); vm_map_lock(dst_map); } else { vm_map_lock(dst_map); - vm_map_lock(src_map); + vm_map_lock(src_map); } result = KERN_SUCCESS; /* - * Check protections... source must be completely readable and - * destination must be completely writable. [Note that if we're - * allocating the destination region, we don't have to worry - * about protection, but instead about whether the region - * exists.] + * Check protections... source must be completely readable and + * destination must be completely writable. [Note that if we're + * allocating the destination region, we don't have to worry about + * protection, but instead about whether the region exists.] 
*/ if (src_map->is_main_map && dst_map->is_main_map) { if (!vm_map_check_protection(src_map, src_start, src_end, - VM_PROT_READ)) { + VM_PROT_READ)) { result = KERN_PROTECTION_FAILURE; goto Return; } - if (dst_alloc) { /* XXX Consider making this a vm_map_find instead */ if ((result = vm_map_insert(dst_map, NULL, - (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS) + (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS) goto Return; - } - else if (!vm_map_check_protection(dst_map, dst_start, dst_end, - VM_PROT_WRITE)) { + } else if (!vm_map_check_protection(dst_map, dst_start, dst_end, + VM_PROT_WRITE)) { result = KERN_PROTECTION_FAILURE; goto Return; } } - /* - * Find the start entries and clip. - * - * Note that checking protection asserts that the - * lookup cannot fail. - * - * Also note that we wait to do the second lookup - * until we have done the first clip, as the clip - * may affect which entry we get! + * Find the start entries and clip. + * + * Note that checking protection asserts that the lookup cannot fail. + * + * Also note that we wait to do the second lookup until we have done the + * first clip, as the clip may affect which entry we get! */ (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); @@ -1993,38 +1952,36 @@ vm_map_copy(dst_map, src_map, vm_map_clip_start(dst_map, dst_entry, dst_start); /* - * If both source and destination entries are the same, - * retry the first lookup, as it may have changed. + * If both source and destination entries are the same, retry the + * first lookup, as it may have changed. */ if (src_entry == dst_entry) { (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); src_entry = tmp_entry; } - /* - * If source and destination entries are still the same, - * a null copy is being performed. + * If source and destination entries are still the same, a null copy + * is being performed. */ if (src_entry == dst_entry) goto Return; /* - * Go through entries until we get to the end of the - * region. + * Go through entries until we get to the end of the region. */ while (src_start < src_end) { /* - * Clip the entries to the endpoint of the entire region. + * Clip the entries to the endpoint of the entire region. */ vm_map_clip_end(src_map, src_entry, src_end); vm_map_clip_end(dst_map, dst_entry, dst_end); /* - * Clip each entry to the endpoint of the other entry. + * Clip each entry to the endpoint of the other entry. */ src_clip = src_entry->start + (dst_entry->end - dst_entry->start); @@ -2034,25 +1991,24 @@ vm_map_copy(dst_map, src_map, vm_map_clip_end(dst_map, dst_entry, dst_clip); /* - * Both entries now match in size and relative endpoints. - * - * If both entries refer to a VM object, we can - * deal with them now. + * Both entries now match in size and relative endpoints. + * + * If both entries refer to a VM object, we can deal with them + * now. */ if (!src_entry->is_a_map && !dst_entry->is_a_map) { vm_map_copy_entry(src_map, dst_map, src_entry, - dst_entry); - } - else { - register vm_map_t new_dst_map; - vm_offset_t new_dst_start; - vm_size_t new_size; - vm_map_t new_src_map; - vm_offset_t new_src_start; + dst_entry); + } else { + register vm_map_t new_dst_map; + vm_offset_t new_dst_start; + vm_size_t new_size; + vm_map_t new_src_map; + vm_offset_t new_src_start; /* - * We have to follow at least one sharing map. + * We have to follow at least one sharing map. 
*/ new_size = (dst_entry->end - dst_entry->start); @@ -2060,28 +2016,26 @@ vm_map_copy(dst_map, src_map, if (src_entry->is_a_map) { new_src_map = src_entry->object.share_map; new_src_start = src_entry->offset; - } - else { - new_src_map = src_map; + } else { + new_src_map = src_map; new_src_start = src_entry->start; lock_set_recursive(&src_map->lock); } if (dst_entry->is_a_map) { - vm_offset_t new_dst_end; + vm_offset_t new_dst_end; new_dst_map = dst_entry->object.share_map; new_dst_start = dst_entry->offset; /* - * Since the destination sharing entries - * will be merely deallocated, we can - * do that now, and replace the region - * with a null object. [This prevents - * splitting the source map to match - * the form of the destination map.] - * Note that we can only do so if the - * source and destination do not overlap. + * Since the destination sharing entries will + * be merely deallocated, we can do that now, + * and replace the region with a null object. + * [This prevents splitting the source map to + * match the form of the destination map.] + * Note that we can only do so if the source + * and destination do not overlap. */ new_dst_end = new_dst_start + new_size; @@ -2089,29 +2043,28 @@ vm_map_copy(dst_map, src_map, if (new_dst_map != new_src_map) { vm_map_lock(new_dst_map); (void) vm_map_delete(new_dst_map, - new_dst_start, - new_dst_end); + new_dst_start, + new_dst_end); (void) vm_map_insert(new_dst_map, - NULL, - (vm_offset_t) 0, - new_dst_start, - new_dst_end); + NULL, + (vm_offset_t) 0, + new_dst_start, + new_dst_end); vm_map_unlock(new_dst_map); } - } - else { - new_dst_map = dst_map; + } else { + new_dst_map = dst_map; new_dst_start = dst_entry->start; lock_set_recursive(&dst_map->lock); } /* - * Recursively copy the sharing map. + * Recursively copy the sharing map. */ (void) vm_map_copy(new_dst_map, new_src_map, - new_dst_start, new_size, new_src_start, - FALSE, FALSE); + new_dst_start, new_size, new_src_start, + FALSE, FALSE); if (dst_map == new_dst_map) lock_clear_recursive(&dst_map->lock); @@ -2120,7 +2073,7 @@ vm_map_copy(dst_map, src_map, } /* - * Update variables for next pass through the loop. + * Update variables for next pass through the loop. */ src_start = src_entry->end; @@ -2129,29 +2082,28 @@ vm_map_copy(dst_map, src_map, dst_entry = dst_entry->next; /* - * If the source is to be destroyed, here is the - * place to do it. + * If the source is to be destroyed, here is the place to do + * it. 
*/ if (src_destroy && src_map->is_main_map && - dst_map->is_main_map) + dst_map->is_main_map) vm_map_entry_delete(src_map, src_entry->prev); } /* - * Update the physical maps as appropriate + * Update the physical maps as appropriate */ if (src_map->is_main_map && dst_map->is_main_map) { if (src_destroy) pmap_remove(src_map->pmap, src_addr, src_addr + len); } - /* - * Unlock the maps + * Unlock the maps */ - Return: ; +Return:; if (old_src_destroy) vm_map_delete(src_map, src_addr, src_addr + len); @@ -2160,7 +2112,7 @@ vm_map_copy(dst_map, src_map, if (src_map != dst_map) vm_map_unlock(dst_map); - return(result); + return (result); } /* @@ -2177,20 +2129,20 @@ vmspace_fork(vm1) register struct vmspace *vm1; { register struct vmspace *vm2; - vm_map_t old_map = &vm1->vm_map; - vm_map_t new_map; - vm_map_entry_t old_entry; - vm_map_entry_t new_entry; - pmap_t new_pmap; + vm_map_t old_map = &vm1->vm_map; + vm_map_t new_map; + vm_map_entry_t old_entry; + vm_map_entry_t new_entry; + pmap_t new_pmap; vm_map_lock(old_map); vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, old_map->entries_pageable); bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, - (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); - new_pmap = &vm2->vm_pmap; /* XXX */ - new_map = &vm2->vm_map; /* XXX */ + (caddr_t) (vm1 + 1) - (caddr_t) & vm1->vm_startcopy); + new_pmap = &vm2->vm_pmap; /* XXX */ + new_map = &vm2->vm_map; /* XXX */ old_entry = old_map->header.next; @@ -2204,54 +2156,52 @@ vmspace_fork(vm1) case VM_INHERIT_SHARE: /* - * If we don't already have a sharing map: + * If we don't already have a sharing map: */ if (!old_entry->is_a_map) { - vm_map_t new_share_map; - vm_map_entry_t new_share_entry; - + vm_map_t new_share_map; + vm_map_entry_t new_share_entry; + /* - * Create a new sharing map + * Create a new sharing map */ - + new_share_map = vm_map_create(NULL, - old_entry->start, - old_entry->end, - TRUE); + old_entry->start, + old_entry->end, + TRUE); new_share_map->is_main_map = FALSE; /* - * Create the only sharing entry from the - * old task map entry. + * Create the only sharing entry from the old + * task map entry. */ new_share_entry = - vm_map_entry_create(new_share_map); + vm_map_entry_create(new_share_map); *new_share_entry = *old_entry; new_share_entry->wired_count = 0; /* - * Insert the entry into the new sharing - * map + * Insert the entry into the new sharing map */ vm_map_entry_link(new_share_map, - new_share_map->header.prev, - new_share_entry); + new_share_map->header.prev, + new_share_entry); /* - * Fix up the task map entry to refer - * to the sharing map now. + * Fix up the task map entry to refer to the + * sharing map now. */ old_entry->is_a_map = TRUE; old_entry->object.share_map = new_share_map; old_entry->offset = old_entry->start; } - /* - * Clone the entry, referencing the sharing map. + * Clone the entry, referencing the sharing map. */ new_entry = vm_map_entry_create(new_map); @@ -2260,27 +2210,26 @@ vmspace_fork(vm1) vm_map_reference(new_entry->object.share_map); /* - * Insert the entry into the new map -- we - * know we're inserting at the end of the new - * map. + * Insert the entry into the new map -- we know we're + * inserting at the end of the new map. 
*/ vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + new_entry); /* - * Update the physical map + * Update the physical map */ pmap_copy(new_map->pmap, old_map->pmap, - new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); break; case VM_INHERIT_COPY: /* - * Clone the entry and link into the map. + * Clone the entry and link into the map. */ new_entry = vm_map_entry_create(new_map); @@ -2289,23 +2238,22 @@ vmspace_fork(vm1) new_entry->object.vm_object = NULL; new_entry->is_a_map = FALSE; vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + new_entry); if (old_entry->is_a_map) { - int check; + int check; check = vm_map_copy(new_map, - old_entry->object.share_map, - new_entry->start, - (vm_size_t)(new_entry->end - - new_entry->start), - old_entry->offset, - FALSE, FALSE); + old_entry->object.share_map, + new_entry->start, + (vm_size_t) (new_entry->end - + new_entry->start), + old_entry->offset, + FALSE, FALSE); if (check != KERN_SUCCESS) printf("vm_map_fork: copy in share_map region failed\n"); - } - else { + } else { vm_map_copy_entry(old_map, new_map, old_entry, - new_entry); + new_entry); } break; } @@ -2315,7 +2263,7 @@ vmspace_fork(vm1) new_map->size = old_map->size; vm_map_unlock(old_map); - return(vm2); + return (vm2); } /* @@ -2342,29 +2290,29 @@ vmspace_fork(vm1) */ int vm_map_lookup(var_map, vaddr, fault_type, out_entry, - object, offset, out_prot, wired, single_use) - vm_map_t *var_map; /* IN/OUT */ - register vm_offset_t vaddr; - register vm_prot_t fault_type; - - vm_map_entry_t *out_entry; /* OUT */ - vm_object_t *object; /* OUT */ - vm_offset_t *offset; /* OUT */ - vm_prot_t *out_prot; /* OUT */ - boolean_t *wired; /* OUT */ - boolean_t *single_use; /* OUT */ + object, offset, out_prot, wired, single_use) + vm_map_t *var_map; /* IN/OUT */ + register vm_offset_t vaddr; + register vm_prot_t fault_type; + + vm_map_entry_t *out_entry; /* OUT */ + vm_object_t *object; /* OUT */ + vm_offset_t *offset; /* OUT */ + vm_prot_t *out_prot; /* OUT */ + boolean_t *wired; /* OUT */ + boolean_t *single_use; /* OUT */ { - vm_map_t share_map; - vm_offset_t share_offset; - register vm_map_entry_t entry; - register vm_map_t map = *var_map; - register vm_prot_t prot; - register boolean_t su; + vm_map_t share_map; + vm_offset_t share_offset; + register vm_map_entry_t entry; + register vm_map_t map = *var_map; + register vm_prot_t prot; + register boolean_t su; - RetryLookup: ; +RetryLookup:; /* - * Lookup the faulting address. + * Lookup the faulting address. */ vm_map_lock_read(map); @@ -2376,8 +2324,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, } /* - * If the map has an interesting hint, try it before calling - * full blown lookup routine. + * If the map has an interesting hint, try it before calling full + * blown lookup routine. */ simple_lock(&map->hint_lock); @@ -2388,11 +2336,11 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, if ((entry == &map->header) || (vaddr < entry->start) || (vaddr >= entry->end)) { - vm_map_entry_t tmp_entry; + vm_map_entry_t tmp_entry; /* - * Entry was either not a valid hint, or the vaddr - * was not contained in the entry, so do a full lookup. + * Entry was either not a valid hint, or the vaddr was not + * contained in the entry, so do a full lookup. 
*/ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); @@ -2400,22 +2348,19 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, entry = tmp_entry; *out_entry = entry; } - /* - * Handle submaps. + * Handle submaps. */ if (entry->is_sub_map) { - vm_map_t old_map = map; + vm_map_t old_map = map; *var_map = map = entry->object.sub_map; vm_map_unlock_read(old_map); goto RetryLookup; } - /* - * Check whether this task is allowed to have - * this page. + * Check whether this task is allowed to have this page. */ prot = entry->protection; @@ -2423,8 +2368,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, RETURN(KERN_PROTECTION_FAILURE); /* - * If this page is not pageable, we have to get - * it for all possible accesses. + * If this page is not pageable, we have to get it for all possible + * accesses. */ *wired = (entry->wired_count != 0); @@ -2432,33 +2377,31 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, prot = fault_type = entry->protection; /* - * If we don't already have a VM object, track - * it down. + * If we don't already have a VM object, track it down. */ su = !entry->is_a_map; if (su) { - share_map = map; + share_map = map; share_offset = vaddr; - } - else { - vm_map_entry_t share_entry; + } else { + vm_map_entry_t share_entry; /* - * Compute the sharing map, and offset into it. + * Compute the sharing map, and offset into it. */ share_map = entry->object.share_map; share_offset = (vaddr - entry->start) + entry->offset; /* - * Look for the backing store object and offset + * Look for the backing store object and offset */ vm_map_lock_read(share_map); if (!vm_map_lookup_entry(share_map, share_offset, - &share_entry)) { + &share_entry)) { vm_map_unlock_read(share_map); RETURN(KERN_INVALID_ADDRESS); } @@ -2466,25 +2409,24 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, } /* - * If the entry was copy-on-write, we either ... + * If the entry was copy-on-write, we either ... */ if (entry->needs_copy) { - /* - * If we want to write the page, we may as well - * handle that now since we've got the sharing - * map locked. - * - * If we don't need to write the page, we just - * demote the permissions allowed. + /* + * If we want to write the page, we may as well handle that + * now since we've got the sharing map locked. + * + * If we don't need to write the page, we just demote the + * permissions allowed. */ if (fault_type & VM_PROT_WRITE) { /* - * Make a new object, and place it in the - * object chain. Note that no new references - * have appeared -- one just moved from the - * share map to the new object. + * Make a new object, and place it in the object + * chain. Note that no new references have appeared + * -- one just moved from the share map to the new + * object. */ if (lock_read_to_write(&share_map->lock)) { @@ -2492,28 +2434,25 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, vm_map_unlock_read(map); goto RetryLookup; } - vm_object_shadow( - &entry->object.vm_object, - &entry->offset, - (vm_size_t) (entry->end - entry->start)); - + &entry->object.vm_object, + &entry->offset, + (vm_size_t) (entry->end - entry->start)); + entry->needs_copy = FALSE; - + lock_write_to_read(&share_map->lock); - } - else { + } else { /* - * We're attempting to read a copy-on-write - * page -- don't allow writes. + * We're attempting to read a copy-on-write page -- + * don't allow writes. */ prot &= (~VM_PROT_WRITE); } } - /* - * Create an object if necessary. + * Create an object if necessary. 
*/ if (entry->object.vm_object == NULL) { @@ -2522,23 +2461,21 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, vm_map_unlock_read(map); goto RetryLookup; } - entry->object.vm_object = vm_object_allocate( - (vm_size_t)(entry->end - entry->start)); + (vm_size_t) (entry->end - entry->start)); entry->offset = 0; lock_write_to_read(&share_map->lock); } - /* - * Return the object/offset from this entry. If the entry - * was copy-on-write or empty, it has been fixed up. + * Return the object/offset from this entry. If the entry was + * copy-on-write or empty, it has been fixed up. */ *offset = (share_offset - entry->start) + entry->offset; *object = entry->object.vm_object; /* - * Return whether this is the only map sharing this data. + * Return whether this is the only map sharing this data. */ if (!su) { @@ -2546,12 +2483,11 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, su = (share_map->ref_count == 1); simple_unlock(&share_map->ref_lock); } - *out_prot = prot; *single_use = su; - return(KERN_SUCCESS); - + return (KERN_SUCCESS); + #undef RETURN } @@ -2562,19 +2498,20 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, * (according to the handle returned by that lookup). */ -void vm_map_lookup_done(map, entry) - register vm_map_t map; - vm_map_entry_t entry; +void +vm_map_lookup_done(map, entry) + register vm_map_t map; + vm_map_entry_t entry; { /* - * If this entry references a map, unlock it first. + * If this entry references a map, unlock it first. */ if (entry->is_a_map) vm_map_unlock_read(entry->object.share_map); /* - * Unlock the main-level map + * Unlock the main-level map */ vm_map_unlock_read(map); @@ -2592,39 +2529,40 @@ void vm_map_lookup_done(map, entry) * at allocation time because the adjacent entry * is often wired down. 
*/ -void vm_map_simplify(map, start) - vm_map_t map; - vm_offset_t start; +void +vm_map_simplify(map, start) + vm_map_t map; + vm_offset_t start; { - vm_map_entry_t this_entry; - vm_map_entry_t prev_entry; + vm_map_entry_t this_entry; + vm_map_entry_t prev_entry; vm_map_lock(map); if ( - (vm_map_lookup_entry(map, start, &this_entry)) && - ((prev_entry = this_entry->prev) != &map->header) && - - (prev_entry->end == start) && - (map->is_main_map) && - - (prev_entry->is_a_map == FALSE) && - (prev_entry->is_sub_map == FALSE) && - - (this_entry->is_a_map == FALSE) && - (this_entry->is_sub_map == FALSE) && - - (prev_entry->inheritance == this_entry->inheritance) && - (prev_entry->protection == this_entry->protection) && - (prev_entry->max_protection == this_entry->max_protection) && - (prev_entry->wired_count == this_entry->wired_count) && - - (prev_entry->copy_on_write == this_entry->copy_on_write) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->end - prev_entry->start)) - == this_entry->offset) - ) { + (vm_map_lookup_entry(map, start, &this_entry)) && + ((prev_entry = this_entry->prev) != &map->header) && + + (prev_entry->end == start) && + (map->is_main_map) && + + (prev_entry->is_a_map == FALSE) && + (prev_entry->is_sub_map == FALSE) && + + (this_entry->is_a_map == FALSE) && + (this_entry->is_sub_map == FALSE) && + + (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->protection == this_entry->protection) && + (prev_entry->max_protection == this_entry->max_protection) && + (prev_entry->wired_count == this_entry->wired_count) && + + (prev_entry->copy_on_write == this_entry->copy_on_write) && + (prev_entry->needs_copy == this_entry->needs_copy) && + + (prev_entry->object.vm_object == this_entry->object.vm_object) && + ((prev_entry->offset + (prev_entry->end - prev_entry->start)) + == this_entry->offset) + ) { if (map->first_free == this_entry) map->first_free = prev_entry; @@ -2632,7 +2570,7 @@ void vm_map_simplify(map, start) SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; - vm_object_deallocate(this_entry->object.vm_object); + vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } } @@ -2642,64 +2580,63 @@ void vm_map_simplify(map, start) /* * vm_map_print: [ debug ] */ -void vm_map_print(map, full) - register vm_map_t map; - boolean_t full; +void +vm_map_print(map, full) + register vm_map_t map; + boolean_t full; { - register vm_map_entry_t entry; + register vm_map_entry_t entry; extern int indent; iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", - (map->is_main_map ? "Task" : "Share"), - (int) map, (int) (map->pmap), map->ref_count, map->nentries, - map->timestamp); + (map->is_main_map ? 
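/*
 * [Editorial aside -- the mergeability test that the reindented
 *  vm_map_simplify() condition above spells out, restated as a stand-alone
 *  predicate over a hypothetical "extent" record.  Two neighbours can be
 *  fused only if they abut, agree on every attribute, and their backing
 *  offsets are contiguous.]
 */
struct extent {
    unsigned long start, end;           /* [start, end) in the address space */
    unsigned long offset;               /* offset into the backing object */
    void *object;                       /* backing object */
    int prot, maxprot, inherit, wired, cow, needs_copy;
};

static int
can_coalesce(const struct extent *prev, const struct extent *cur)
{
    return (prev->end == cur->start &&
        prev->object == cur->object &&
        prev->prot == cur->prot &&
        prev->maxprot == cur->maxprot &&
        prev->inherit == cur->inherit &&
        prev->wired == cur->wired &&
        prev->cow == cur->cow &&
        prev->needs_copy == cur->needs_copy &&
        /* backing store must continue exactly where prev left off */
        prev->offset + (prev->end - prev->start) == cur->offset);
}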
"Task" : "Share"), + (int) map, (int) (map->pmap), map->ref_count, map->nentries, + map->timestamp); if (!full && indent) return; indent += 2; for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + entry = entry->next) { iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", - (int) entry, (int) entry->start, (int) entry->end); + (int) entry, (int) entry->start, (int) entry->end); if (map->is_main_map) { - static char *inheritance_name[4] = - { "share", "copy", "none", "donate_copy"}; + static char *inheritance_name[4] = + {"share", "copy", "none", "donate_copy"}; + printf("prot=%x/%x/%s, ", - entry->protection, - entry->max_protection, - inheritance_name[entry->inheritance]); + entry->protection, + entry->max_protection, + inheritance_name[entry->inheritance]); if (entry->wired_count != 0) printf("wired, "); } - if (entry->is_a_map || entry->is_sub_map) { - printf("share=0x%x, offset=0x%x\n", - (int) entry->object.share_map, - (int) entry->offset); + printf("share=0x%x, offset=0x%x\n", + (int) entry->object.share_map, + (int) entry->offset); if ((entry->prev == &map->header) || (!entry->prev->is_a_map) || (entry->prev->object.share_map != - entry->object.share_map)) { + entry->object.share_map)) { indent += 2; vm_map_print(entry->object.share_map, full); indent -= 2; } - - } - else { + } else { printf("object=0x%x, offset=0x%x", - (int) entry->object.vm_object, - (int) entry->offset); + (int) entry->object.vm_object, + (int) entry->offset); if (entry->copy_on_write) printf(", copy (%s)", - entry->needs_copy ? "needed" : "done"); + entry->needs_copy ? "needed" : "done"); printf("\n"); if ((entry->prev == &map->header) || (entry->prev->is_a_map) || (entry->prev->object.vm_object != - entry->object.vm_object)) { + entry->object.vm_object)) { indent += 2; vm_object_print(entry->object.vm_object, full); indent -= 2; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 4f5b64533c4e..c940e8af0d93 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id$ + * $Id: vm_map.h,v 1.3 1994/08/02 07:55:26 davidg Exp $ */ /* @@ -86,9 +86,9 @@ */ union vm_map_object { - struct vm_object *vm_object; /* object object */ - struct vm_map *share_map; /* share map */ - struct vm_map *sub_map; /* belongs to another map */ + struct vm_object *vm_object; /* object object */ + struct vm_map *share_map; /* share map */ + struct vm_map *sub_map; /* belongs to another map */ }; /* @@ -98,22 +98,22 @@ union vm_map_object { * Also included is control information for virtual copy operations. */ struct vm_map_entry { - struct vm_map_entry *prev; /* previous entry */ - struct vm_map_entry *next; /* next entry */ - vm_offset_t start; /* start address */ - vm_offset_t end; /* end address */ - union vm_map_object object; /* object I point to */ - vm_offset_t offset; /* offset into object */ - boolean_t is_a_map:1, /* Is "object" a map? */ - is_sub_map:1, /* Is "object" a submap? */ - /* Only in sharing maps: */ - copy_on_write:1,/* is data copy-on-write */ - needs_copy:1; /* does object need to be copied */ - /* Only in task maps: */ - vm_prot_t protection; /* protection code */ - vm_prot_t max_protection; /* maximum protection */ - vm_inherit_t inheritance; /* inheritance */ - int wired_count; /* can be paged if = 0 */ + struct vm_map_entry *prev; /* previous entry */ + struct vm_map_entry *next; /* next entry */ + vm_offset_t start; /* start address */ + vm_offset_t end; /* end address */ + union vm_map_object object; /* object I point to */ + vm_offset_t offset; /* offset into object */ + boolean_t is_a_map:1, /* Is "object" a map? */ + is_sub_map:1, /* Is "object" a submap? */ + /* Only in sharing maps: */ + copy_on_write:1, /* is data copy-on-write */ + needs_copy:1; /* does object need to be copied */ + /* Only in task maps: */ + vm_prot_t protection; /* protection code */ + vm_prot_t max_protection; /* maximum protection */ + vm_inherit_t inheritance; /* inheritance */ + int wired_count; /* can be paged if = 0 */ }; /* @@ -123,19 +123,19 @@ struct vm_map_entry { * insertion, or removal. */ struct vm_map { - struct pmap * pmap; /* Physical map */ - lock_data_t lock; /* Lock for map data */ - struct vm_map_entry header; /* List of entries */ - int nentries; /* Number of entries */ - vm_size_t size; /* virtual size */ - boolean_t is_main_map; /* Am I a main map? */ - int ref_count; /* Reference count */ - simple_lock_data_t ref_lock; /* Lock for ref_count field */ - vm_map_entry_t hint; /* hint for quick lookups */ - simple_lock_data_t hint_lock; /* lock for hint storage */ - vm_map_entry_t first_free; /* First free space hint */ - boolean_t entries_pageable; /* map entries pageable?? */ - unsigned int timestamp; /* Version number */ + struct pmap *pmap; /* Physical map */ + lock_data_t lock; /* Lock for map data */ + struct vm_map_entry header; /* List of entries */ + int nentries; /* Number of entries */ + vm_size_t size; /* virtual size */ + boolean_t is_main_map; /* Am I a main map? */ + int ref_count; /* Reference count */ + simple_lock_data_t ref_lock; /* Lock for ref_count field */ + vm_map_entry_t hint; /* hint for quick lookups */ + simple_lock_data_t hint_lock; /* lock for hint storage */ + vm_map_entry_t first_free; /* First free space hint */ + boolean_t entries_pageable; /* map entries pageable?? */ + unsigned int timestamp; /* Version number */ #define min_offset header.start #define max_offset header.end }; @@ -150,9 +150,9 @@ struct vm_map { * does not include a reference for the imbedded share_map.] 
*/ typedef struct { - int main_timestamp; - vm_map_t share_map; - int share_timestamp; + int main_timestamp; + vm_map_t share_map; + int share_timestamp; } vm_map_version_t; /* @@ -181,50 +181,36 @@ typedef struct { #define MAX_KMAPENT 128 #ifdef KERNEL -boolean_t vm_map_check_protection __P((vm_map_t, - vm_offset_t, vm_offset_t, vm_prot_t)); -int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t, - vm_size_t, vm_offset_t, boolean_t, boolean_t)); -void vm_map_copy_entry __P((vm_map_t, - vm_map_t, vm_map_entry_t, vm_map_entry_t)); +boolean_t vm_map_check_protection __P((vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t)); +int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t, vm_size_t, vm_offset_t, boolean_t, boolean_t)); +void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); struct pmap; -vm_map_t vm_map_create __P((struct pmap *, - vm_offset_t, vm_offset_t, boolean_t)); -void vm_map_deallocate __P((vm_map_t)); -int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t)); -vm_map_entry_t vm_map_entry_create __P((vm_map_t)); -void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); -void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); -void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); -int vm_map_find __P((vm_map_t, vm_object_t, - vm_offset_t, vm_offset_t *, vm_size_t, boolean_t)); -int vm_map_findspace __P((vm_map_t, - vm_offset_t, vm_size_t, vm_offset_t *)); -int vm_map_inherit __P((vm_map_t, - vm_offset_t, vm_offset_t, vm_inherit_t)); -void vm_map_init __P((struct vm_map *, - vm_offset_t, vm_offset_t, boolean_t)); -int vm_map_insert __P((vm_map_t, - vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t)); -int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t, - vm_map_entry_t *, vm_object_t *, vm_offset_t *, vm_prot_t *, - boolean_t *, boolean_t *)); -void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t)); -boolean_t vm_map_lookup_entry __P((vm_map_t, - vm_offset_t, vm_map_entry_t *)); -int vm_map_pageable __P((vm_map_t, - vm_offset_t, vm_offset_t, boolean_t)); -int vm_map_clean __P((vm_map_t, - vm_offset_t, vm_offset_t, boolean_t, boolean_t)); -void vm_map_print __P((vm_map_t, boolean_t)); -int vm_map_protect __P((vm_map_t, - vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); -void vm_map_reference __P((vm_map_t)); -int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t)); -void vm_map_simplify __P((vm_map_t, vm_offset_t)); -void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t)); -void vm_map_startup __P((void)); -int vm_map_submap __P((vm_map_t, - vm_offset_t, vm_offset_t, vm_map_t)); +vm_map_t vm_map_create __P((struct pmap *, vm_offset_t, vm_offset_t, boolean_t)); +void vm_map_deallocate __P((vm_map_t)); +int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t)); +vm_map_entry_t vm_map_entry_create __P((vm_map_t)); +void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); +void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); +void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); +int vm_map_find __P((vm_map_t, vm_object_t, vm_offset_t, vm_offset_t *, vm_size_t, boolean_t)); +int vm_map_findspace __P((vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *)); +int vm_map_inherit __P((vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t)); +void vm_map_init __P((struct vm_map *, vm_offset_t, vm_offset_t, boolean_t)); +int vm_map_insert __P((vm_map_t, vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t)); +int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, + vm_offset_t *, vm_prot_t *, boolean_t *, 
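/*
 * [Editorial note: the __P(()) wrapper used throughout the reflowed
 *  prototypes in this header is the <sys/cdefs.h> idiom for keeping one
 *  declaration usable by both ANSI and K&R compilers.  Roughly (a
 *  simplified sketch, not the literal header text):]
 */
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos)     protos          /* ANSI C: keep the parameter list */
#else
#define __P(protos)     ()              /* traditional C: drop it */
#endif

/* So a declaration such as (frobnicate is a made-up example): */
int frobnicate __P((int, char *));
/* is a full prototype under ANSI C and plain "int frobnicate();" otherwise. */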
boolean_t *)); +void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t)); +boolean_t vm_map_lookup_entry __P((vm_map_t, vm_offset_t, vm_map_entry_t *)); +int vm_map_pageable __P((vm_map_t, vm_offset_t, vm_offset_t, boolean_t)); +int vm_map_clean __P((vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t)); +void vm_map_print __P((vm_map_t, boolean_t)); +int vm_map_protect __P((vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); +void vm_map_reference __P((vm_map_t)); +int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t)); +void vm_map_simplify __P((vm_map_t, vm_offset_t)); +void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t)); +void vm_map_startup __P((void)); +int vm_map_submap __P((vm_map_t, vm_offset_t, vm_offset_t, vm_map_t)); + #endif -#endif /* _VM_MAP_ */ +#endif /* _VM_MAP_ */ diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index e1f0072650e2..9cdd5b9e6b55 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 - * $Id: vm_meter.c,v 1.3 1994/08/02 07:55:27 davidg Exp $ + * $Id: vm_meter.c,v 1.4 1994/09/12 11:38:31 davidg Exp $ */ #include <sys/param.h> @@ -41,9 +41,9 @@ #include <vm/vm.h> #include <sys/sysctl.h> -struct loadavg averunnable; /* load average, of runnable procs */ +struct loadavg averunnable; /* load average, of runnable procs */ -int maxslp = MAXSLP; +int maxslp = MAXSLP; void vmmeter() @@ -51,15 +51,15 @@ vmmeter() if (time.tv_sec % 5 == 0) loadav(&averunnable); - if (proc0.p_slptime > maxslp/2) - wakeup((caddr_t)&proc0); + if (proc0.p_slptime > maxslp / 2) + wakeup((caddr_t) & proc0); } /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ -fixpt_t cexp[3] = { +fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ @@ -76,7 +76,7 @@ loadav(avg) register int i, nrun; register struct proc *p; - for (nrun = 0, p = (struct proc *)allproc; p != NULL; p = p->p_next) { + for (nrun = 0, p = (struct proc *) allproc; p != NULL; p = p->p_next) { switch (p->p_stat) { case SSLEEP: if (p->p_priority > PZERO || p->p_slptime != 0) @@ -89,7 +89,7 @@ loadav(avg) } for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + - nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; } /* @@ -109,17 +109,32 @@ vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) /* all sysctl names at this level are terminal */ if (namelen != 1) - return (ENOTDIR); /* overloaded */ + return (ENOTDIR); /* overloaded */ switch (name[0]) { case VM_LOADAVG: averunnable.fscale = FSCALE; return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable, - sizeof(averunnable))); + sizeof(averunnable))); case VM_METER: vmtotal(&vmtotals); return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals, - sizeof(vmtotals))); + sizeof(vmtotals))); + case VM_V_FREE_MIN: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_free_min)); + case VM_V_FREE_TARGET: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_free_target)); + case VM_V_FREE_RESERVED: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_free_reserved)); + case VM_V_INACTIVE_TARGET: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_inactive_target)); + case VM_V_CACHE_MIN: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_cache_min)); + case VM_V_CACHE_MAX: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_cache_max)); + case 
VM_V_PAGEOUT_FREE_MIN: + return (sysctl_int(oldp, oldlenp, newp, newlen, &cnt.v_pageout_free_min)); + default: return (EOPNOTSUPP); } @@ -135,7 +150,7 @@ vmtotal(totalp) register struct vmtotal *totalp; { register struct proc *p; - register vm_map_entry_t entry; + register vm_map_entry_t entry; register vm_object_t object; register vm_map_t map; int paging; @@ -146,14 +161,14 @@ vmtotal(totalp) */ simple_lock(&vm_object_list_lock); for (object = vm_object_list.tqh_first; - object != NULL; - object = object->object_list.tqe_next) + object != NULL; + object = object->object_list.tqe_next) object->flags &= ~OBJ_ACTIVE; simple_unlock(&vm_object_list_lock); /* * Calculate process statistics. */ - for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { if (p->p_flag & P_SYSTEM) continue; switch (p->p_stat) { @@ -188,7 +203,7 @@ vmtotal(totalp) */ paging = 0; for (map = &p->p_vmspace->vm_map, entry = map->header.next; - entry != &map->header; entry = entry->next) { + entry != &map->header; entry = entry->next) { if (entry->is_a_map || entry->is_sub_map || entry->object.vm_object == NULL) continue; @@ -203,8 +218,8 @@ vmtotal(totalp) */ simple_lock(&vm_object_list_lock); for (object = vm_object_list.tqh_first; - object != NULL; - object = object->object_list.tqe_next) { + object != NULL; + object = object->object_list.tqe_next) { totalp->t_vm += num_pages(object->size); totalp->t_rm += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { @@ -221,5 +236,5 @@ vmtotal(totalp) } } } - totalp->t_free = cnt.v_free_count; + totalp->t_free = cnt.v_free_count + cnt.v_cache_count; } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 5afece835033..4b3027c6dee6 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.6 1994/09/02 15:06:51 davidg Exp $ + * $Id: vm_mmap.c,v 1.7 1994/10/09 01:52:11 phk Exp $ */ /* @@ -63,6 +63,7 @@ #ifdef DEBUG int mmapdebug = 0; + #define MDB_FOLLOW 0x01 #define MDB_SYNC 0x02 #define MDB_MAPIT 0x04 @@ -71,8 +72,9 @@ int mmapdebug = 0; void pmap_object_init_pt(); struct sbrk_args { - int incr; + int incr; }; + /* ARGSUSED */ int sbrk(p, uap, retval) @@ -86,8 +88,9 @@ sbrk(p, uap, retval) } struct sstk_args { - int incr; + int incr; }; + /* ARGSUSED */ int sstk(p, uap, retval) @@ -102,8 +105,9 @@ sstk(p, uap, retval) #if defined(COMPAT_43) || defined(COMPAT_SUNOS) struct getpagesize_args { - int dummy; + int dummy; }; + /* ARGSUSED */ int ogetpagesize(p, uap, retval) @@ -115,16 +119,16 @@ ogetpagesize(p, uap, retval) *retval = PAGE_SIZE; return (0); } -#endif /* COMPAT_43 || COMPAT_SUNOS */ +#endif /* COMPAT_43 || COMPAT_SUNOS */ struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; - long pad; - off_t pos; + caddr_t addr; + size_t len; + int prot; + int flags; + int fd; + long pad; + off_t pos; }; int @@ -147,21 +151,21 @@ mmap(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n", - p->p_pid, uap->addr, uap->len, prot, - flags, uap->fd, (vm_offset_t)uap->pos); + p->p_pid, uap->addr, uap->len, prot, + flags, uap->fd, (vm_offset_t) uap->pos); #endif /* - * Address (if FIXED) must be page aligned. - * Size is implicitly rounded to a page boundary. + * Address (if FIXED) must be page aligned. Size is implicitly rounded + * to a page boundary. 
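/*
 * [Editorial aside -- a userland usage sketch for the new VM sysctl knobs
 *  exported by the vm_meter.c hunk above (VM_V_FREE_MIN and friends).
 *  It assumes the VM_V_* second-level names are visible to user code,
 *  historically via <vm/vm_param.h>; adjust the include if your tree
 *  exports them elsewhere.]
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <vm/vm_param.h>        /* assumed home of the VM_V_* names */
#include <stdio.h>

int
main(void)
{
    int mib[2], val;
    size_t len = sizeof(val);

    mib[0] = CTL_VM;
    mib[1] = VM_V_FREE_MIN;     /* minimum free-page threshold added above */
    if (sysctl(mib, 2, &val, &len, NULL, 0) == -1) {
        perror("sysctl");
        return (1);
    }
    printf("vm.v_free_min = %d pages\n", val);
    return (0);
}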
*/ addr = (vm_offset_t) uap->addr; if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) || - (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) + (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) return (EINVAL); size = (vm_size_t) round_page(uap->len); /* - * Check for illegal addresses. Watch out for address wrap... - * Note that VM_*_ADDRESS are not constants due to casts (argh). + * Check for illegal addresses. Watch out for address wrap... Note + * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) @@ -174,11 +178,10 @@ mmap(p, uap, retval) return (EINVAL); } /* - * XXX if no hint provided for a non-fixed mapping place it after - * the end of the largest possible heap. - * - * There should really be a pmap call to determine a reasonable - * location. + * XXX if no hint provided for a non-fixed mapping place it after the + * end of the largest possible heap. + * + * There should really be a pmap call to determine a reasonable location. */ if (addr == 0 && (flags & MAP_FIXED) == 0) addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); @@ -190,20 +193,20 @@ mmap(p, uap, retval) maxprot = VM_PROT_ALL; } else { /* - * Mapping file, get fp for validation. - * Obtain vnode and make sure it is of appropriate type. + * Mapping file, get fp for validation. Obtain vnode and make + * sure it is of appropriate type. */ - if (((unsigned)uap->fd) >= fdp->fd_nfiles || + if (((unsigned) uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EINVAL); - vp = (struct vnode *)fp->f_data; + vp = (struct vnode *) fp->f_data; if (vp->v_type != VREG && vp->v_type != VCHR) return (EINVAL); /* - * XXX hack to handle use of /dev/zero to map anon - * memory (ala SunOS). + * XXX hack to handle use of /dev/zero to map anon memory (ala + * SunOS). */ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { handle = NULL; @@ -216,8 +219,8 @@ mmap(p, uap, retval) * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what - * credentials do we use for determination? - * What if proc does a setuid? + * credentials do we use for determination? What if + * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? 
*/ if (fp->f_flag & FREAD) @@ -231,24 +234,24 @@ mmap(p, uap, retval) return (EACCES); } else maxprot |= VM_PROT_WRITE; - handle = (caddr_t)vp; + handle = (caddr_t) vp; } } error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, - flags, handle, (vm_offset_t)uap->pos); + flags, handle, (vm_offset_t) uap->pos); if (error == 0) - *retval = (int)addr; + *retval = (int) addr; return (error); } #ifdef COMPAT_43 struct ommap_args { - caddr_t addr; - int len; - int prot; - int flags; - int fd; - long pos; + caddr_t addr; + int len; + int prot; + int flags; + int fd; + long pos; }; int ommap(p, uap, retval) @@ -261,12 +264,13 @@ ommap(p, uap, retval) 0, PROT_EXEC, PROT_WRITE, - PROT_EXEC|PROT_WRITE, + PROT_EXEC | PROT_WRITE, PROT_READ, - PROT_EXEC|PROT_READ, - PROT_WRITE|PROT_READ, - PROT_EXEC|PROT_WRITE|PROT_READ, + PROT_EXEC | PROT_READ, + PROT_WRITE | PROT_READ, + PROT_EXEC | PROT_WRITE | PROT_READ, }; + #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 @@ -275,7 +279,7 @@ ommap(p, uap, retval) nargs.addr = uap->addr; nargs.len = uap->len; - nargs.prot = cvtbsdprot[uap->prot&0x7]; + nargs.prot = cvtbsdprot[uap->prot & 0x7]; nargs.flags = 0; if (uap->flags & OMAP_ANON) nargs.flags |= MAP_ANON; @@ -293,12 +297,12 @@ ommap(p, uap, retval) nargs.pos = uap->pos; return (mmap(p, &nargs, retval)); } -#endif /* COMPAT_43 */ +#endif /* COMPAT_43 */ struct msync_args { - caddr_t addr; - int len; + caddr_t addr; + int len; }; int msync(p, uap, retval) @@ -313,22 +317,21 @@ msync(p, uap, retval) boolean_t syncio, invalidate; #ifdef DEBUG - if (mmapdebug & (MDB_FOLLOW|MDB_SYNC)) + if (mmapdebug & (MDB_FOLLOW | MDB_SYNC)) printf("msync(%d): addr %x len %x\n", - p->p_pid, uap->addr, uap->len); + p->p_pid, uap->addr, uap->len); #endif - if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) + if (((int) uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) return (EINVAL); map = &p->p_vmspace->vm_map; - addr = (vm_offset_t)uap->addr; - size = (vm_size_t)uap->len; + addr = (vm_offset_t) uap->addr; + size = (vm_size_t) uap->len; /* * XXX Gak! If size is zero we are supposed to sync "all modified - * pages with the region containing addr". Unfortunately, we - * don't really keep track of individual mmaps so we approximate - * by flushing the range of the map entry containing addr. - * This can be incorrect if the region splits or is coalesced - * with a neighbor. + * pages with the region containing addr". Unfortunately, we don't + * really keep track of individual mmaps so we approximate by flushing + * the range of the map entry containing addr. This can be incorrect + * if the region splits or is coalesced with a neighbor. */ if (size == 0) { vm_map_entry_t entry; @@ -344,23 +347,23 @@ msync(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_SYNC) printf("msync: cleaning/flushing address range [%x-%x)\n", - addr, addr+size); + addr, addr + size); #endif /* - * Could pass this in as a third flag argument to implement - * Sun's MS_ASYNC. + * Could pass this in as a third flag argument to implement Sun's + * MS_ASYNC. */ syncio = TRUE; /* - * XXX bummer, gotta flush all cached pages to ensure - * consistency with the file system cache. Otherwise, we could - * pass this in to implement Sun's MS_INVALIDATE. + * XXX bummer, gotta flush all cached pages to ensure consistency with + * the file system cache. Otherwise, we could pass this in to + * implement Sun's MS_INVALIDATE. */ invalidate = TRUE; /* * Clean the pages and interpret the return value. 
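/*
 * [Editorial aside -- the file-backed mmap() permission rule reindented in
 *  the hunks above, reduced to its logic: a shared mapping writes through
 *  to the file, so it may only be writable if the descriptor was opened
 *  for writing, while a private mapping is copy-on-write and may always
 *  become writable.  The function and X* names below are hypothetical.]
 */
#define XPROT_READ  0x1
#define XPROT_WRITE 0x2
#define XPROT_EXEC  0x4

/* Returns the maximum protection, or -1 for the EACCES case. */
static int
file_mmap_maxprot(int opened_for_read, int opened_for_write,
    int shared, int wants_write)
{
    int maxprot = XPROT_EXEC;

    if (opened_for_read)
        maxprot |= XPROT_READ;
    if (shared) {
        if (opened_for_write)
            maxprot |= XPROT_WRITE;
        else if (wants_write)
            return (-1);        /* the kernel rejects this with EACCES */
    } else {
        /* MAP_PRIVATE: changes never reach the file. */
        maxprot |= XPROT_WRITE;
    }
    return (maxprot);
}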
*/ - rv = vm_map_clean(map, addr, addr+size, syncio, invalidate); + rv = vm_map_clean(map, addr, addr + size, syncio, invalidate); switch (rv) { case KERN_SUCCESS: break; @@ -375,8 +378,8 @@ msync(p, uap, retval) } struct munmap_args { - caddr_t addr; - int len; + caddr_t addr; + int len; }; int munmap(p, uap, retval) @@ -391,18 +394,18 @@ munmap(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) printf("munmap(%d): addr %x len %x\n", - p->p_pid, uap->addr, uap->len); + p->p_pid, uap->addr, uap->len); #endif addr = (vm_offset_t) uap->addr; if ((addr & PAGE_MASK) || uap->len < 0) - return(EINVAL); + return (EINVAL); size = (vm_size_t) round_page(uap->len); if (size == 0) - return(0); + return (0); /* - * Check for illegal addresses. Watch out for address wrap... - * Note that VM_*_ADDRESS are not constants due to casts (argh). + * Check for illegal addresses. Watch out for address wrap... Note + * that VM_*_ADDRESS are not constants due to casts (argh). */ if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); @@ -417,10 +420,10 @@ munmap(p, uap, retval) * Make sure entire range is allocated. */ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) - return(EINVAL); + return (EINVAL); /* returns nothing but KERN_SUCCESS anyway */ - (void) vm_map_remove(map, addr, addr+size); - return(0); + (void) vm_map_remove(map, addr, addr + size); + return (0); } void @@ -440,9 +443,9 @@ munmapfd(p, fd) } struct mprotect_args { - caddr_t addr; - int len; - int prot; + caddr_t addr; + int len; + int prot; }; int mprotect(p, uap, retval) @@ -457,17 +460,17 @@ mprotect(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) printf("mprotect(%d): addr %x len %x prot %d\n", - p->p_pid, uap->addr, uap->len, uap->prot); + p->p_pid, uap->addr, uap->len, uap->prot); #endif - addr = (vm_offset_t)uap->addr; + addr = (vm_offset_t) uap->addr; if ((addr & PAGE_MASK) || uap->len < 0) - return(EINVAL); - size = (vm_size_t)uap->len; + return (EINVAL); + size = (vm_size_t) uap->len; prot = uap->prot & VM_PROT_ALL; - switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, - FALSE)) { + switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, + FALSE)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: @@ -477,10 +480,11 @@ mprotect(p, uap, retval) } struct madvise_args { - caddr_t addr; - int len; - int behav; + caddr_t addr; + int len; + int behav; }; + /* ARGSUSED */ int madvise(p, uap, retval) @@ -494,10 +498,11 @@ madvise(p, uap, retval) } struct mincore_args { - caddr_t addr; - int len; - char *vec; + caddr_t addr; + int len; + char *vec; }; + /* ARGSUSED */ int mincore(p, uap, retval) @@ -511,8 +516,8 @@ mincore(p, uap, retval) } struct mlock_args { - caddr_t addr; - size_t len; + caddr_t addr; + size_t len; }; int mlock(p, uap, retval) @@ -528,12 +533,12 @@ mlock(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) printf("mlock(%d): addr %x len %x\n", - p->p_pid, uap->addr, uap->len); + p->p_pid, uap->addr, uap->len); #endif - addr = (vm_offset_t)uap->addr; + addr = (vm_offset_t) uap->addr; if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) return (EINVAL); - size = round_page((vm_size_t)uap->len); + size = round_page((vm_size_t) uap->len); if (atop(size) + cnt.v_wire_count > vm_page_max_wired) return (EAGAIN); #ifdef pmap_wired_count @@ -546,13 +551,13 @@ mlock(p, uap, retval) return (error); #endif - error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE); + error = 
vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE); return (error == KERN_SUCCESS ? 0 : ENOMEM); } struct munlock_args { - caddr_t addr; - size_t len; + caddr_t addr; + size_t len; }; int munlock(p, uap, retval) @@ -567,9 +572,9 @@ munlock(p, uap, retval) #ifdef DEBUG if (mmapdebug & MDB_FOLLOW) printf("munlock(%d): addr %x len %x\n", - p->p_pid, uap->addr, uap->len); + p->p_pid, uap->addr, uap->len); #endif - addr = (vm_offset_t)uap->addr; + addr = (vm_offset_t) uap->addr; if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) return (EINVAL); #ifndef pmap_wired_count @@ -577,9 +582,9 @@ munlock(p, uap, retval) if (error) return (error); #endif - size = round_page((vm_size_t)uap->len); + size = round_page((vm_size_t) uap->len); - error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE); + error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE); return (error == KERN_SUCCESS ? 0 : ENOMEM); } @@ -613,21 +618,21 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) *addr = round_page(*addr); } else { fitit = FALSE; - (void)vm_deallocate(map, *addr, size); + (void) vm_deallocate(map, *addr, size); } /* - * Lookup/allocate pager. All except an unnamed anonymous lookup - * gain a reference to ensure continued existance of the object. - * (XXX the exception is to appease the pageout daemon) + * Lookup/allocate pager. All except an unnamed anonymous lookup gain + * a reference to ensure continued existance of the object. (XXX the + * exception is to appease the pageout daemon) */ if (flags & MAP_ANON) type = PG_DFLT; else { - vp = (struct vnode *)handle; + vp = (struct vnode *) handle; if (vp->v_type == VCHR) { type = PG_DEVICE; - handle = (caddr_t)vp->v_rdev; + handle = (caddr_t) vp->v_rdev; } else type = PG_VNODE; } @@ -638,6 +643,9 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) * Find object and release extra reference gained by lookup */ object = vm_object_lookup(pager); + if (handle && object == NULL) { + panic("vm_mmap: vm_object_lookup failed"); + } vm_object_deallocate(object); /* @@ -645,7 +653,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) */ if (flags & MAP_ANON) { rv = vm_allocate_with_pager(map, addr, size, fitit, - pager, foff, TRUE); + pager, foff, TRUE); if (rv != KERN_SUCCESS) { if (handle == NULL) vm_pager_deallocate(pager); @@ -654,34 +662,32 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) goto out; } /* - * Don't cache anonymous objects. - * Loses the reference gained by vm_pager_allocate. - * Note that object will be NULL when handle == NULL, - * this is ok since vm_allocate_with_pager has made - * sure that these objects are uncached. + * Don't cache anonymous objects. Loses the reference gained + * by vm_pager_allocate. Note that object will be NULL when + * handle == NULL, this is ok since vm_allocate_with_pager has + * made sure that these objects are uncached. */ (void) pager_cache(object, FALSE); #ifdef DEBUG if (mmapdebug & MDB_MAPIT) printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n", - curproc->p_pid, *addr, size, pager); + curproc->p_pid, *addr, size, pager); #endif } /* - * Must be a mapped file. - * Distinguish between character special and regular files. + * Must be a mapped file. Distinguish between character special and + * regular files. 
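/*
 * [Editorial aside -- the wired-page accounting test performed by mlock()
 *  above, in isolation.  atop() converts a byte count to pages; the two
 *  globals stand in for cnt.v_wire_count and vm_page_max_wired, and the
 *  4K page size is an i386 assumption.]
 */
#define XPAGE_SHIFT 12
#define xatop(x)    ((unsigned long)(x) >> XPAGE_SHIFT)

static unsigned long wired_pages;       /* pages wired system-wide */
static unsigned long max_wired_pages;   /* administrative ceiling */

/* Non-zero means the request must fail with EAGAIN. */
static int
mlock_would_exceed(unsigned long len)
{
    return (xatop(len) + wired_pages > max_wired_pages);
}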
*/ else if (vp->v_type == VCHR) { rv = vm_allocate_with_pager(map, addr, size, fitit, - pager, foff, FALSE); + pager, foff, FALSE); /* - * Uncache the object and lose the reference gained - * by vm_pager_allocate(). If the call to - * vm_allocate_with_pager() was sucessful, then we - * gained an additional reference ensuring the object - * will continue to exist. If the call failed then - * the deallocate call below will terminate the - * object which is fine. + * Uncache the object and lose the reference gained by + * vm_pager_allocate(). If the call to + * vm_allocate_with_pager() was sucessful, then we gained an + * additional reference ensuring the object will continue to + * exist. If the call failed then the deallocate call below + * will terminate the object which is fine. */ (void) pager_cache(object, FALSE); if (rv != KERN_SUCCESS) @@ -694,23 +700,23 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) #ifdef DEBUG if (object == NULL) printf("vm_mmap: no object: vp %x, pager %x\n", - vp, pager); + vp, pager); #endif /* - * Map it directly. - * Allows modifications to go out to the vnode. + * Map it directly. Allows modifications to go out to the + * vnode. */ if (flags & MAP_SHARED) { rv = vm_allocate_with_pager(map, addr, size, - fitit, pager, - foff, FALSE); + fitit, pager, + foff, FALSE); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); goto out; } /* - * Don't cache the object. This is the easiest way - * of ensuring that data gets back to the filesystem + * Don't cache the object. This is the easiest way of + * ensuring that data gets back to the filesystem * because vnode_pager_deallocate() will fsync the * vnode. pager_cache() will lose the extra ref. */ @@ -719,43 +725,42 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) else vm_object_deallocate(object); - if( map->pmap) - pmap_object_init_pt(map->pmap, *addr, object, foff, size); + if (map->pmap) + pmap_object_init_pt(map->pmap, *addr, object, foff, size); } /* - * Copy-on-write of file. Two flavors. - * MAP_COPY is true COW, you essentially get a snapshot of - * the region at the time of mapping. MAP_PRIVATE means only - * that your changes are not reflected back to the object. - * Changes made by others will be seen. + * Copy-on-write of file. Two flavors. MAP_COPY is true COW, + * you essentially get a snapshot of the region at the time of + * mapping. MAP_PRIVATE means only that your changes are not + * reflected back to the object. Changes made by others will + * be seen. */ else { vm_map_t tmap; vm_offset_t off; /* locate and allocate the target address space */ - rv = vm_map_find(map, NULL, (vm_offset_t)0, - addr, size, fitit); + rv = vm_map_find(map, NULL, (vm_offset_t) 0, + addr, size, fitit); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); goto out; } tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS, - VM_MIN_ADDRESS+size, TRUE); + VM_MIN_ADDRESS + size, TRUE); off = VM_MIN_ADDRESS; rv = vm_allocate_with_pager(tmap, &off, size, - TRUE, pager, - foff, FALSE); + TRUE, pager, + foff, FALSE); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); vm_map_deallocate(tmap); goto out; } /* - * (XXX) - * MAP_PRIVATE implies that we see changes made by - * others. To ensure that we need to guarentee that - * no copy object is created (otherwise original + * (XXX) MAP_PRIVATE implies that we see changes made + * by others. 
To ensure that we need to guarentee + * that no copy object is created (otherwise original * pages would be pushed to the copy object and we * would never see changes made by others). We * totally sleeze it right now by marking the object @@ -764,13 +769,12 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) if ((flags & MAP_COPY) == 0) object->flags |= OBJ_INTERNAL; rv = vm_map_copy(map, tmap, *addr, size, off, - FALSE, FALSE); + FALSE, FALSE); object->flags &= ~OBJ_INTERNAL; /* - * (XXX) - * My oh my, this only gets worse... - * Force creation of a shadow object so that - * vm_map_fork will do the right thing. + * (XXX) My oh my, this only gets worse... Force + * creation of a shadow object so that vm_map_fork + * will do the right thing. */ if ((flags & MAP_COPY) == 0) { vm_map_t tmap; @@ -782,19 +786,18 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) tmap = map; vm_map_lookup(&tmap, *addr, VM_PROT_WRITE, - &tentry, &tobject, &toffset, - &tprot, &twired, &tsu); + &tentry, &tobject, &toffset, + &tprot, &twired, &tsu); vm_map_lookup_done(tmap, tentry); } /* - * (XXX) - * Map copy code cannot detect sharing unless a + * (XXX) Map copy code cannot detect sharing unless a * sharing map is involved. So we cheat and write * protect everything ourselves. */ vm_object_pmap_copy(object, foff, foff + size); - if( map->pmap) - pmap_object_init_pt(map->pmap, *addr, object, foff, size); + if (map->pmap) + pmap_object_init_pt(map->pmap, *addr, object, foff, size); vm_object_deallocate(object); vm_map_deallocate(tmap); if (rv != KERN_SUCCESS) @@ -803,18 +806,18 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) #ifdef DEBUG if (mmapdebug & MDB_MAPIT) printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n", - curproc->p_pid, *addr, size, pager); + curproc->p_pid, *addr, size, pager); #endif } /* - * Correct protection (default is VM_PROT_ALL). - * If maxprot is different than prot, we must set both explicitly. + * Correct protection (default is VM_PROT_ALL). If maxprot is + * different than prot, we must set both explicitly. */ rv = KERN_SUCCESS; if (maxprot != VM_PROT_ALL) - rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE); + rv = vm_map_protect(map, *addr, *addr + size, maxprot, TRUE); if (rv == KERN_SUCCESS && prot != maxprot) - rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE); + rv = vm_map_protect(map, *addr, *addr + size, prot, FALSE); if (rv != KERN_SUCCESS) { (void) vm_deallocate(map, *addr, size); goto out; @@ -823,7 +826,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) * Shared memory is also shared with children. */ if (flags & MAP_SHARED) { - rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE); + rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); if (rv != KERN_SUCCESS) { (void) vm_deallocate(map, *addr, size); goto out; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index f529ad20d0bd..18fcac41e63f 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.13 1994/12/23 05:00:19 davidg Exp $ + * $Id: vm_object.c,v 1.14 1995/01/05 04:30:40 davidg Exp $ */ /* @@ -73,11 +73,15 @@ #include <sys/kernel.h> #include <sys/proc.h> /* for curproc, pageproc */ #include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/mount.h> #include <vm/vm.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> +#include <vm/vm_pager.h> #include <vm/swap_pager.h> +#include <vm/vnode_pager.h> static void _vm_object_allocate(vm_size_t, vm_object_t); static void vm_object_rcollapse(vm_object_t, vm_object_t); @@ -109,21 +113,22 @@ static void vm_object_rcollapse(vm_object_t, vm_object_t); */ -struct vm_object kernel_object_store; -struct vm_object kmem_object_store; +struct vm_object kernel_object_store; +struct vm_object kmem_object_store; -extern int vm_cache_max; -#define VM_OBJECT_HASH_COUNT 157 +int vm_object_cache_max; + +#define VM_OBJECT_HASH_COUNT 509 struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT]; -long object_collapses = 0; -long object_bypasses = 0; +long object_collapses = 0; +long object_bypasses = 0; static void _vm_object_allocate(size, object) - vm_size_t size; - register vm_object_t object; + vm_size_t size; + register vm_object_t object; { bzero(object, sizeof *object); TAILQ_INIT(&object->memq); @@ -137,7 +142,7 @@ _vm_object_allocate(size, object) object->copy = NULL; /* - * Object starts out read-write, with no pager. + * Object starts out read-write, with no pager. 
*/ object->pager = NULL; @@ -160,24 +165,25 @@ _vm_object_allocate(size, object) void vm_object_init(vm_offset_t nothing) { - register int i; + register int i; TAILQ_INIT(&vm_object_cached_list); TAILQ_INIT(&vm_object_list); vm_object_count = 0; simple_lock_init(&vm_cache_lock); simple_lock_init(&vm_object_list_lock); + vm_object_cache_max = (cnt.v_page_count - 500) / 8; for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) TAILQ_INIT(&vm_object_hashtable[i]); kernel_object = &kernel_object_store; _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, - kernel_object); + kernel_object); kmem_object = &kmem_object_store; _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, - kmem_object); + kmem_object); } /* @@ -188,17 +194,17 @@ vm_object_init(vm_offset_t nothing) vm_object_t vm_object_allocate(size) - vm_size_t size; + vm_size_t size; { - register vm_object_t result; + register vm_object_t result; result = (vm_object_t) - malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK); - + malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); + _vm_object_allocate(size, result); - return(result); + return (result); } @@ -209,7 +215,7 @@ vm_object_allocate(size) */ inline void vm_object_reference(object) - register vm_object_t object; + register vm_object_t object; { if (object == NULL) return; @@ -232,78 +238,70 @@ vm_object_reference(object) */ void vm_object_deallocate(object) - vm_object_t object; + vm_object_t object; { - vm_object_t temp; + vm_object_t temp; while (object != NULL) { /* - * The cache holds a reference (uncounted) to - * the object; we must lock it before removing - * the object. + * The cache holds a reference (uncounted) to the object; we + * must lock it before removing the object. */ vm_object_cache_lock(); /* - * Lose the reference + * Lose the reference */ vm_object_lock(object); if (--(object->ref_count) != 0) { - if( object->ref_count == 1) { - if( object->reverse_shadow_head.tqh_first) { + if (object->ref_count == 1) { + if (object->reverse_shadow_head.tqh_first) { ++object->reverse_shadow_head.tqh_first->ref_count; - vm_object_rcollapse(object->reverse_shadow_head.tqh_first, object); + if (vm_object_lock_try(object->reverse_shadow_head.tqh_first)) { + vm_object_rcollapse(object->reverse_shadow_head.tqh_first, object); + vm_object_unlock(object->reverse_shadow_head.tqh_first); + } vm_object_deallocate(object->reverse_shadow_head.tqh_first); } } - vm_object_unlock(object); /* - * If there are still references, then - * we are done. + * If there are still references, then we are done. */ vm_object_cache_unlock(); return; } - /* - * See if this object can persist. If so, enter - * it in the cache, then deactivate all of its - * pages. + * See if this object can persist. If so, enter it in the + * cache, then deactivate all of its pages. */ if (object->flags & OBJ_CANPERSIST) { TAILQ_INSERT_TAIL(&vm_object_cached_list, object, - cached_list); + cached_list); vm_object_cached++; vm_object_cache_unlock(); -/* - * this code segment was removed because it kills performance with - * large -- repetively used binaries. The functionality now resides - * in the pageout daemon - * vm_object_deactivate_pages(object); - */ vm_object_unlock(object); vm_object_cache_trim(); return; } - /* - * Make sure no one can look us up now. + * Make sure no one can look us up now. 
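/*
 * [Editorial aside -- a stand-alone sketch, not the kernel routine, of the
 *  release path reworked above: drop a reference, park persistent objects
 *  on a reuse cache, otherwise mark the object dead (so new lookups fail)
 *  and tear it down.  The obj type and helpers are hypothetical; the stubs
 *  only mark where the real work happens.]
 */
struct obj {
    int ref_count;
    int can_persist;            /* analogue of OBJ_CANPERSIST */
    int dead;                   /* analogue of OBJ_DEAD */
};

static void
cache_insert(struct obj *o)
{
    (void)o;                    /* real code: queue pages for possible reuse */
}

static void
obj_terminate(struct obj *o)
{
    (void)o;                    /* real code: flush, free pages, free pager */
}

static void
obj_release(struct obj *o)
{
    if (--o->ref_count > 0)
        return;                 /* still referenced elsewhere */
    if (o->can_persist) {
        cache_insert(o);        /* keep it around for a re-open */
        return;
    }
    o->dead = 1;                /* block new lookups first */
    obj_terminate(o);
}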
*/ + object->flags |= OBJ_DEAD; vm_object_remove(object->pager); vm_object_cache_unlock(); - + temp = object->shadow; - if( temp) + if (temp) TAILQ_REMOVE(&temp->reverse_shadow_head, object, reverse_shadow_list); vm_object_terminate(object); - /* unlocks and deallocates object */ + /* unlocks and deallocates object */ object = temp; } } @@ -316,15 +314,15 @@ vm_object_deallocate(object) */ void vm_object_terminate(object) - register vm_object_t object; + register vm_object_t object; { - register vm_page_t p; - vm_object_t shadow_object; + register vm_page_t p, next; + vm_object_t shadow_object; int s; + struct vnode *vp = NULL; /* - * Detach the object from its shadow if we are the shadow's - * copy. + * Detach the object from its shadow if we are the shadow's copy. */ if ((shadow_object = object->shadow) != NULL) { vm_object_lock(shadow_object); @@ -336,96 +334,96 @@ vm_object_terminate(object) */ vm_object_unlock(shadow_object); } + if (object->pager && (object->pager->pg_type == PG_VNODE)) { + vn_pager_t vnp = object->pager->pg_data; + vp = vnp->vnp_vp; + VOP_FSYNC(vp, NOCRED, MNT_WAIT, NULL); + vinvalbuf(vp, 0, NOCRED, NULL, 0, 0); + } /* - * Wait until the pageout daemon is through - * with the object. + * Wait until the pageout daemon is through with the object. */ + s = splhigh(); while (object->paging_in_progress) { - vm_object_sleep((int)object, object, FALSE); + vm_object_unlock(object); + tsleep((caddr_t) object, PVM, "objtrm", 0); vm_object_lock(object); } + splx(s); /* - * While the paging system is locked, - * pull the object's pages off the active - * and inactive queues. This keeps the - * pageout daemon from playing with them - * during vm_pager_deallocate. - * - * We can't free the pages yet, because the - * object's pager may have to write them out - * before deallocating the paging space. + * While the paging system is locked, pull the object's pages off the + * active and inactive queues. This keeps the pageout daemon from + * playing with them during vm_pager_deallocate. + * + * We can't free the pages yet, because the object's pager may have to + * write them out before deallocating the paging space. */ - for( p = object->memq.tqh_first; p; p=p->listq.tqe_next) { + for (p = object->memq.tqh_first; p; p = next) { VM_PAGE_CHECK(p); + next = p->listq.tqe_next; vm_page_lock_queues(); - s = splhigh(); - if (p->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, p, pageq); - p->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } - - if (p->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, p, pageq); - p->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - } - splx(s); + if (p->flags & PG_CACHE) + vm_page_free(p); + else + vm_page_unqueue(p); vm_page_unlock_queues(); + p = next; } - - vm_object_unlock(object); if (object->paging_in_progress != 0) panic("vm_object_deallocate: pageout in progress"); /* - * Clean and free the pages, as appropriate. - * All references to the object are gone, - * so we don't need to lock it. + * Clean and free the pages, as appropriate. All references to the + * object are gone, so we don't need to lock it. */ if ((object->flags & OBJ_INTERNAL) == 0) { - vm_object_lock(object); (void) vm_object_page_clean(object, 0, 0, TRUE, TRUE); + } + /* + * one last time -- get rid of buffers that might have been created + * for the vm_object_page_clean + */ + if (vp != NULL) { vm_object_unlock(object); + vinvalbuf(vp, 0, NOCRED, NULL, 0, 0); + vm_object_lock(object); } - /* - * Now free the pages. 
- * For internal objects, this also removes them from paging queues. + * Now free the pages. For internal objects, this also removes them + * from paging queues. */ while ((p = object->memq.tqh_first) != NULL) { VM_PAGE_CHECK(p); vm_page_lock_queues(); + PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; vm_page_unlock_queues(); } + vm_object_unlock(object); /* - * Let the pager know object is dead. + * Let the pager know object is dead. */ - if (object->pager != NULL) vm_pager_deallocate(object->pager); - simple_lock(&vm_object_list_lock); TAILQ_REMOVE(&vm_object_list, object, object_list); vm_object_count--; simple_unlock(&vm_object_list_lock); /* - * Free the space for the object. + * Free the space for the object. */ - - free((caddr_t)object, M_VMOBJ); + free((caddr_t) object, M_VMOBJ); } /* @@ -441,13 +439,13 @@ vm_object_terminate(object) #if 1 boolean_t vm_object_page_clean(object, start, end, syncio, de_queue) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; - boolean_t syncio; - boolean_t de_queue; + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; + boolean_t syncio; + boolean_t de_queue; { - register vm_page_t p, nextp; + register vm_page_t p, nextp; int size; if (object->pager == NULL) @@ -464,29 +462,32 @@ again: * Wait until the pageout daemon is through with the object. */ while (object->paging_in_progress) { - vm_object_sleep((int)object, object, FALSE); + tsleep(object, PVM, "objpcw", 0); } nextp = object->memq.tqh_first; - while ( (p = nextp) && ((start == end) || (size != 0) ) ) { + while ((p = nextp) && ((start == end) || (size != 0))) { nextp = p->listq.tqe_next; if (start == end || (p->offset >= start && p->offset < end)) { - if (p->flags & PG_BUSY) - continue; + if ((p->flags & PG_BUSY) || p->busy) { + int s = splhigh(); + p->flags |= PG_WANTED; + tsleep(p, PVM, "objpcn", 0); + splx(s); + goto again; + } size -= PAGE_SIZE; - if ((p->flags & PG_CLEAN) - && pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->flags &= ~PG_CLEAN; + vm_page_test_dirty(p); - if ((p->flags & PG_CLEAN) == 0) { - vm_pageout_clean(p,VM_PAGEOUT_FORCE); + if ((p->dirty & p->valid) != 0) { + vm_pageout_clean(p, VM_PAGEOUT_FORCE); goto again; } } } - wakeup((caddr_t)object); + wakeup((caddr_t) object); return 1; } #endif @@ -510,13 +511,13 @@ again: #if 0 boolean_t vm_object_page_clean(object, start, end, syncio, de_queue) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; - boolean_t syncio; - boolean_t de_queue; + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; + boolean_t syncio; + boolean_t de_queue; { - register vm_page_t p; + register vm_page_t p; int onqueue; boolean_t noerror = TRUE; @@ -525,8 +526,8 @@ vm_object_page_clean(object, start, end, syncio, de_queue) /* * If it is an internal object and there is no pager, attempt to - * allocate one. Note that vm_object_collapse may relocate one - * from a collapsed object so we must recheck afterward. + * allocate one. Note that vm_object_collapse may relocate one from a + * collapsed object so we must recheck afterward. 
*/ if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) { vm_object_collapse(object); @@ -534,9 +535,9 @@ vm_object_page_clean(object, start, end, syncio, de_queue) vm_pager_t pager; vm_object_unlock(object); - pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, - object->size, VM_PROT_ALL, - (vm_offset_t)0); + pager = vm_pager_allocate(PG_DFLT, (caddr_t) 0, + object->size, VM_PROT_ALL, + (vm_offset_t) 0); if (pager) vm_object_setpager(object, pager, 0, FALSE); vm_object_lock(object); @@ -550,7 +551,7 @@ again: * Wait until the pageout daemon is through with the object. */ while (object->paging_in_progress) { - vm_object_sleep((int)object, object, FALSE); + vm_object_sleep((int) object, object, FALSE); vm_object_lock(object); } /* @@ -560,26 +561,24 @@ again: onqueue = 0; if ((start == end || p->offset >= start && p->offset < end) && !(p->flags & PG_FICTITIOUS)) { - if ((p->flags & PG_CLEAN) && - pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->flags &= ~PG_CLEAN; + vm_page_test_dirty(p); /* - * Remove the page from any paging queue. - * This needs to be done if either we have been - * explicitly asked to do so or it is about to - * be cleaned (see comment below). + * Remove the page from any paging queue. This needs + * to be done if either we have been explicitly asked + * to do so or it is about to be cleaned (see comment + * below). */ - if (de_queue || !(p->flags & PG_CLEAN)) { + if (de_queue || (p->dirty & p->valid)) { vm_page_lock_queues(); if (p->flags & PG_ACTIVE) { TAILQ_REMOVE(&vm_page_queue_active, - p, pageq); + p, pageq); p->flags &= ~PG_ACTIVE; cnt.v_active_count--; onqueue = 1; } else if (p->flags & PG_INACTIVE) { TAILQ_REMOVE(&vm_page_queue_inactive, - p, pageq); + p, pageq); p->flags &= ~PG_INACTIVE; cnt.v_inactive_count--; onqueue = -1; @@ -589,27 +588,27 @@ again: } /* * To ensure the state of the page doesn't change - * during the clean operation we do two things. - * First we set the busy bit and write-protect all - * mappings to ensure that write accesses to the - * page block (in vm_fault). Second, we remove - * the page from any paging queue to foil the - * pageout daemon (vm_pageout_scan). + * during the clean operation we do two things. First + * we set the busy bit and write-protect all mappings + * to ensure that write accesses to the page block (in + * vm_fault). Second, we remove the page from any + * paging queue to foil the pageout daemon + * (vm_pageout_scan). */ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); - if (!(p->flags & PG_CLEAN)) { + if (p->dirty & p->valid) { p->flags |= PG_BUSY; object->paging_in_progress++; vm_object_unlock(object); /* - * XXX if put fails we mark the page as - * clean to avoid an infinite loop. - * Will loose changes to the page. + * XXX if put fails we mark the page as clean + * to avoid an infinite loop. Will loose + * changes to the page. 
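/*
 * [Editorial aside -- the new dirtiness test visible in the hunks above:
 *  this commit moves from the single PG_CLEAN flag to per-page "valid" and
 *  "dirty" bit masks, and a page needs cleaning only where both are set.
 *  A minimal stand-alone rendering of that test:]
 */
static int
page_needs_clean(unsigned int valid, unsigned int dirty)
{
    /* Only portions that are both resident (valid) and modified count. */
    return ((dirty & valid) != 0);
}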
*/ if (vm_pager_put(object->pager, p, syncio)) { printf("%s: pager_put error\n", - "vm_object_page_clean"); - p->flags |= PG_CLEAN; + "vm_object_page_clean"); + p->dirty = 0; noerror = FALSE; } vm_object_lock(object); @@ -641,9 +640,9 @@ again: */ void vm_object_deactivate_pages(object) - register vm_object_t object; + register vm_object_t object; { - register vm_page_t p, next; + register vm_page_t p, next; for (p = object->memq.tqh_first; p != NULL; p = next) { next = p->listq.tqe_next; @@ -659,10 +658,10 @@ vm_object_deactivate_pages(object) void vm_object_cache_trim() { - register vm_object_t object; + register vm_object_t object; vm_object_cache_lock(); - while (vm_object_cached > vm_cache_max) { + while (vm_object_cached > vm_object_cache_max) { object = vm_object_cached_list.tqh_first; vm_object_cache_unlock(); @@ -686,12 +685,13 @@ vm_object_cache_trim() * * The object must *not* be locked. */ -void vm_object_pmap_copy(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; +void +vm_object_pmap_copy(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; { - register vm_page_t p; + register vm_page_t p; if (object == NULL) return; @@ -716,11 +716,11 @@ void vm_object_pmap_copy(object, start, end) */ void vm_object_pmap_remove(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; { - register vm_page_t p; + register vm_page_t p; int s; if (object == NULL) @@ -732,7 +732,7 @@ again: for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { if ((start <= p->offset) && (p->offset < end)) { s = splhigh(); - if (p->flags & PG_BUSY) { + if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep((caddr_t) p, PVM, "vmopmr", 0); splx(s); @@ -740,13 +740,11 @@ again: } splx(s); pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - if ((p->flags & PG_CLEAN) == 0) - p->flags |= PG_LAUNDRY; } } vm_object_unlock(object); --object->paging_in_progress; - if( object->paging_in_progress == 0) + if (object->paging_in_progress == 0) wakeup((caddr_t) object); } @@ -761,58 +759,56 @@ again: * May defer the copy until later if the object is not backed * up by a non-default pager. */ -void vm_object_copy(src_object, src_offset, size, - dst_object, dst_offset, src_needs_copy) - register vm_object_t src_object; - vm_offset_t src_offset; - vm_size_t size; - vm_object_t *dst_object; /* OUT */ - vm_offset_t *dst_offset; /* OUT */ - boolean_t *src_needs_copy; /* OUT */ +void +vm_object_copy(src_object, src_offset, size, + dst_object, dst_offset, src_needs_copy) + register vm_object_t src_object; + vm_offset_t src_offset; + vm_size_t size; + vm_object_t *dst_object;/* OUT */ + vm_offset_t *dst_offset;/* OUT */ + boolean_t *src_needs_copy; /* OUT */ { - register vm_object_t new_copy; - register vm_object_t old_copy; - vm_offset_t new_start, new_end; + register vm_object_t new_copy; + register vm_object_t old_copy; + vm_offset_t new_start, new_end; - register vm_page_t p; + register vm_page_t p; if (src_object == NULL) { /* - * Nothing to copy + * Nothing to copy */ *dst_object = NULL; *dst_offset = 0; *src_needs_copy = FALSE; return; } - - /* - * If the object's pager is null_pager or the - * default pager, we don't have to make a copy - * of it. Instead, we set the needs copy flag and - * make a shadow later. 
+ * If the object's pager is null_pager or the default pager, we don't + * have to make a copy of it. Instead, we set the needs copy flag and + * make a shadow later. */ vm_object_lock(src_object); /* - * Try to collapse the object before copying it. + * Try to collapse the object before copying it. */ vm_object_collapse(src_object); if (src_object->pager == NULL || - src_object->pager->pg_type == PG_SWAP || + src_object->pager->pg_type == PG_SWAP || (src_object->flags & OBJ_INTERNAL)) { /* - * Make another reference to the object + * Make another reference to the object */ src_object->ref_count++; /* - * Mark all of the pages copy-on-write. + * Mark all of the pages copy-on-write. */ for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next) if (src_offset <= p->offset && @@ -822,45 +818,41 @@ void vm_object_copy(src_object, src_offset, size, *dst_object = src_object; *dst_offset = src_offset; - + /* - * Must make a shadow when write is desired + * Must make a shadow when write is desired */ *src_needs_copy = TRUE; return; } - - /* - * If the object has a pager, the pager wants to - * see all of the changes. We need a copy-object - * for the changed pages. - * - * If there is a copy-object, and it is empty, - * no changes have been made to the object since the - * copy-object was made. We can use the same copy- - * object. + * If the object has a pager, the pager wants to see all of the + * changes. We need a copy-object for the changed pages. + * + * If there is a copy-object, and it is empty, no changes have been made + * to the object since the copy-object was made. We can use the same + * copy- object. */ - Retry1: +Retry1: old_copy = src_object->copy; if (old_copy != NULL) { /* - * Try to get the locks (out of order) + * Try to get the locks (out of order) */ if (!vm_object_lock_try(old_copy)) { vm_object_unlock(src_object); /* should spin a bit here... */ + tsleep((caddr_t) old_copy, PVM, "cpylck", 1); vm_object_lock(src_object); goto Retry1; } - if (old_copy->resident_page_count == 0 && old_copy->pager == NULL) { /* - * Return another reference to - * the existing copy-object. + * Return another reference to the existing + * copy-object. */ old_copy->ref_count++; vm_object_unlock(old_copy); @@ -875,61 +867,58 @@ void vm_object_copy(src_object, src_offset, size, vm_object_unlock(src_object); /* - * If the object has a pager, the pager wants - * to see all of the changes. We must make - * a copy-object and put the changed pages there. - * - * The copy-object is always made large enough to - * completely shadow the original object, since - * it may have several users who want to shadow - * the original object at different points. + * If the object has a pager, the pager wants to see all of the + * changes. We must make a copy-object and put the changed pages + * there. + * + * The copy-object is always made large enough to completely shadow the + * original object, since it may have several users who want to shadow + * the original object at different points. 
*/ new_copy = vm_object_allocate(src_object->size); - Retry2: +Retry2: vm_object_lock(src_object); /* - * Copy object may have changed while we were unlocked + * Copy object may have changed while we were unlocked */ old_copy = src_object->copy; if (old_copy != NULL) { /* - * Try to get the locks (out of order) + * Try to get the locks (out of order) */ if (!vm_object_lock_try(old_copy)) { vm_object_unlock(src_object); + tsleep((caddr_t) old_copy, PVM, "cpylck", 1); goto Retry2; } - /* - * Consistency check + * Consistency check */ if (old_copy->shadow != src_object || old_copy->shadow_offset != (vm_offset_t) 0) panic("vm_object_copy: copy/shadow inconsistency"); /* - * Make the old copy-object shadow the new one. - * It will receive no more pages from the original - * object. + * Make the old copy-object shadow the new one. It will + * receive no more pages from the original object. */ src_object->ref_count--; /* remove ref. from old_copy */ - if( old_copy->shadow) + if (old_copy->shadow) TAILQ_REMOVE(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list); old_copy->shadow = new_copy; TAILQ_INSERT_TAIL(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list); - new_copy->ref_count++; /* locking not needed - we - have the only pointer */ + new_copy->ref_count++; /* locking not needed - we have the + * only pointer */ vm_object_unlock(old_copy); /* done with old_copy */ } - new_start = (vm_offset_t) 0; /* always shadow original at 0 */ - new_end = (vm_offset_t) new_copy->size; /* for the whole object */ + new_end = (vm_offset_t) new_copy->size; /* for the whole object */ /* - * Point the new copy at the existing object. + * Point the new copy at the existing object. */ new_copy->shadow = src_object; @@ -939,8 +928,7 @@ void vm_object_copy(src_object, src_offset, size, src_object->copy = new_copy; /* - * Mark all the affected pages of the existing object - * copy-on-write. + * Mark all the affected pages of the existing object copy-on-write. */ for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) if ((new_start <= p->offset) && (p->offset < new_end)) @@ -966,42 +954,41 @@ void vm_object_copy(src_object, src_offset, size, void vm_object_shadow(object, offset, length) - vm_object_t *object; /* IN/OUT */ - vm_offset_t *offset; /* IN/OUT */ - vm_size_t length; + vm_object_t *object; /* IN/OUT */ + vm_offset_t *offset; /* IN/OUT */ + vm_size_t length; { - register vm_object_t source; - register vm_object_t result; + register vm_object_t source; + register vm_object_t result; source = *object; /* - * Allocate a new object with the given length + * Allocate a new object with the given length */ if ((result = vm_object_allocate(length)) == NULL) panic("vm_object_shadow: no object for shadowing"); /* - * The new object shadows the source object, adding - * a reference to it. Our caller changes his reference - * to point to the new object, removing a reference to - * the source object. Net result: no change of reference - * count. + * The new object shadows the source object, adding a reference to it. + * Our caller changes his reference to point to the new object, + * removing a reference to the source object. Net result: no change + * of reference count. */ result->shadow = source; if (source) TAILQ_INSERT_TAIL(&result->shadow->reverse_shadow_head, result, reverse_shadow_list); - + /* - * Store the offset into the source object, - * and fix up the offset into the new object. 
+ * Store the offset into the source object, and fix up the offset into + * the new object. */ result->shadow_offset = *offset; /* - * Return the new things + * Return the new things */ *offset = 0; @@ -1014,23 +1001,19 @@ vm_object_shadow(object, offset, length) void vm_object_setpager(object, pager, paging_offset, - read_only) - vm_object_t object; - vm_pager_t pager; - vm_offset_t paging_offset; - boolean_t read_only; + read_only) + vm_object_t object; + vm_pager_t pager; + vm_offset_t paging_offset; + boolean_t read_only; { -#ifdef lint - read_only++; /* No longer used */ -#endif lint - - vm_object_lock(object); /* XXX ? */ + vm_object_lock(object); /* XXX ? */ if (object->pager && object->pager != pager) { panic("!!!pager already allocated!!!\n"); } object->pager = pager; object->paging_offset = paging_offset; - vm_object_unlock(object); /* XXX ? */ + vm_object_unlock(object); /* XXX ? */ } /* @@ -1045,36 +1028,37 @@ vm_object_setpager(object, pager, paging_offset, * specified pager and paging id. */ -vm_object_t vm_object_lookup(pager) - vm_pager_t pager; +vm_object_t +vm_object_lookup(pager) + vm_pager_t pager; { - register vm_object_hash_entry_t entry; - vm_object_t object; + register vm_object_hash_entry_t entry; + vm_object_t object; cnt.v_lookups++; vm_object_cache_lock(); for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first; - entry != NULL; - entry = entry->hash_links.tqe_next) { + entry != NULL; + entry = entry->hash_links.tqe_next) { object = entry->object; if (object->pager == pager) { vm_object_lock(object); if (object->ref_count == 0) { TAILQ_REMOVE(&vm_object_cached_list, object, - cached_list); + cached_list); vm_object_cached--; } object->ref_count++; vm_object_unlock(object); vm_object_cache_unlock(); cnt.v_hits++; - return(object); + return (object); } } vm_object_cache_unlock(); - return(NULL); + return (NULL); } /* @@ -1082,16 +1066,17 @@ vm_object_t vm_object_lookup(pager) * the hash table. */ -void vm_object_enter(object, pager) - vm_object_t object; - vm_pager_t pager; +void +vm_object_enter(object, pager) + vm_object_t object; + vm_pager_t pager; { - struct vm_object_hash_head *bucket; - register vm_object_hash_entry_t entry; + struct vm_object_hash_head *bucket; + register vm_object_hash_entry_t entry; /* - * We don't cache null objects, and we can't cache - * objects with the null pager. + * We don't cache null objects, and we can't cache objects with the + * null pager. 
*/ if (object == NULL) @@ -1101,7 +1086,7 @@ void vm_object_enter(object, pager) bucket = &vm_object_hashtable[vm_object_hash(pager)]; entry = (vm_object_hash_entry_t) - malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK); + malloc((u_long) sizeof *entry, M_VMOBJHASH, M_WAITOK); entry->object = object; object->flags |= OBJ_CANPERSIST; @@ -1120,21 +1105,21 @@ void vm_object_enter(object, pager) */ void vm_object_remove(pager) - register vm_pager_t pager; + register vm_pager_t pager; { - struct vm_object_hash_head *bucket; - register vm_object_hash_entry_t entry; - register vm_object_t object; + struct vm_object_hash_head *bucket; + register vm_object_hash_entry_t entry; + register vm_object_t object; bucket = &vm_object_hashtable[vm_object_hash(pager)]; for (entry = bucket->tqh_first; - entry != NULL; - entry = entry->hash_links.tqe_next) { + entry != NULL; + entry = entry->hash_links.tqe_next) { object = entry->object; if (object->pager == pager) { TAILQ_REMOVE(bucket, entry, hash_links); - free((caddr_t)entry, M_VMOBJHASH); + free((caddr_t) entry, M_VMOBJHASH); break; } } @@ -1146,20 +1131,20 @@ vm_object_rcollapse(object, sobject) { register vm_object_t backing_object; register vm_offset_t backing_offset, new_offset; - register vm_page_t p, pp; - register vm_size_t size; + register vm_page_t p, pp; + register vm_size_t size; int s; - if( !object) + if (!object) return; backing_object = object->shadow; - if( backing_object != sobject) { + if (backing_object != sobject) { printf("backing obj != sobject!!!\n"); return; } - if( !backing_object) + if (!backing_object) return; - if( (backing_object->flags & OBJ_INTERNAL) == 0) + if ((backing_object->flags & OBJ_INTERNAL) == 0) return; if (backing_object->shadow != NULL && backing_object->shadow->copy == backing_object) @@ -1169,8 +1154,8 @@ vm_object_rcollapse(object, sobject) backing_object->ref_count += 2; s = splbio(); - while( backing_object->paging_in_progress) { - tsleep( backing_object, PVM, "rcolow", 0); + while (backing_object->paging_in_progress) { + tsleep(backing_object, PVM, "rcolow", 0); } splx(s); @@ -1179,31 +1164,34 @@ vm_object_rcollapse(object, sobject) p = backing_object->memq.tqh_first; while (p) { vm_page_t next; + next = p->listq.tqe_next; - + pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); new_offset = (p->offset - backing_offset); if (p->offset < backing_offset || new_offset >= size) { + if (backing_object->pager) + swap_pager_freespace(backing_object->pager, + backing_object->paging_offset + p->offset, PAGE_SIZE); vm_page_lock_queues(); - if( backing_object->pager) - swap_pager_freespace(backing_object->pager, backing_object->paging_offset + p->offset, PAGE_SIZE); - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_free(p); vm_page_unlock_queues(); } else { - pp = vm_page_lookup(object, new_offset); - if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, - object->paging_offset + new_offset))) { + pp = vm_page_lookup(object, new_offset); + if (pp != NULL || + (object->pager && + vm_pager_has_page(object->pager, object->paging_offset + new_offset))) { + if (backing_object->pager) + swap_pager_freespace(backing_object->pager, + backing_object->paging_offset + p->offset, PAGE_SIZE); vm_page_lock_queues(); - if( backing_object->pager) - swap_pager_freespace(backing_object->pager, backing_object->paging_offset + p->offset, PAGE_SIZE); - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_free(p); vm_page_unlock_queues(); - } else { - if (!backing_object->pager || 
!vm_pager_has_page(backing_object->pager, backing_object->paging_offset + p->offset)) + } else { + if (!backing_object->pager || + !vm_pager_has_page(backing_object->pager, backing_object->paging_offset + p->offset)) vm_page_rename(p, object, new_offset); - } + } } p = next; } @@ -1221,13 +1209,13 @@ vm_object_qcollapse(object) { register vm_object_t backing_object; register vm_offset_t backing_offset, new_offset; - register vm_page_t p, pp; - register vm_size_t size; + register vm_page_t p, pp; + register vm_size_t size; backing_object = object->shadow; - if( !backing_object) + if (!backing_object) return; - if( (backing_object->flags & OBJ_INTERNAL) == 0) + if ((backing_object->flags & OBJ_INTERNAL) == 0) return; if (backing_object->shadow != NULL && backing_object->shadow->copy == backing_object) @@ -1242,43 +1230,46 @@ vm_object_qcollapse(object) p = backing_object->memq.tqh_first; while (p) { vm_page_t next; + next = p->listq.tqe_next; - if( (p->flags & (PG_BUSY|PG_FAKE|PG_FICTITIOUS)) || - p->hold_count || p->wire_count) { + if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) || + !p->valid || p->hold_count || p->wire_count || p->busy || p->bmapped) { p = next; continue; } - + pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); new_offset = (p->offset - backing_offset); if (p->offset < backing_offset || new_offset >= size) { + if (backing_object->pager) + swap_pager_freespace(backing_object->pager, + backing_object->paging_offset + p->offset, PAGE_SIZE); vm_page_lock_queues(); - if( backing_object->pager) - swap_pager_freespace(backing_object->pager, backing_object->paging_offset + p->offset, PAGE_SIZE); - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_free(p); vm_page_unlock_queues(); } else { - pp = vm_page_lookup(object, new_offset); - if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, - object->paging_offset + new_offset))) { + pp = vm_page_lookup(object, new_offset); + if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, + object->paging_offset + new_offset))) { + if (backing_object->pager) + swap_pager_freespace(backing_object->pager, + backing_object->paging_offset + p->offset, PAGE_SIZE); vm_page_lock_queues(); - if( backing_object->pager) - swap_pager_freespace(backing_object->pager, backing_object->paging_offset + p->offset, PAGE_SIZE); - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_free(p); vm_page_unlock_queues(); - } else { - if (!backing_object->pager || !vm_pager_has_page(backing_object->pager, backing_object->paging_offset + p->offset)) + } else { + if (!backing_object->pager || + !vm_pager_has_page(backing_object->pager, backing_object->paging_offset + p->offset)) vm_page_rename(p, object, new_offset); - } + } } p = next; } backing_object->ref_count -= 2; } -boolean_t vm_object_collapse_allowed = TRUE; +boolean_t vm_object_collapse_allowed = TRUE; + /* * vm_object_collapse: * @@ -1295,108 +1286,99 @@ boolean_t vm_object_collapse_allowed = TRUE; */ void vm_object_collapse(object) - register vm_object_t object; + register vm_object_t object; { - register vm_object_t backing_object; - register vm_offset_t backing_offset; - register vm_size_t size; - register vm_offset_t new_offset; - register vm_page_t p, pp; + register vm_object_t backing_object; + register vm_offset_t backing_offset; + register vm_size_t size; + register vm_offset_t new_offset; + register vm_page_t p, pp; if (!vm_object_collapse_allowed) return; while (TRUE) { /* - * Verify that the conditions are right for collapse: - * - * The object 
exists and no pages in it are currently - * being paged out. + * Verify that the conditions are right for collapse: + * + * The object exists and no pages in it are currently being paged + * out. */ if (object == NULL) return; if (object->paging_in_progress != 0) { - if( object->shadow) + if (object->shadow) vm_object_qcollapse(object); return; } - /* - * There is a backing object, and + * There is a backing object, and */ - + if ((backing_object = object->shadow) == NULL) return; - + vm_object_lock(backing_object); /* - * ... - * The backing object is not read_only, - * and no pages in the backing object are - * currently being paged out. - * The backing object is internal. + * ... The backing object is not read_only, and no pages in + * the backing object are currently being paged out. The + * backing object is internal. */ - + if ((backing_object->flags & OBJ_INTERNAL) == 0 || backing_object->paging_in_progress != 0) { vm_object_unlock(backing_object); vm_object_qcollapse(object); return; } - /* - * The backing object can't be a copy-object: - * the shadow_offset for the copy-object must stay - * as 0. Furthermore (for the 'we have all the - * pages' case), if we bypass backing_object and - * just shadow the next object in the chain, old - * pages from that object would then have to be copied - * BOTH into the (former) backing_object and into the - * parent object. + * The backing object can't be a copy-object: the + * shadow_offset for the copy-object must stay as 0. + * Furthermore (for the 'we have all the pages' case), if we + * bypass backing_object and just shadow the next object in + * the chain, old pages from that object would then have to be + * copied BOTH into the (former) backing_object and into the + * parent object. */ if (backing_object->shadow != NULL && backing_object->shadow->copy == backing_object) { vm_object_unlock(backing_object); return; } - /* * we can deal only with the swap pager */ - if ((object->pager && - object->pager->pg_type != PG_SWAP) || - (backing_object->pager && - backing_object->pager->pg_type != PG_SWAP)) { + if ((object->pager && + object->pager->pg_type != PG_SWAP) || + (backing_object->pager && + backing_object->pager->pg_type != PG_SWAP)) { vm_object_unlock(backing_object); return; } - - /* - * We know that we can either collapse the backing - * object (if the parent is the only reference to - * it) or (perhaps) remove the parent's reference - * to it. + * We know that we can either collapse the backing object (if + * the parent is the only reference to it) or (perhaps) remove + * the parent's reference to it. */ backing_offset = object->shadow_offset; size = object->size; /* - * If there is exactly one reference to the backing - * object, we can collapse it into the parent. + * If there is exactly one reference to the backing object, we + * can collapse it into the parent. */ - + if (backing_object->ref_count == 1) { /* - * We can collapse the backing object. - * - * Move all in-memory pages from backing_object - * to the parent. Pages that have been paged out - * will be overwritten by any of the parent's - * pages that shadow them. + * We can collapse the backing object. + * + * Move all in-memory pages from backing_object to the + * parent. Pages that have been paged out will be + * overwritten by any of the parent's pages that + * shadow them. 
*/ while ((p = backing_object->memq.tqh_first) != 0) { @@ -1404,56 +1386,60 @@ vm_object_collapse(object) new_offset = (p->offset - backing_offset); /* - * If the parent has a page here, or if - * this page falls outside the parent, - * dispose of it. - * - * Otherwise, move it as planned. + * If the parent has a page here, or if this + * page falls outside the parent, dispose of + * it. + * + * Otherwise, move it as planned. */ if (p->offset < backing_offset || new_offset >= size) { vm_page_lock_queues(); pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); + PAGE_WAKEUP(p); vm_page_free(p); vm_page_unlock_queues(); } else { - pp = vm_page_lookup(object, new_offset); - if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, - object->paging_offset + new_offset))) { - vm_page_lock_queues(); - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - vm_page_free(p); - vm_page_unlock_queues(); - } else { - vm_page_rename(p, object, new_offset); - } + pp = vm_page_lookup(object, new_offset); + if (pp != NULL || (object->pager && vm_pager_has_page(object->pager, + object->paging_offset + new_offset))) { + vm_page_lock_queues(); + pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); + PAGE_WAKEUP(p); + vm_page_free(p); + vm_page_unlock_queues(); + } else { + vm_page_rename(p, object, new_offset); + } } } /* - * Move the pager from backing_object to object. + * Move the pager from backing_object to object. */ if (backing_object->pager) { backing_object->paging_in_progress++; if (object->pager) { vm_pager_t bopager; + object->paging_in_progress++; /* * copy shadow object pages into ours - * and destroy unneeded pages in shadow object. + * and destroy unneeded pages in + * shadow object. */ bopager = backing_object->pager; backing_object->pager = NULL; vm_object_remove(backing_object->pager); swap_pager_copy( - bopager, backing_object->paging_offset, - object->pager, object->paging_offset, - object->shadow_offset); + bopager, backing_object->paging_offset, + object->pager, object->paging_offset, + object->shadow_offset); object->paging_in_progress--; if (object->paging_in_progress == 0) - wakeup((caddr_t)object); + wakeup((caddr_t) object); } else { object->paging_in_progress++; /* @@ -1469,26 +1455,27 @@ vm_object_collapse(object) swap_pager_freespace(object->pager, 0, object->paging_offset); object->paging_in_progress--; if (object->paging_in_progress == 0) - wakeup((caddr_t)object); + wakeup((caddr_t) object); } backing_object->paging_in_progress--; if (backing_object->paging_in_progress == 0) - wakeup((caddr_t)backing_object); + wakeup((caddr_t) backing_object); } - - /* - * Object now shadows whatever backing_object did. - * Note that the reference to backing_object->shadow - * moves from within backing_object to within object. + * Object now shadows whatever backing_object did. + * Note that the reference to backing_object->shadow + * moves from within backing_object to within object. 
*/ - TAILQ_REMOVE(&object->shadow->reverse_shadow_head, object, reverse_shadow_list); - if( backing_object->shadow) - TAILQ_REMOVE(&backing_object->shadow->reverse_shadow_head, backing_object, reverse_shadow_list); + TAILQ_REMOVE(&object->shadow->reverse_shadow_head, object, + reverse_shadow_list); + if (backing_object->shadow) + TAILQ_REMOVE(&backing_object->shadow->reverse_shadow_head, + backing_object, reverse_shadow_list); object->shadow = backing_object->shadow; - if( object->shadow) - TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head, object, reverse_shadow_list); + if (object->shadow) + TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head, + object, reverse_shadow_list); object->shadow_offset += backing_object->shadow_offset; if (object->shadow != NULL && @@ -1496,67 +1483,63 @@ vm_object_collapse(object) panic("vm_object_collapse: we collapsed a copy-object!"); } /* - * Discard backing_object. - * - * Since the backing object has no pages, no - * pager left, and no object references within it, - * all that is necessary is to dispose of it. + * Discard backing_object. + * + * Since the backing object has no pages, no pager left, + * and no object references within it, all that is + * necessary is to dispose of it. */ vm_object_unlock(backing_object); simple_lock(&vm_object_list_lock); TAILQ_REMOVE(&vm_object_list, backing_object, - object_list); + object_list); vm_object_count--; simple_unlock(&vm_object_list_lock); - free((caddr_t)backing_object, M_VMOBJ); + free((caddr_t) backing_object, M_VMOBJ); object_collapses++; - } - else { + } else { /* - * If all of the pages in the backing object are - * shadowed by the parent object, the parent - * object no longer has to shadow the backing - * object; it can shadow the next one in the - * chain. - * - * The backing object must not be paged out - we'd - * have to check all of the paged-out pages, as - * well. + * If all of the pages in the backing object are + * shadowed by the parent object, the parent object no + * longer has to shadow the backing object; it can + * shadow the next one in the chain. + * + * The backing object must not be paged out - we'd have + * to check all of the paged-out pages, as well. */ if (backing_object->pager != NULL) { vm_object_unlock(backing_object); return; } - /* - * Should have a check for a 'small' number - * of pages here. + * Should have a check for a 'small' number of pages + * here. */ - for( p = backing_object->memq.tqh_first;p;p=p->listq.tqe_next) { + for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) { new_offset = (p->offset - backing_offset); /* - * If the parent has a page here, or if - * this page falls outside the parent, - * keep going. - * - * Otherwise, the backing_object must be - * left in the chain. + * If the parent has a page here, or if this + * page falls outside the parent, keep going. + * + * Otherwise, the backing_object must be left in + * the chain. */ if (p->offset >= backing_offset && new_offset <= size && - ((pp = vm_page_lookup(object, new_offset)) == NULL || (pp->flags & PG_FAKE)) && - (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset+new_offset))) { + ((pp = vm_page_lookup(object, new_offset)) == NULL || + !pp->valid) && + (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset + new_offset))) { /* - * Page still needed. - * Can't go any further. + * Page still needed. Can't go any + * further. 
*/ vm_object_unlock(backing_object); return; @@ -1564,42 +1547,42 @@ vm_object_collapse(object) } /* - * Make the parent shadow the next object - * in the chain. Deallocating backing_object - * will not remove it, since its reference - * count is at least 2. + * Make the parent shadow the next object in the + * chain. Deallocating backing_object will not remove + * it, since its reference count is at least 2. */ - TAILQ_REMOVE(&object->shadow->reverse_shadow_head, object, reverse_shadow_list); + TAILQ_REMOVE(&object->shadow->reverse_shadow_head, + object, reverse_shadow_list); vm_object_reference(object->shadow = backing_object->shadow); - if( object->shadow) - TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head, object, reverse_shadow_list); + if (object->shadow) + TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head, + object, reverse_shadow_list); object->shadow_offset += backing_object->shadow_offset; /* - * Backing object might have had a copy pointer - * to us. If it did, clear it. + * Backing object might have had a copy pointer to us. + * If it did, clear it. */ if (backing_object->copy == object) { backing_object->copy = NULL; } - - /* Drop the reference count on backing_object. - * Since its ref_count was at least 2, it - * will not vanish; so we don't need to call - * vm_object_deallocate. + /* + * Drop the reference count on backing_object. Since + * its ref_count was at least 2, it will not vanish; + * so we don't need to call vm_object_deallocate. */ if (backing_object->ref_count == 1) printf("should have called obj deallocate\n"); backing_object->ref_count--; vm_object_unlock(backing_object); - object_bypasses ++; + object_bypasses++; } /* - * Try again with this object's new backing object. + * Try again with this object's new backing object. 
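The comments above describe two outcomes for vm_object_collapse(): a full collapse when the parent holds the only reference to the backing object, otherwise a possible bypass when the backing object has no pager and every needed page is already shadowed. The toy program below sketches only that decision; it omits the additional checks the real code makes (paging_in_progress, the swap-pager-only restriction, copy-object consistency, the per-page shadow scan), and every name in it is hypothetical.

/*
 * Illustrative sketch (not taken from this patch): the collapse-vs-bypass
 * decision, reduced to a toy object chain.
 */
#include <stdio.h>
#include <stddef.h>

struct toy_object {
    int ref_count;
    int has_pager;              /* pages possibly on backing store */
    struct toy_object *shadow;  /* backing object */
};

/* 1 = fold backing object's pages into obj, 2 = bypass candidate, 0 = leave alone */
static int
toy_collapse_action(struct toy_object *obj)
{
    struct toy_object *backing = obj->shadow;

    if (backing == NULL)
        return 0;
    if (backing->ref_count == 1)
        return 1;               /* sole reference: full collapse */
    if (!backing->has_pager)
        return 2;               /* may bypass if every needed page is shadowed */
    return 0;
}

int
main(void)
{
    struct toy_object backing = { 1, 0, NULL };
    struct toy_object front = { 1, 0, &backing };

    printf("action=%d (1=collapse)\n", toy_collapse_action(&front));
    backing.ref_count = 3;
    printf("action=%d (2=bypass candidate)\n", toy_collapse_action(&front));
    return 0;
}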
*/ } } @@ -1614,11 +1597,11 @@ vm_object_collapse(object) */ void vm_object_page_remove(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; { - register vm_page_t p, next; + register vm_page_t p, next; vm_offset_t size; int s; @@ -1629,13 +1612,17 @@ vm_object_page_remove(object, start, end) start = trunc_page(start); end = round_page(end); again: - size = end-start; - if (size > 4*PAGE_SIZE || size >= object->size/4) { - for (p = object->memq.tqh_first; (p != NULL && size > 0); p = next) { + size = end - start; + if (size > 4 * PAGE_SIZE || size >= object->size / 4) { + for (p = object->memq.tqh_first; p != NULL; p = next) { next = p->listq.tqe_next; if ((start <= p->offset) && (p->offset < end)) { - s=splhigh(); - if (p->flags & PG_BUSY) { + s = splhigh(); + if (p->bmapped) { + splx(s); + continue; + } + if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep((caddr_t) p, PVM, "vmopar", 0); splx(s); @@ -1644,16 +1631,20 @@ again: splx(s); pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_lock_queues(); + PAGE_WAKEUP(p); vm_page_free(p); vm_page_unlock_queues(); - size -= PAGE_SIZE; } } } else { while (size > 0) { while ((p = vm_page_lookup(object, start)) != 0) { s = splhigh(); - if (p->flags & PG_BUSY) { + if (p->bmapped) { + splx(s); + break; + } + if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep((caddr_t) p, PVM, "vmopar", 0); splx(s); @@ -1662,6 +1653,7 @@ again: splx(s); pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); vm_page_lock_queues(); + PAGE_WAKEUP(p); vm_page_free(p); vm_page_unlock_queues(); } @@ -1670,7 +1662,7 @@ again: } } --object->paging_in_progress; - if( object->paging_in_progress == 0) + if (object->paging_in_progress == 0) wakeup((caddr_t) object); } @@ -1696,85 +1688,75 @@ again: * Conditions: * The object must *not* be locked. */ -boolean_t vm_object_coalesce(prev_object, next_object, - prev_offset, next_offset, - prev_size, next_size) - - register vm_object_t prev_object; - vm_object_t next_object; - vm_offset_t prev_offset, next_offset; - vm_size_t prev_size, next_size; +boolean_t +vm_object_coalesce(prev_object, next_object, + prev_offset, next_offset, + prev_size, next_size) + register vm_object_t prev_object; + vm_object_t next_object; + vm_offset_t prev_offset, next_offset; + vm_size_t prev_size, next_size; { - vm_size_t newsize; - -#ifdef lint - next_offset++; -#endif + vm_size_t newsize; if (next_object != NULL) { - return(FALSE); + return (FALSE); } - if (prev_object == NULL) { - return(TRUE); + return (TRUE); } - vm_object_lock(prev_object); /* - * Try to collapse the object first + * Try to collapse the object first */ vm_object_collapse(prev_object); /* - * Can't coalesce if: - * . more than one reference - * . paged out - * . shadows another object - * . has a copy elsewhere - * (any of which mean that the pages not mapped to - * prev_entry may be in use anyway) + * Can't coalesce if: . more than one reference . paged out . shadows + * another object . 
has a copy elsewhere (any of which mean that the + * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->ref_count > 1 || - prev_object->pager != NULL || - prev_object->shadow != NULL || - prev_object->copy != NULL) { + prev_object->pager != NULL || + prev_object->shadow != NULL || + prev_object->copy != NULL) { vm_object_unlock(prev_object); - return(FALSE); + return (FALSE); } - /* - * Remove any pages that may still be in the object from - * a previous deallocation. + * Remove any pages that may still be in the object from a previous + * deallocation. */ vm_object_page_remove(prev_object, - prev_offset + prev_size, - prev_offset + prev_size + next_size); + prev_offset + prev_size, + prev_offset + prev_size + next_size); /* - * Extend the object if necessary. + * Extend the object if necessary. */ newsize = prev_offset + prev_size + next_size; if (newsize > prev_object->size) prev_object->size = newsize; vm_object_unlock(prev_object); - return(TRUE); + return (TRUE); } /* * returns page after looking up in shadow chain */ - + vm_page_t vm_object_page_lookup(object, offset) vm_object_t object; vm_offset_t offset; { vm_page_t m; - if (!(m=vm_page_lookup(object, offset))) { + + if (!(m = vm_page_lookup(object, offset))) { if (!object->shadow) return 0; else @@ -1788,11 +1770,12 @@ vm_object_page_lookup(object, offset) /* * vm_object_print: [ debug ] */ -void vm_object_print(object, full) - vm_object_t object; - boolean_t full; +void +vm_object_print(object, full) + vm_object_t object; + boolean_t full; { - register vm_page_t p; + register vm_page_t p; extern indent; register int count; @@ -1801,13 +1784,13 @@ void vm_object_print(object, full) return; iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", - (int) object, (int) object->size, - object->resident_page_count, object->ref_count); + (int) object, (int) object->size, + object->resident_page_count, object->ref_count); printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n", - (int) object->pager, (int) object->paging_offset, - (int) object->shadow, (int) object->shadow_offset); + (int) object->pager, (int) object->paging_offset, + (int) object->shadow, (int) object->shadow_offset); printf("cache: next=%p, prev=%p\n", - object->cached_list.tqe_next, object->cached_list.tqe_prev); + object->cached_list.tqe_next, object->cached_list.tqe_prev); if (!full) return; @@ -1825,11 +1808,11 @@ void vm_object_print(object, full) printf(","); count++; - printf("(off=0x%lx,page=0x%lx)", - (u_long)p->offset, (u_long)VM_PAGE_TO_PHYS(p)); + printf("(off=0x%lx,page=0x%lx)", + (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p)); } if (count != 0) printf("\n"); indent -= 2; } -#endif /* defined(DEBUG) || defined(DDB) */ +#endif /* defined(DEBUG) || defined(DDB) */ diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index cec90ae42544..03661df96c9d 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.h,v 1.2 1994/08/02 07:55:31 davidg Exp $ + * $Id: vm_object.h,v 1.3 1994/11/06 05:07:52 davidg Exp $ */ /* @@ -81,97 +81,131 @@ */ struct vm_object { - struct pglist memq; /* Resident memory */ - TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ - u_short flags; /* see below */ - u_short paging_in_progress; /* Paging (in or out) so - don't collapse or destroy */ - simple_lock_data_t Lock; /* Synchronization */ - int ref_count; /* How many refs?? */ - vm_size_t size; /* Object size */ - int resident_page_count; - /* number of resident pages */ - struct vm_object *copy; /* Object that holds copies of - my changed pages */ - vm_pager_t pager; /* Where to get data */ - vm_offset_t paging_offset; /* Offset into paging space */ - struct vm_object *shadow; /* My shadow */ - vm_offset_t shadow_offset; /* Offset in shadow */ - TAILQ_ENTRY(vm_object) cached_list; /* for persistence */ - TAILQ_ENTRY(vm_object) reverse_shadow_list; /* chain of objects that are shadowed */ - TAILQ_HEAD(rslist, vm_object) reverse_shadow_head; /* objects that this is a shadow for */ + struct pglist memq; /* Resident memory */ + TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ + u_short flags; /* see below */ + u_short paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ + int ref_count; /* How many refs?? 
*/ + struct { + int recursion; /* object locking */ + struct proc *proc; /* process owned */ + } lock; + vm_size_t size; /* Object size */ + int resident_page_count; + /* number of resident pages */ + struct vm_object *copy; /* Object that holds copies of my changed pages */ + vm_pager_t pager; /* Where to get data */ + vm_offset_t paging_offset; /* Offset into paging space */ + struct vm_object *shadow; /* My shadow */ + vm_offset_t shadow_offset; /* Offset in shadow */ + TAILQ_ENTRY(vm_object) cached_list; /* for persistence */ + TAILQ_ENTRY(vm_object) reverse_shadow_list; /* chain of objects that are shadowed */ + TAILQ_HEAD(rslist, vm_object) reverse_shadow_head; /* objects that this is a shadow for */ }; + /* * Flags */ -#define OBJ_CANPERSIST 0x0001 /* allow to persist */ -#define OBJ_INTERNAL 0x0002 /* internally created object */ -#define OBJ_ACTIVE 0x0004 /* used to mark active objects */ +#define OBJ_CANPERSIST 0x0001 /* allow to persist */ +#define OBJ_INTERNAL 0x0002 /* internally created object */ +#define OBJ_ACTIVE 0x0004 /* used to mark active objects */ +#define OBJ_DEAD 0x0008 /* used to mark dead objects during rundown */ +#define OBJ_ILOCKED 0x0010 /* lock from modification */ +#define OBJ_ILOCKWT 0x0020 /* wait for lock from modification */ TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry); struct vm_object_hash_entry { - TAILQ_ENTRY(vm_object_hash_entry) hash_links; /* hash chain links */ - vm_object_t object; /* object represened */ + TAILQ_ENTRY(vm_object_hash_entry) hash_links; /* hash chain links */ + vm_object_t object; /* object represened */ }; -typedef struct vm_object_hash_entry *vm_object_hash_entry_t; +typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #ifdef KERNEL TAILQ_HEAD(object_q, vm_object); -struct object_q vm_object_cached_list; /* list of objects persisting */ -int vm_object_cached; /* size of cached list */ -simple_lock_data_t vm_cache_lock; /* lock for object cache */ +struct object_q vm_object_cached_list; /* list of objects persisting */ +int vm_object_cached; /* size of cached list */ +simple_lock_data_t vm_cache_lock; /* lock for object cache */ + +struct object_q vm_object_list; /* list of allocated objects */ +long vm_object_count; /* count of all objects */ +simple_lock_data_t vm_object_list_lock; -struct object_q vm_object_list; /* list of allocated objects */ -long vm_object_count; /* count of all objects */ -simple_lock_data_t vm_object_list_lock; - /* lock for object list and count */ + /* lock for object list and count */ -vm_object_t kernel_object; /* the single kernel object */ -vm_object_t kmem_object; +vm_object_t kernel_object; /* the single kernel object */ +vm_object_t kmem_object; #define vm_object_cache_lock() simple_lock(&vm_cache_lock) #define vm_object_cache_unlock() simple_unlock(&vm_cache_lock) -#endif /* KERNEL */ +#endif /* KERNEL */ +#define vm_object_sleep(event, object, interruptible) \ + thread_sleep((event), &(object)->Lock, (interruptible)) +#if 0 #define vm_object_lock_init(object) simple_lock_init(&(object)->Lock) #define vm_object_lock(object) simple_lock(&(object)->Lock) #define vm_object_unlock(object) simple_unlock(&(object)->Lock) #define vm_object_lock_try(object) simple_lock_try(&(object)->Lock) -#define vm_object_sleep(event, object, interruptible) \ - thread_sleep((event), &(object)->Lock, (interruptible)) +#endif +#define vm_object_lock_init(object) (object->flags &= ~OBJ_ILOCKED, object->lock.recursion = 0, object->lock.proc = 0) + +static __inline void +vm_object_lock(vm_object_t obj) 
+{ + if (obj->flags & OBJ_ILOCKED) { + ++obj->lock.recursion; + return; + } + obj->flags |= OBJ_ILOCKED; + obj->lock.recursion = 1; +} + +static __inline void +vm_object_unlock(vm_object_t obj) +{ + --obj->lock.recursion; + if (obj->lock.recursion != 0) + return; + obj->flags &= ~OBJ_ILOCKED; +} + +static __inline int +vm_object_lock_try(vm_object_t obj) +{ + if (obj->flags & OBJ_ILOCKED) { + ++obj->lock.recursion; + return 1; + } + obj->flags |= OBJ_ILOCKED; + obj->lock.recursion = 1; + return 1; +} #ifdef KERNEL -vm_object_t vm_object_allocate __P((vm_size_t)); -void vm_object_cache_clear __P((void)); -void vm_object_cache_trim __P((void)); -boolean_t vm_object_coalesce __P((vm_object_t, vm_object_t, - vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t)); -void vm_object_collapse __P((vm_object_t)); -void vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t, - vm_object_t *, vm_offset_t *, boolean_t *)); -void vm_object_deactivate_pages __P((vm_object_t)); -void vm_object_deallocate __P((vm_object_t)); -void vm_object_enter __P((vm_object_t, vm_pager_t)); -void vm_object_init __P((vm_size_t)); -vm_object_t vm_object_lookup __P((vm_pager_t)); -boolean_t vm_object_page_clean __P((vm_object_t, - vm_offset_t, vm_offset_t, boolean_t, boolean_t)); -void vm_object_page_remove __P((vm_object_t, - vm_offset_t, vm_offset_t)); -void vm_object_pmap_copy __P((vm_object_t, - vm_offset_t, vm_offset_t)); -void vm_object_pmap_remove __P((vm_object_t, - vm_offset_t, vm_offset_t)); -void vm_object_print __P((vm_object_t, boolean_t)); -void vm_object_reference __P((vm_object_t)); -void vm_object_remove __P((vm_pager_t)); -void vm_object_setpager __P((vm_object_t, - vm_pager_t, vm_offset_t, boolean_t)); -void vm_object_shadow __P((vm_object_t *, - vm_offset_t *, vm_size_t)); -void vm_object_terminate __P((vm_object_t)); +vm_object_t vm_object_allocate __P((vm_size_t)); +void vm_object_cache_clear __P((void)); +void vm_object_cache_trim __P((void)); +boolean_t vm_object_coalesce __P((vm_object_t, vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t)); +void vm_object_collapse __P((vm_object_t)); +void vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t, vm_object_t *, vm_offset_t *, boolean_t *)); +void vm_object_deactivate_pages __P((vm_object_t)); +void vm_object_deallocate __P((vm_object_t)); +void vm_object_enter __P((vm_object_t, vm_pager_t)); +void vm_object_init __P((vm_size_t)); +vm_object_t vm_object_lookup __P((vm_pager_t)); +boolean_t vm_object_page_clean __P((vm_object_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t)); +void vm_object_page_remove __P((vm_object_t, vm_offset_t, vm_offset_t)); +void vm_object_pmap_copy __P((vm_object_t, vm_offset_t, vm_offset_t)); +void vm_object_pmap_remove __P((vm_object_t, vm_offset_t, vm_offset_t)); +void vm_object_print __P((vm_object_t, boolean_t)); +void vm_object_reference __P((vm_object_t)); +void vm_object_remove __P((vm_pager_t)); +void vm_object_setpager __P((vm_object_t, vm_pager_t, vm_offset_t, boolean_t)); +void vm_object_shadow __P((vm_object_t *, vm_offset_t *, vm_size_t)); +void vm_object_terminate __P((vm_object_t)); + #endif -#endif /* _VM_OBJECT_ */ +#endif /* _VM_OBJECT_ */ diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 757d4fd640ae..dc19ea067af4 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
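The inline vm_object_lock()/vm_object_unlock() added to vm_object.h above replace the simple_lock with an OBJ_ILOCKED flag plus a recursion count, so nested acquisitions balance out. A minimal userland model of that nesting behaviour follows; it keeps only the flag and the counter and leaves out the lock.proc field, and uses a toy structure rather than struct vm_object.

/*
 * Illustrative sketch (not taken from this patch): flag-plus-recursion-count
 * locking as added to vm_object.h, modelled in userland.
 */
#include <stdio.h>

#define OBJ_ILOCKED 0x0010

struct toy_object {
    int flags;
    int recursion;
};

static void
toy_lock(struct toy_object *o)
{
    if (o->flags & OBJ_ILOCKED) {   /* already held: just count the nesting */
        o->recursion++;
        return;
    }
    o->flags |= OBJ_ILOCKED;
    o->recursion = 1;
}

static void
toy_unlock(struct toy_object *o)
{
    if (--o->recursion != 0)        /* still nested */
        return;
    o->flags &= ~OBJ_ILOCKED;
}

int
main(void)
{
    struct toy_object o = { 0, 0 };

    toy_lock(&o);
    toy_lock(&o);                   /* nested acquire, e.g. from a helper */
    printf("recursion=%d locked=%d\n", o.recursion, (o.flags & OBJ_ILOCKED) != 0);
    toy_unlock(&o);
    toy_unlock(&o);
    printf("recursion=%d locked=%d\n", o.recursion, (o.flags & OBJ_ILOCKED) != 0);
    return 0;
}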
* * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.11 1994/10/18 14:59:19 davidg Exp $ + * $Id: vm_page.c,v 1.12 1994/10/23 06:15:04 davidg Exp $ */ /* @@ -42,17 +42,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -82,27 +82,39 @@ * page structure. */ -struct pglist *vm_page_buckets; /* Array of buckets */ -int vm_page_bucket_count = 0; /* How big is array? */ -int vm_page_hash_mask; /* Mask for hash function */ -simple_lock_data_t bucket_lock; /* lock for all buckets XXX */ +struct pglist *vm_page_buckets; /* Array of buckets */ +int vm_page_bucket_count = 0; /* How big is array? */ +int vm_page_hash_mask; /* Mask for hash function */ +simple_lock_data_t bucket_lock; /* lock for all buckets XXX */ -struct pglist vm_page_queue_free; -struct pglist vm_page_queue_active; -struct pglist vm_page_queue_inactive; -simple_lock_data_t vm_page_queue_lock; -simple_lock_data_t vm_page_queue_free_lock; +struct pglist vm_page_queue_free; +struct pglist vm_page_queue_active; +struct pglist vm_page_queue_inactive; +struct pglist vm_page_queue_cache; +simple_lock_data_t vm_page_queue_lock; +simple_lock_data_t vm_page_queue_free_lock; /* has physical page allocation been initialized? */ boolean_t vm_page_startup_initialized; -vm_page_t vm_page_array; -long first_page; -long last_page; -vm_offset_t first_phys_addr; -vm_offset_t last_phys_addr; -vm_size_t page_mask; -int page_shift; +vm_page_t vm_page_array; +int vm_page_array_size; +long first_page; +long last_page; +vm_offset_t first_phys_addr; +vm_offset_t last_phys_addr; +vm_size_t page_mask; +int page_shift; + +/* + * map of contiguous valid DEV_BSIZE chunks in a page + * (this list is valid for page sizes upto 16*DEV_BSIZE) + */ +static u_short vm_page_dev_bsize_chunks[] = { + 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, + 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + /* * vm_set_page_size: @@ -113,7 +125,8 @@ int page_shift; * * Sets page_shift and page_mask from cnt.v_page_size. 
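The vm_page_dev_bsize_chunks[] table introduced above maps "n contiguous DEV_BSIZE chunks" to a mask with the n low bits set, i.e. entry n equals (1 << n) - 1. A small standalone check of that relationship:

/*
 * Illustrative sketch (not taken from this patch): verify the chunk-mask
 * table is just (1 << n) - 1 for each entry.
 */
#include <stdio.h>
#include <assert.h>

static const unsigned short dev_bsize_chunks[] = {
    0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
    0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

int
main(void)
{
    for (int n = 0; n < 17; n++) {
        unsigned short computed = (unsigned short) ((1u << n) - 1u);
        assert(dev_bsize_chunks[n] == computed);
    }
    printf("all 17 entries match (1 << n) - 1\n");
    return 0;
}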
*/ -void vm_set_page_size() +void +vm_set_page_size() { if (cnt.v_page_size == 0) @@ -121,7 +134,7 @@ void vm_set_page_size() page_mask = cnt.v_page_size - 1; if ((page_mask & cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); - for (page_shift = 0; ; page_shift++) + for (page_shift = 0;; page_shift++) if ((1 << page_shift) == cnt.v_page_size) break; } @@ -138,24 +151,25 @@ void vm_set_page_size() vm_offset_t vm_page_startup(starta, enda, vaddr) - register vm_offset_t starta; - vm_offset_t enda; - register vm_offset_t vaddr; + register vm_offset_t starta; + vm_offset_t enda; + register vm_offset_t vaddr; { - register vm_offset_t mapped; - register vm_page_t m; + register vm_offset_t mapped; + register vm_page_t m; register struct pglist *bucket; - vm_size_t npages, page_range; - register vm_offset_t new_start; - int i; - vm_offset_t pa; + vm_size_t npages, page_range; + register vm_offset_t new_start; + int i; + vm_offset_t pa; int nblocks; - vm_offset_t first_managed_page; + vm_offset_t first_managed_page; - extern vm_offset_t kentry_data; - extern vm_size_t kentry_data_size; + extern vm_offset_t kentry_data; + extern vm_size_t kentry_data_size; extern vm_offset_t phys_avail[]; -/* the biggest memory array is the second group of pages */ + + /* the biggest memory array is the second group of pages */ vm_offset_t start; vm_offset_t biggestone, biggestsize; @@ -169,11 +183,12 @@ vm_page_startup(starta, enda, vaddr) for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); - phys_avail[i+1] = trunc_page(phys_avail[i+1]); + phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } - + for (i = 0; phys_avail[i + 1]; i += 2) { - int size = phys_avail[i+1] - phys_avail[i]; + int size = phys_avail[i + 1] - phys_avail[i]; + if (size > biggestsize) { biggestone = i; biggestsize = size; @@ -186,56 +201,54 @@ vm_page_startup(starta, enda, vaddr) /* - * Initialize the locks + * Initialize the locks */ simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. + * Initialize the queue headers for the free queue, the active queue + * and the inactive queue. */ TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); + TAILQ_INIT(&vm_page_queue_cache); /* - * Allocate (and initialize) the hash table buckets. - * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. - * - * Note: - * This computation can be tweaked if desired. + * Allocate (and initialize) the hash table buckets. + * + * The number of buckets MUST BE a power of 2, and the actual value is + * the next power of 2 greater than the number of physical pages in + * the system. + * + * Note: This computation can be tweaked if desired. */ - vm_page_buckets = (struct pglist *)vaddr; + vm_page_buckets = (struct pglist *) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } - - vm_page_hash_mask = vm_page_bucket_count - 1; /* - * Validate these addresses. + * Validate these addresses. 
*/ new_start = start + vm_page_bucket_count * sizeof(struct pglist); new_start = round_page(new_start); mapped = vaddr; vaddr = pmap_map(mapped, start, new_start, - VM_PROT_READ|VM_PROT_WRITE); + VM_PROT_READ | VM_PROT_WRITE); start = new_start; bzero((caddr_t) mapped, vaddr - mapped); mapped = vaddr; - for (i = 0; i< vm_page_bucket_count; i++) { + for (i = 0; i < vm_page_bucket_count; i++) { TAILQ_INIT(bucket); bucket++; } @@ -243,49 +256,54 @@ vm_page_startup(starta, enda, vaddr) simple_lock_init(&bucket_lock); /* - * round (or truncate) the addresses to our page size. + * round (or truncate) the addresses to our page size. */ /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. - * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will work + * (obviously). Also could include pager maps which would be + * allocated before kmeminit. + * + * Allow some kernel map entries... this should be plenty since people + * shouldn't be cluttering up the kernel map (they should use their + * own maps). */ kentry_data_size = MAX_KMAP * sizeof(struct vm_map) + - MAX_KMAPENT * sizeof(struct vm_map_entry); + MAX_KMAPENT * sizeof(struct vm_map_entry); kentry_data_size = round_page(kentry_data_size); kentry_data = (vm_offset_t) vaddr; vaddr += kentry_data_size; /* - * Validate these zone addresses. + * Validate these zone addresses. */ new_start = start + (vaddr - mapped); - pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE); + pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE); bzero((caddr_t) mapped, (vaddr - mapped)); start = round_page(new_start); /* - * Compute the number of pages of memory that will be - * available for use (taking into account the overhead - * of a page structure per page). + * Compute the number of pages of memory that will be available for + * use (taking into account the overhead of a page structure per + * page). */ - npages = (total - (start - phys_avail[biggestone])) / (PAGE_SIZE + sizeof(struct vm_page)); first_page = phys_avail[0] / PAGE_SIZE; - page_range = (phys_avail[(nblocks-1)*2 + 1] - phys_avail[0]) / PAGE_SIZE; + /* for VM_PAGE_CHECK() */ + last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE; + + page_range = last_page - (phys_avail[0] / PAGE_SIZE); + npages = (total - (page_range * sizeof(struct vm_page)) - + (start - phys_avail[biggestone])) / PAGE_SIZE; + /* - * Initialize the mem entry structures now, and - * put them in the free queue. + * Initialize the mem entry structures now, and put them in the free + * queue. */ vm_page_array = (vm_page_t) vaddr; @@ -293,25 +311,26 @@ vm_page_startup(starta, enda, vaddr) /* - * Validate these addresses. + * Validate these addresses. 
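The revised sizing in vm_page_startup() above charges one struct vm_page of overhead for every page in the managed range, plus what has already been carved off the largest chunk, before computing npages. A toy calculation with made-up inputs (memory size, structure size and bootstrap overhead are all assumptions) shows the shape of the formula:

/*
 * Illustrative sketch (not taken from this patch): npages after per-page
 * vm_page overhead, using invented numbers.
 */
#include <stdio.h>

int
main(void)
{
    unsigned long page_size = 4096;
    unsigned long total = 16UL * 1024 * 1024;   /* usable physical memory */
    unsigned long page_range = total / page_size;
    unsigned long vm_page_size = 68;            /* assumed sizeof(struct vm_page) */
    unsigned long bootstrap = 256UL * 1024;     /* assumed early allocations */

    unsigned long npages =
        (total - page_range * vm_page_size - bootstrap) / page_size;

    printf("page_range=%lu npages=%lu (overhead %lu pages)\n",
        page_range, npages, page_range - npages);
    return 0;
}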
*/ - new_start = round_page(start + page_range * sizeof (struct vm_page)); + new_start = round_page(start + page_range * sizeof(struct vm_page)); mapped = pmap_map(mapped, start, new_start, - VM_PROT_READ|VM_PROT_WRITE); + VM_PROT_READ | VM_PROT_WRITE); start = new_start; first_managed_page = start / PAGE_SIZE; /* - * Clear all of the page structures + * Clear all of the page structures */ - bzero((caddr_t)vm_page_array, page_range * sizeof(struct vm_page)); + bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); + vm_page_array_size = page_range; cnt.v_page_count = 0; - cnt.v_free_count= 0; + cnt.v_free_count = 0; for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { - if (i == biggestone) + if (i == biggestone) pa = ptoa(first_managed_page); else pa = phys_avail[i]; @@ -319,7 +338,8 @@ vm_page_startup(starta, enda, vaddr) ++cnt.v_page_count; ++cnt.v_free_count; m = PHYS_TO_VM_PAGE(pa); - m->flags = PG_CLEAN | PG_FREE; + m->flags = PG_FREE; + vm_page_set_clean(m, 0, PAGE_SIZE); m->object = 0; m->phys_addr = pa; m->hold_count = 0; @@ -329,12 +349,12 @@ vm_page_startup(starta, enda, vaddr) } /* - * Initialize vm_pages_needed lock here - don't wait for pageout - * daemon XXX + * Initialize vm_pages_needed lock here - don't wait for pageout + * daemon XXX */ simple_lock_init(&vm_pages_needed_lock); - return(mapped); + return (mapped); } /* @@ -349,7 +369,7 @@ vm_page_hash(object, offset) vm_object_t object; vm_offset_t offset; { - return ((unsigned)object + offset/NBPG) & vm_page_hash_mask; + return ((unsigned) object + offset / NBPG) & vm_page_hash_mask; } /* @@ -361,13 +381,14 @@ vm_page_hash(object, offset) * The object and page must be locked. */ -void vm_page_insert(mem, object, offset) - register vm_page_t mem; - register vm_object_t object; - register vm_offset_t offset; +void +vm_page_insert(mem, object, offset) + register vm_page_t mem; + register vm_object_t object; + register vm_offset_t offset; { - register struct pglist *bucket; - int s; + register struct pglist *bucket; + int s; VM_PAGE_CHECK(mem); @@ -375,14 +396,14 @@ void vm_page_insert(mem, object, offset) panic("vm_page_insert: already inserted"); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page */ mem->object = object; mem->offset = offset; /* - * Insert it into the object_object/offset hash table + * Insert it into the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(object, offset)]; @@ -393,15 +414,14 @@ void vm_page_insert(mem, object, offset) (void) splx(s); /* - * Now link into the object's list of backed pages. + * Now link into the object's list of backed pages. */ TAILQ_INSERT_TAIL(&object->memq, mem, listq); mem->flags |= PG_TABLED; /* - * And show that the object has one more resident - * page. + * And show that the object has one more resident page. */ object->resident_page_count++; @@ -417,19 +437,21 @@ void vm_page_insert(mem, object, offset) * The object and page must be locked. 
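vm_page_hash() above folds the object pointer and the page-aligned offset together and masks with vm_page_hash_mask, which works because vm_page_startup() sizes the bucket array to the next power of two at or above the physical page count. A small sketch of that arithmetic, with an assumed NBPG of 4096 and an invented page count:

/*
 * Illustrative sketch (not taken from this patch): the object/offset page
 * hash and its power-of-two bucket mask.
 */
#include <stdio.h>

#define NBPG 4096u

static unsigned
toy_page_hash(unsigned long object, unsigned long offset, unsigned mask)
{
    return (unsigned) (object + offset / NBPG) & mask;
}

int
main(void)
{
    unsigned buckets = 1;
    unsigned npages = 3000;                 /* pretend physical page count */

    while (buckets < npages)                /* next power of two, as in vm_page_startup() */
        buckets <<= 1;
    unsigned mask = buckets - 1;

    printf("buckets=%u mask=0x%x\n", buckets, mask);
    printf("hash(obj=0xf0000000, off=0x2000) = %u\n",
        toy_page_hash(0xf0000000ul, 0x2000ul, mask));
    printf("hash(obj=0xf0000000, off=0x3000) = %u\n",
        toy_page_hash(0xf0000000ul, 0x3000ul, mask));
    return 0;
}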
*/ -void vm_page_remove(mem) - register vm_page_t mem; +void +vm_page_remove(mem) + register vm_page_t mem; { - register struct pglist *bucket; - int s; + register struct pglist *bucket; + int s; VM_PAGE_CHECK(mem); + if (!(mem->flags & PG_TABLED)) return; /* - * Remove from the object_object/offset hash table + * Remove from the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; @@ -440,14 +462,13 @@ void vm_page_remove(mem) (void) splx(s); /* - * Now remove from the object's list of backed pages. + * Now remove from the object's list of backed pages. */ TAILQ_REMOVE(&mem->object->memq, mem, listq); /* - * And show that the object has one fewer resident - * page. + * And show that the object has one fewer resident page. */ mem->object->resident_page_count--; @@ -464,16 +485,17 @@ void vm_page_remove(mem) * The object must be locked. No side effects. */ -vm_page_t vm_page_lookup(object, offset) - register vm_object_t object; - register vm_offset_t offset; +vm_page_t +vm_page_lookup(object, offset) + register vm_object_t object; + register vm_offset_t offset; { - register vm_page_t mem; - register struct pglist *bucket; - int s; + register vm_page_t mem; + register struct pglist *bucket; + int s; /* - * Search the hash table for this object/offset pair + * Search the hash table for this object/offset pair */ bucket = &vm_page_buckets[vm_page_hash(object, offset)]; @@ -485,13 +507,13 @@ vm_page_t vm_page_lookup(object, offset) if ((mem->object == object) && (mem->offset == offset)) { simple_unlock(&bucket_lock); splx(s); - return(mem); + return (mem); } } simple_unlock(&bucket_lock); splx(s); - return(NULL); + return (NULL); } /* @@ -502,21 +524,77 @@ vm_page_t vm_page_lookup(object, offset) * * The object must be locked. 
*/ -void vm_page_rename(mem, new_object, new_offset) - register vm_page_t mem; - register vm_object_t new_object; - vm_offset_t new_offset; +void +vm_page_rename(mem, new_object, new_offset) + register vm_page_t mem; + register vm_object_t new_object; + vm_offset_t new_offset; { + int s; + if (mem->object == new_object) return; - vm_page_lock_queues(); /* keep page from moving out from - under pageout daemon */ - vm_page_remove(mem); + vm_page_lock_queues(); /* keep page from moving out from under pageout daemon */ + s = splhigh(); + vm_page_remove(mem); vm_page_insert(mem, new_object, new_offset); + splx(s); vm_page_unlock_queues(); } +int +vm_page_unqueue(vm_page_t mem) +{ + int s, origflags; + + s = splhigh(); + origflags = mem->flags; + if (mem->flags & PG_ACTIVE) { + TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); + cnt.v_active_count--; + mem->flags &= ~PG_ACTIVE; + } else if (mem->flags & PG_INACTIVE) { + TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); + cnt.v_inactive_count--; + mem->flags &= ~PG_INACTIVE; + } else if (mem->flags & PG_CACHE) { + TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); + cnt.v_cache_count--; + mem->flags &= ~PG_CACHE; + if (cnt.v_cache_count + cnt.v_free_count < cnt.v_free_reserved) + wakeup((caddr_t) & vm_pages_needed); + } + splx(s); + return origflags; +} + +void +vm_page_requeue(vm_page_t mem, int flags) +{ + int s; + + if (mem->wire_count) + return; + s = splhigh(); + if (flags & PG_CACHE) { + TAILQ_INSERT_TAIL(&vm_page_queue_cache, mem, pageq); + mem->flags |= PG_CACHE; + cnt.v_cache_count++; + } else if (flags & PG_ACTIVE) { + TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq); + mem->flags |= PG_ACTIVE; + cnt.v_active_count++; + } else if (flags & PG_INACTIVE) { + TAILQ_INSERT_TAIL(&vm_page_queue_inactive, mem, pageq); + mem->flags |= PG_INACTIVE; + cnt.v_inactive_count++; + } + TAILQ_REMOVE(&mem->object->memq, mem, listq); + TAILQ_INSERT_TAIL(&mem->object->memq, mem, listq); + splx(s); +} + /* * vm_page_alloc: * @@ -526,47 +604,61 @@ void vm_page_rename(mem, new_object, new_offset) * Object must be locked. */ vm_page_t -vm_page_alloc(object, offset) - vm_object_t object; - vm_offset_t offset; +vm_page_alloc(object, offset, inttime) + vm_object_t object; + vm_offset_t offset; + int inttime; { - register vm_page_t mem; - int s; + register vm_page_t mem; + int s; - s = splhigh(); simple_lock(&vm_page_queue_free_lock); - if ( object != kernel_object && - object != kmem_object && - curproc != pageproc && curproc != &proc0 && - cnt.v_free_count < cnt.v_free_reserved) { + s = splhigh(); + + if (object != kernel_object && + object != kmem_object && + curproc != pageproc && + curproc != &proc0 && + (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { simple_unlock(&vm_page_queue_free_lock); splx(s); - /* - * this wakeup seems unnecessary, but there is code that - * might just check to see if there are free pages, and - * punt if there aren't. VM_WAIT does this too, but - * redundant wakeups aren't that bad... - */ - if (curproc != pageproc) - wakeup((caddr_t) &vm_pages_needed); - return(NULL); + return (NULL); } - if (( mem = vm_page_queue_free.tqh_first) == 0) { - simple_unlock(&vm_page_queue_free_lock); - printf("No pages???\n"); - splx(s); - /* - * comment above re: wakeups applies here too... 
- */ - if (curproc != pageproc) - wakeup((caddr_t) &vm_pages_needed); - return(NULL); + if (inttime) { + if ((mem = vm_page_queue_free.tqh_first) == 0) { + for (mem = vm_page_queue_cache.tqh_first; mem; mem = mem->pageq.tqe_next) { + if ((mem->object->flags & OBJ_ILOCKED) == 0) { + TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); + vm_page_remove(mem); + cnt.v_cache_count--; + goto gotpage; + } + } + splx(s); + return NULL; + } + } else { + if ((cnt.v_free_count < 3) || + (mem = vm_page_queue_free.tqh_first) == 0) { + mem = vm_page_queue_cache.tqh_first; + if (mem) { + TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); + vm_page_remove(mem); + cnt.v_cache_count--; + goto gotpage; + } + simple_unlock(&vm_page_queue_free_lock); + splx(s); + /* wakeup((caddr_t) &vm_pages_needed); */ + return (NULL); + } } TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - cnt.v_free_count--; + +gotpage: simple_unlock(&vm_page_queue_free_lock); VM_PAGE_INIT(mem, object, offset); @@ -577,18 +669,18 @@ vm_page_alloc(object, offset) * we would be nearly out of memory. */ if (curproc != pageproc && - (cnt.v_free_count < cnt.v_free_reserved)) - wakeup((caddr_t) &vm_pages_needed); + ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) + wakeup((caddr_t) & vm_pages_needed); - return(mem); + return (mem); } vm_offset_t vm_page_alloc_contig(size, low, high, alignment) - vm_offset_t size; - vm_offset_t low; - vm_offset_t high; - vm_offset_t alignment; + vm_offset_t size; + vm_offset_t low; + vm_offset_t high; + vm_offset_t alignment; { int i, s, start; vm_offset_t addr, phys, tmp_addr; @@ -619,7 +711,6 @@ again: splx(s); return (NULL); } - start = i; /* @@ -627,7 +718,7 @@ again: */ for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { if ((VM_PAGE_TO_PHYS(&pga[i]) != - (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || + (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || ((pga[i].flags & PG_FREE) != PG_FREE)) { start++; goto again; @@ -636,8 +727,8 @@ again: /* * We've found a contiguous chunk that meets are requirements. - * Allocate kernel VM, unfree and assign the physical pages to it - * and return kernel VM pointer. + * Allocate kernel VM, unfree and assign the physical pages to it and + * return kernel VM pointer. */ tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); @@ -645,11 +736,11 @@ again: TAILQ_REMOVE(&vm_page_queue_free, &pga[i], pageq); cnt.v_free_count--; vm_page_wire(&pga[i]); - pga[i].flags = PG_CLEAN; /* shut off PG_FREE and any other flags */ + vm_page_set_clean(&pga[i], 0, PAGE_SIZE); pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(&pga[i])); tmp_addr += PAGE_SIZE; } - + splx(s); return (addr); } @@ -662,22 +753,20 @@ again: * * Object and page must be locked prior to entry. */ -void vm_page_free(mem) - register vm_page_t mem; +void +vm_page_free(mem) + register vm_page_t mem; { int s; + s = splhigh(); vm_page_remove(mem); - if (mem->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); - mem->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } + vm_page_unqueue(mem); - if (mem->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); - mem->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; + if (mem->bmapped || mem->busy || mem->flags & PG_BUSY) { + printf("vm_page_free: offset(%d), bmapped(%d), busy(%d), PG_BUSY(%d)\n", + mem->offset, mem->bmapped, mem->busy, (mem->flags & PG_BUSY) ? 
1 : 0); + panic("vm_page_free: freeing busy page\n"); } if (mem->flags & PG_FREE) panic("vm_page_free: freeing free page"); @@ -693,7 +782,6 @@ void vm_page_free(mem) cnt.v_wire_count--; mem->wire_count = 0; } - mem->flags |= PG_FREE; TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq); @@ -701,31 +789,25 @@ void vm_page_free(mem) simple_unlock(&vm_page_queue_free_lock); splx(s); /* - * if pageout daemon needs pages, then tell it that there - * are some free. + * if pageout daemon needs pages, then tell it that there are + * some free. */ if (vm_pageout_pages_needed) - wakeup((caddr_t)&vm_pageout_pages_needed); + wakeup((caddr_t) & vm_pageout_pages_needed); /* - * wakeup processes that are waiting on memory if we - * hit a high water mark. + * wakeup processes that are waiting on memory if we hit a + * high water mark. And wakeup scheduler process if we have + * lots of memory. this process will swapin processes. */ - if (cnt.v_free_count == cnt.v_free_min) { - wakeup((caddr_t)&cnt.v_free_count); - } - - /* - * wakeup scheduler process if we have lots of memory. - * this process will swapin processes. - */ - if (cnt.v_free_count == cnt.v_free_target) { - wakeup((caddr_t)&proc0); + if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) { + wakeup((caddr_t) & cnt.v_free_count); + wakeup((caddr_t) & proc0); } } else { splx(s); } - if( mem->flags & PG_WANTED) + if (mem->flags & PG_WANTED) wakeup((caddr_t) mem); cnt.v_tfree++; } @@ -740,25 +822,16 @@ void vm_page_free(mem) * * The page queues must be locked. */ -void vm_page_wire(mem) - register vm_page_t mem; +void +vm_page_wire(mem) + register vm_page_t mem; { int s; + VM_PAGE_CHECK(mem); if (mem->wire_count == 0) { - s = splhigh(); - if (mem->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); - cnt.v_active_count--; - mem->flags &= ~PG_ACTIVE; - } - if (mem->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); - cnt.v_inactive_count--; - mem->flags &= ~PG_INACTIVE; - } - splx(s); + vm_page_unqueue(mem); cnt.v_wire_count++; } mem->wire_count++; @@ -772,15 +845,17 @@ void vm_page_wire(mem) * * The page queues must be locked. */ -void vm_page_unwire(mem) - register vm_page_t mem; +void +vm_page_unwire(mem) + register vm_page_t mem; { int s; + VM_PAGE_CHECK(mem); s = splhigh(); - if( mem->wire_count) + if (mem->wire_count) mem->wire_count--; if (mem->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq); @@ -802,47 +877,67 @@ void vm_page_unwire(mem) */ void vm_page_deactivate(m) - register vm_page_t m; + register vm_page_t m; { int spl; + VM_PAGE_CHECK(m); /* - * Only move active pages -- ignore locked or already - * inactive ones. - * - * XXX: sometimes we get pages which aren't wired down - * or on any queue - we need to put them on the inactive - * queue also, otherwise we lose track of them. - * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. + * Only move active pages -- ignore locked or already inactive ones. + * + * XXX: sometimes we get pages which aren't wired down or on any queue - + * we need to put them on the inactive queue also, otherwise we lose + * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. 
*/ spl = splhigh(); if (!(m->flags & PG_INACTIVE) && m->wire_count == 0 && - m->hold_count == 0) { - + m->hold_count == 0) { pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - if (m->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - m->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } + vm_page_unqueue(m); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m->flags |= PG_INACTIVE; cnt.v_inactive_count++; -#define NOT_DEACTIVATE_PROTECTS -#ifndef NOT_DEACTIVATE_PROTECTS - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); -#else - if ((m->flags & PG_CLEAN) && - pmap_is_modified(VM_PAGE_TO_PHYS(m))) - m->flags &= ~PG_CLEAN; -#endif - if ((m->flags & PG_CLEAN) == 0) - m->flags |= PG_LAUNDRY; - } + m->act_count = 0; + } splx(spl); } + +/* + * vm_page_cache + * + * Put the specified page onto the page cache queue (if appropriate). + */ + +void +vm_page_cache(m) + register vm_page_t m; +{ + int s; + + VM_PAGE_CHECK(m); + if ((m->flags & (PG_CACHE | PG_BUSY)) || m->busy || m->wire_count || + m->bmapped) + return; + + s = splhigh(); + vm_page_unqueue(m); + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + + TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); + m->flags |= PG_CACHE; + cnt.v_cache_count++; + if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) { + wakeup((caddr_t) & cnt.v_free_count); + wakeup((caddr_t) & proc0); + } + if (vm_pageout_pages_needed) + wakeup((caddr_t) & vm_pageout_pages_needed); + + splx(s); +} + /* * vm_page_activate: * @@ -851,27 +946,29 @@ vm_page_deactivate(m) * The page queues must be locked. */ -void vm_page_activate(m) - register vm_page_t m; +void +vm_page_activate(m) + register vm_page_t m; { int s; + VM_PAGE_CHECK(m); s = splhigh(); - if (m->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); - cnt.v_inactive_count--; - m->flags &= ~PG_INACTIVE; - } - if (m->wire_count == 0) { - if (m->flags & PG_ACTIVE) - panic("vm_page_activate: already active"); + if (m->flags & PG_ACTIVE) + panic("vm_page_activate: already active"); + + vm_page_unqueue(m); + if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m->flags |= PG_ACTIVE; TAILQ_REMOVE(&m->object->memq, m, listq); TAILQ_INSERT_TAIL(&m->object->memq, m, listq); - m->act_count = 1; + if (m->act_count < 5) + m->act_count = 5; + else + m->act_count += 1; cnt.v_active_count++; } splx(s); @@ -887,12 +984,13 @@ void vm_page_activate(m) boolean_t vm_page_zero_fill(m) - vm_page_t m; + vm_page_t m; { VM_PAGE_CHECK(m); pmap_zero_page(VM_PAGE_TO_PHYS(m)); - return(TRUE); + m->valid = VM_PAGE_BITS_ALL; + return (TRUE); } /* @@ -902,11 +1000,153 @@ vm_page_zero_fill(m) */ void vm_page_copy(src_m, dest_m) - vm_page_t src_m; - vm_page_t dest_m; + vm_page_t src_m; + vm_page_t dest_m; { VM_PAGE_CHECK(src_m); VM_PAGE_CHECK(dest_m); pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); + dest_m->valid = VM_PAGE_BITS_ALL; +} + + +/* + * mapping function for valid bits or for dirty bits in + * a page + */ +inline int +vm_page_bits(int base, int size) +{ + u_short chunk; + + size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); + base = (base % PAGE_SIZE) / DEV_BSIZE; + chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE]; + return (chunk << base) & VM_PAGE_BITS_ALL; +} + +/* + * set a page (partially) valid + */ +void +vm_page_set_valid(m, base, size) + vm_page_t m; + int base; + int size; +{ + m->valid |= vm_page_bits(base, size); +} + +/* + * set a page (partially) invalid + */ +void +vm_page_set_invalid(m, base, size) + vm_page_t m; + int base; + 
int size; +{ + int bits; + + m->valid &= ~(bits = vm_page_bits(base, size)); + if (m->valid == 0) + m->dirty &= ~bits; +} + +/* + * is (partial) page valid? + */ +int +vm_page_is_valid(m, base, size) + vm_page_t m; + int base; + int size; +{ + int bits; + + if (m->valid && ((m->valid & (bits = vm_page_bits(base, size))) == bits)) + return 1; + else + return 0; +} + + +/* + * set a page (partially) dirty + */ +void +vm_page_set_dirty(m, base, size) + vm_page_t m; + int base; + int size; +{ + if ((base != 0) || (size != PAGE_SIZE)) { + if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) { + m->dirty = VM_PAGE_BITS_ALL; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + return; + } + m->dirty |= vm_page_bits(base, size); + } else { + m->dirty = VM_PAGE_BITS_ALL; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + } +} + +void +vm_page_test_dirty(m) + vm_page_t m; +{ + if ((!m->dirty || (m->dirty != vm_page_bits(0, PAGE_SIZE))) && + pmap_is_modified(VM_PAGE_TO_PHYS(m))) { + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m->dirty = VM_PAGE_BITS_ALL; + } +} + +/* + * set a page (partially) clean + */ +void +vm_page_set_clean(m, base, size) + vm_page_t m; + int base; + int size; +{ + m->dirty &= ~vm_page_bits(base, size); +} + +/* + * is (partial) page clean + */ +int +vm_page_is_clean(m, base, size) + vm_page_t m; + int base; + int size; +{ + if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) { + m->dirty = VM_PAGE_BITS_ALL; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + } + if ((m->dirty & m->valid & vm_page_bits(base, size)) == 0) + return 1; + else + return 0; +} + +void +print_page_info() +{ + printf("cnt.v_free_count: %d\n", cnt.v_free_count); + printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); + printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); + printf("cnt.v_active_count: %d\n", cnt.v_active_count); + printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); + printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); + printf("cnt.v_free_min: %d\n", cnt.v_free_min); + printf("cnt.v_free_target: %d\n", cnt.v_free_target); + printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); + printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); } diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 25d8d4fcf63e..140c5271b90c 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_page.h,v 1.9 1994/10/21 01:19:28 wollman Exp $ + * $Id: vm_page.h,v 1.10 1994/11/14 08:19:08 bde Exp $ */ /* @@ -99,21 +99,22 @@ TAILQ_HEAD(pglist, vm_page); struct vm_page { - TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO - * queue or free list (P) */ - TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ + TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */ + TAILQ_ENTRY(vm_page) hashq; /* hash table links (O) */ + TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - vm_object_t object; /* which object am I in (O,P)*/ - vm_offset_t offset; /* offset into object (O,P) */ + vm_object_t object; /* which object am I in (O,P) */ + vm_offset_t offset; /* offset into object (O,P) */ + vm_offset_t phys_addr; /* physical address of page */ - u_short wire_count; /* wired down maps refs (P) */ - u_short flags; /* see below */ - short hold_count; /* page hold count */ - u_short act_count; /* page usage count */ - u_short busy; /* page busy count */ - - vm_offset_t phys_addr; /* physical address of page */ + u_short wire_count; /* wired down maps refs (P) */ + u_short flags; /* see below */ + short hold_count; /* page hold count */ + u_short act_count; /* page usage count */ + u_short bmapped; /* number of buffers mapped */ + u_short busy; /* page busy count */ + u_short valid; /* map of valid DEV_BSIZE chunks */ + u_short dirty; /* map of dirty DEV_BSIZE chunks */ }; /* @@ -123,7 +124,7 @@ struct vm_page { */ #define PG_INACTIVE 0x0001 /* page is in inactive list (P) */ #define PG_ACTIVE 0x0002 /* page is in active list (P) */ -#define PG_LAUNDRY 0x0004 /* page is being cleaned now (P)*/ +#define PG_LAUNDRY 0x0004 /* page is being cleaned now (P) */ #define PG_CLEAN 0x0008 /* page has not been modified */ #define PG_BUSY 0x0010 /* page is in transit (O) */ #define PG_WANTED 0x0020 /* someone is waiting for page (O) */ @@ -135,7 +136,7 @@ struct vm_page { #define PG_DIRTY 0x0800 /* client flag to set when dirty */ #define PG_REFERENCED 0x1000 /* page has been referenced */ #define PG_VMIO 0x2000 /* VMIO flag */ -#define PG_PAGEROWNED 0x4000 /* DEBUG: async paging op in progress */ +#define PG_CACHE 0x4000 /* On VMIO cache */ #define PG_FREE 0x8000 /* page is in free list */ #if VM_PAGE_DEBUG @@ -147,15 +148,15 @@ struct vm_page { (PG_ACTIVE | PG_INACTIVE))) \ panic("vm_page_check: not valid!"); \ } -#else /* VM_PAGE_DEBUG */ +#else /* VM_PAGE_DEBUG */ #define VM_PAGE_CHECK(mem) -#endif /* VM_PAGE_DEBUG */ +#endif /* VM_PAGE_DEBUG */ #ifdef KERNEL /* * Each pageable resident page falls into one of three lists: * - * free + * free * Available for allocation now. * inactive * Not referenced in any map, but still has an @@ -168,26 +169,21 @@ struct vm_page { * ordered, in LRU-like fashion. */ -extern -struct pglist vm_page_queue_free; /* memory free queue */ -extern -struct pglist vm_page_queue_active; /* active memory queue */ -extern -struct pglist vm_page_queue_inactive; /* inactive memory queue */ - -extern -vm_page_t vm_page_array; /* First resident page in table */ -extern -long first_page; /* first physical page number */ - /* ... represented in vm_page_array */ -extern -long last_page; /* last physical page number */ - /* ... 
represented in vm_page_array */ - /* [INCLUSIVE] */ -extern -vm_offset_t first_phys_addr; /* physical address for first_page */ -extern -vm_offset_t last_phys_addr; /* physical address for last_page */ +extern struct pglist vm_page_queue_free; /* memory free queue */ +extern struct pglist vm_page_queue_active; /* active memory queue */ +extern struct pglist vm_page_queue_inactive; /* inactive memory queue */ +extern struct pglist vm_page_queue_cache; /* cache memory queue */ + +extern vm_page_t vm_page_array; /* First resident page in table */ +extern long first_page; /* first physical page number */ + + /* ... represented in vm_page_array */ +extern long last_page; /* last physical page number */ + + /* ... represented in vm_page_array */ + /* [INCLUSIVE] */ +extern vm_offset_t first_phys_addr; /* physical address for first_page */ +extern vm_offset_t last_phys_addr; /* physical address for last_page */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) @@ -197,11 +193,8 @@ vm_offset_t last_phys_addr; /* physical address for last_page */ #define PHYS_TO_VM_PAGE(pa) \ (&vm_page_array[atop(pa) - first_page ]) -extern -simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive - page queues */ -extern /* lock on free page queue */ -simple_lock_data_t vm_page_queue_free_lock; +extern simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive page queues */ +extern simple_lock_data_t vm_page_queue_free_lock; /* lock on free page queue */ /* * Functions implemented as macros @@ -231,21 +224,41 @@ simple_lock_data_t vm_page_queue_free_lock; (mem)->wire_count = 0; \ (mem)->hold_count = 0; \ (mem)->act_count = 0; \ + (mem)->busy = 0; \ + (mem)->valid = 0; \ + (mem)->dirty = 0; \ + (mem)->bmapped = 0; \ } -void vm_page_activate __P((vm_page_t)); -vm_page_t vm_page_alloc __P((vm_object_t, vm_offset_t)); -void vm_page_copy __P((vm_page_t, vm_page_t)); -void vm_page_deactivate __P((vm_page_t)); -void vm_page_free __P((vm_page_t)); -void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t)); -vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t)); -void vm_page_remove __P((vm_page_t)); -void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t)); -vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t)); -void vm_page_unwire __P((vm_page_t)); -void vm_page_wire __P((vm_page_t)); -boolean_t vm_page_zero_fill __P((vm_page_t)); +#if PAGE_SIZE == 4096 +#define VM_PAGE_BITS_ALL 0xff +#endif + +#if PAGE_SIZE == 8192 +#define VM_PAGE_BITS_ALL 0xffff +#endif + + +void vm_page_activate __P((vm_page_t)); +vm_page_t vm_page_alloc __P((vm_object_t, vm_offset_t, int)); +void vm_page_copy __P((vm_page_t, vm_page_t)); +void vm_page_deactivate __P((vm_page_t)); +void vm_page_free __P((vm_page_t)); +void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t)); +vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t)); +void vm_page_remove __P((vm_page_t)); +void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t)); +vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t)); +void vm_page_unwire __P((vm_page_t)); +void vm_page_wire __P((vm_page_t)); +boolean_t vm_page_zero_fill __P((vm_page_t)); +void vm_page_set_dirty __P((vm_page_t, int, int)); +void vm_page_set_clean __P((vm_page_t, int, int)); +int vm_page_is_clean __P((vm_page_t, int, int)); +void vm_page_set_valid __P((vm_page_t, int, int)); +void vm_page_set_invalid __P((vm_page_t, int, int)); +int vm_page_is_valid __P((vm_page_t, int, int)); +void vm_page_test_dirty __P((vm_page_t)); /* @@ 
-268,13 +281,13 @@ static __inline void vm_page_unhold(vm_page_t mem) { #ifdef DIAGNOSTIC - if( --mem->hold_count < 0) + if (--mem->hold_count < 0) panic("vm_page_unhold: hold count < 0!!!"); #else --mem->hold_count; #endif } -#endif /* KERNEL */ +#endif /* KERNEL */ -#endif /* !_VM_PAGE_ */ +#endif /* !_VM_PAGE_ */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 9769d43f52bd..428d93382281 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson @@ -44,17 +44,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.27 1994/11/25 07:58:29 davidg Exp $ + * $Id: vm_pageout.c,v 1.28 1995/01/02 22:56:00 ats Exp $ */ /* @@ -77,6 +77,7 @@ #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/malloc.h> +#include <sys/kernel.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -84,18 +85,15 @@ #include <vm/swap_pager.h> extern vm_map_t kmem_map; -int vm_pages_needed; /* Event on which pageout daemon sleeps */ -int vm_pagescanner; /* Event on which pagescanner sleeps */ -int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */ +int vm_pages_needed; /* Event on which pageout daemon sleeps */ +int vm_pagescanner; /* Event on which pagescanner sleeps */ -int vm_pageout_pages_needed = 0; /* flag saying that the pageout daemon needs pages */ -int vm_page_pagesfreed; -int vm_desired_cache_size; +int vm_pageout_pages_needed = 0;/* flag saying that the pageout daemon needs pages */ +int vm_page_pagesfreed; extern int npendingio; -extern int hz; -int vm_pageout_proc_limit; -int vm_pageout_req_swapout; +int vm_pageout_proc_limit; +int vm_pageout_req_swapout; int vm_daemon_needed; extern int nswiodone; extern int swap_pager_full; @@ -104,53 +102,50 @@ extern int swap_pager_ready(); #define MAXREF 32767 -#define MAXSCAN 512 /* maximum number of pages to scan in active queue */ - /* set the "clock" hands to be (MAXSCAN * 4096) Bytes */ +#define MAXSCAN 512 /* maximum number of pages to scan in active queue */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_MAX 100 +#define MAXISCAN 256 +#define MINTOFREE 6 +#define MINFREE 2 -#define LOWATER ((2048*1024)/NBPG) +#define MAXLAUNDER (cnt.v_page_count > 1800 ? 
32 : 16) #define VM_PAGEOUT_PAGE_COUNT 8 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_pageout_req_do_stats; -int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */ - +int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */ /* * vm_pageout_clean: * cleans a vm_page */ int -vm_pageout_clean(m, sync) +vm_pageout_clean(m, sync) register vm_page_t m; int sync; { /* - * Clean the page and remove it from the - * laundry. - * - * We set the busy bit to cause - * potential page faults on this page to - * block. - * - * And we set pageout-in-progress to keep - * the object from disappearing during - * pageout. This guarantees that the - * page won't move from the inactive - * queue. (However, any other page on - * the inactive queue may move!) + * Clean the page and remove it from the laundry. + * + * We set the busy bit to cause potential page faults on this page to + * block. + * + * And we set pageout-in-progress to keep the object from disappearing + * during pageout. This guarantees that the page won't move from the + * inactive queue. (However, any other page on the inactive queue may + * move!) */ - register vm_object_t object; - register vm_pager_t pager; - int pageout_status[VM_PAGEOUT_PAGE_COUNT]; - vm_page_t ms[VM_PAGEOUT_PAGE_COUNT]; - int pageout_count; - int anyok=0; - int i; + register vm_object_t object; + register vm_pager_t pager; + int pageout_status[VM_PAGEOUT_PAGE_COUNT]; + vm_page_t ms[VM_PAGEOUT_PAGE_COUNT]; + int pageout_count; + int anyok = 0; + int i; vm_offset_t offset = m->offset; object = m->object; @@ -158,57 +153,64 @@ vm_pageout_clean(m, sync) printf("pager: object missing\n"); return 0; } - + if (!object->pager && (object->flags & OBJ_INTERNAL) == 0) { + printf("pager: non internal obj without pager\n"); + } /* - * Try to collapse the object before - * making a pager for it. We must - * unlock the page queues first. - * We try to defer the creation of a pager - * until all shadows are not paging. This - * allows vm_object_collapse to work better and - * helps control swap space size. - * (J. Dyson 11 Nov 93) + * Try to collapse the object before making a pager for it. We must + * unlock the page queues first. We try to defer the creation of a + * pager until all shadows are not paging. This allows + * vm_object_collapse to work better and helps control swap space + * size. (J. 
Dyson 11 Nov 93) */ if (!object->pager && - cnt.v_free_count < vm_pageout_free_min) + (cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min) return 0; - if( !sync) { - if (object->shadow) { - vm_object_collapse(object); - } + if ((!sync && m->bmapped != 0 && m->hold_count != 0) || + ((m->busy != 0) || (m->flags & PG_BUSY))) + return 0; - if ((m->busy != 0) || - (m->flags & PG_BUSY) || (m->hold_count != 0)) { - return 0; - } + if (!sync && object->shadow) { + vm_object_collapse(object); } - pageout_count = 1; ms[0] = m; pager = object->pager; if (pager) { for (i = 1; i < vm_pageout_page_count; i++) { - ms[i] = vm_page_lookup(object, offset+i*NBPG); - if (ms[i]) { - if (((ms[i]->flags & PG_CLEAN) != 0) && - pmap_is_modified(VM_PAGE_TO_PHYS(ms[i]))) { - ms[i]->flags &= ~PG_CLEAN; - } - if (( ((ms[i]->flags & (PG_CLEAN|PG_INACTIVE|PG_BUSY)) == PG_INACTIVE) - || ( (ms[i]->flags & (PG_CLEAN|PG_BUSY)) == 0 && sync == VM_PAGEOUT_FORCE)) - && (ms[i]->wire_count == 0) - && (ms[i]->busy == 0) - && (ms[i]->hold_count == 0)) + vm_page_t mt; + + ms[i] = mt = vm_page_lookup(object, offset + i * NBPG); + if (mt) { + vm_page_test_dirty(mt); + /* + * we can cluster ONLY if: ->> the page is NOT + * busy, and is NOT clean the page is not + * wired, busy, held, or mapped into a buffer. + * and one of the following: 1) The page is + * inactive, or a seldom used active page. 2) + * or we force the issue. + */ + if ((mt->dirty & mt->valid) != 0 + && (((mt->flags & (PG_BUSY | PG_INACTIVE)) == PG_INACTIVE) + || sync == VM_PAGEOUT_FORCE) + && (mt->wire_count == 0) + && (mt->busy == 0) + && (mt->hold_count == 0) + && (mt->bmapped == 0)) pageout_count++; else break; } else break; } - for(i=0;i<pageout_count;i++) { + /* + * we allow reads during pageouts... + */ + for (i = 0; i < pageout_count; i++) { ms[i]->flags |= PG_BUSY; pmap_page_protect(VM_PAGE_TO_PHYS(ms[i]), VM_PROT_READ); } @@ -221,32 +223,29 @@ vm_pageout_clean(m, sync) object->paging_in_progress++; - pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, - object->size, VM_PROT_ALL, 0); + pager = vm_pager_allocate(PG_DFLT, (caddr_t) 0, + object->size, VM_PROT_ALL, 0); if (pager != NULL) { vm_object_setpager(object, pager, 0, FALSE); } } /* - * If there is no pager for the page, - * use the default pager. If there's - * no place to put the page at the - * moment, leave it in the laundry and - * hope that there will be paging space - * later. + * If there is no pager for the page, use the default pager. If + * there's no place to put the page at the moment, leave it in the + * laundry and hope that there will be paging space later. */ - if ((pager && pager->pg_type == PG_SWAP) || - cnt.v_free_count >= vm_pageout_free_min) { - if( pageout_count == 1) { + if ((pager && pager->pg_type == PG_SWAP) || + (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_pageout_free_min) { + if (pageout_count == 1) { pageout_status[0] = pager ? - vm_pager_put(pager, m, - ((sync || (object == kernel_object)) ? TRUE: FALSE)) : - VM_PAGER_FAIL; + vm_pager_put(pager, m, + ((sync || (object == kernel_object)) ? 
TRUE : FALSE)) : + VM_PAGER_FAIL; } else { - if( !pager) { - for(i=0;i<pageout_count;i++) + if (!pager) { + for (i = 0; i < pageout_count; i++) pageout_status[i] = VM_PAGER_FAIL; } else { vm_pager_put_pages(pager, ms, pageout_count, @@ -254,39 +253,34 @@ vm_pageout_clean(m, sync) pageout_status); } } - } else { - for(i=0;i<pageout_count;i++) + for (i = 0; i < pageout_count; i++) pageout_status[i] = VM_PAGER_FAIL; } - for(i=0;i<pageout_count;i++) { + for (i = 0; i < pageout_count; i++) { switch (pageout_status[i]) { case VM_PAGER_OK: - ms[i]->flags &= ~PG_LAUNDRY; ++anyok; break; case VM_PAGER_PEND: - ms[i]->flags &= ~PG_LAUNDRY; ++anyok; break; case VM_PAGER_BAD: /* - * Page outside of range of object. - * Right now we essentially lose the - * changes by pretending it worked. + * Page outside of range of object. Right now we + * essentially lose the changes by pretending it + * worked. */ - ms[i]->flags &= ~PG_LAUNDRY; - ms[i]->flags |= PG_CLEAN; pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i])); + ms[i]->dirty = 0; break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* - * If page couldn't be paged out, then - * reactivate the page so it doesn't - * clog the inactive list. (We will - * try paging out it again later). + * If page couldn't be paged out, then reactivate the + * page so it doesn't clog the inactive list. (We + * will try paging out it again later). */ if (ms[i]->flags & PG_INACTIVE) vm_page_activate(ms[i]); @@ -297,21 +291,20 @@ vm_pageout_clean(m, sync) /* - * If the operation is still going, leave - * the page busy to block all other accesses. - * Also, leave the paging in progress - * indicator set so that we don't attempt an - * object collapse. + * If the operation is still going, leave the page busy to + * block all other accesses. Also, leave the paging in + * progress indicator set so that we don't attempt an object + * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { PAGE_WAKEUP(ms[i]); if (--object->paging_in_progress == 0) wakeup((caddr_t) object); if ((ms[i]->flags & PG_REFERENCED) || - pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) { + pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) { pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i])); ms[i]->flags &= ~PG_REFERENCED; - if( ms[i]->flags & PG_INACTIVE) + if (ms[i]->flags & PG_INACTIVE) vm_page_activate(ms[i]); } } @@ -330,12 +323,13 @@ vm_pageout_clean(m, sync) * The object and map must be locked. 
*/ int -vm_pageout_object_deactivate_pages(map, object, count) +vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) vm_map_t map; vm_object_t object; int count; + int map_remove_only; { - register vm_page_t p, next; + register vm_page_t p, next; int rcount; int dcount; @@ -344,11 +338,12 @@ vm_pageout_object_deactivate_pages(map, object, count) count = 1; if (object->shadow) { - if( object->shadow->ref_count == 1) - dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count/2); + if (object->shadow->ref_count == 1) + dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count / 2 + 1, map_remove_only); + else + dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count / 2 + 1, 1); } - - if (object->paging_in_progress) + if (object->paging_in_progress || !vm_object_lock_try(object)) return dcount; /* @@ -360,30 +355,38 @@ vm_pageout_object_deactivate_pages(map, object, count) next = p->listq.tqe_next; cnt.v_pdpages++; vm_page_lock_queues(); + if (p->wire_count != 0 || + p->hold_count != 0 || + p->bmapped != 0 || + p->busy != 0 || + !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { + p = next; + continue; + } /* - * if a page is active, not wired and is in the processes pmap, - * then deactivate the page. + * if a page is active, not wired and is in the processes + * pmap, then deactivate the page. */ - if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE && - p->wire_count == 0 && - p->hold_count == 0 && - p->busy == 0 && - pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { + if ((p->flags & (PG_ACTIVE | PG_BUSY)) == PG_ACTIVE) { if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) && - (p->flags & PG_REFERENCED) == 0) { + (p->flags & PG_REFERENCED) == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); /* - * if the page act_count is zero -- then we deactivate + * if the page act_count is zero -- then we + * deactivate */ if (!p->act_count) { - vm_page_deactivate(p); + if (!map_remove_only) + vm_page_deactivate(p); pmap_page_protect(VM_PAGE_TO_PHYS(p), - VM_PROT_NONE); - /* - * else if on the next go-around we will deactivate the page - * we need to place the page on the end of the queue to age - * the other pages in memory. - */ + VM_PROT_NONE); + /* + * else if on the next go-around we + * will deactivate the page we need to + * place the page on the end of the + * queue to age the other pages in + * memory. + */ } else { TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); @@ -397,12 +400,12 @@ vm_pageout_object_deactivate_pages(map, object, count) --count; ++dcount; if (count <= 0 && - cnt.v_inactive_count > cnt.v_inactive_target) { - vm_page_unlock_queues(); - return dcount; + cnt.v_inactive_count > cnt.v_inactive_target) { + vm_page_unlock_queues(); + vm_object_unlock(object); + return dcount; } } - } else { /* * Move the page to the bottom of the queue. 
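/*
 * [Editorial sketch -- not part of the original commit.  A minimal,
 *  self-contained model of the act_count aging policy that the pageout
 *  hunks above apply to active pages.  The "ex_" names, the struct and
 *  main() are invented for illustration; only the constants and the
 *  advance/decline arithmetic follow the patch (ACT_ADVANCE = 3,
 *  ACT_DECLINE = 1, ACT_MAX = 100).]
 */
#include <stdio.h>

#define EX_ACT_DECLINE  1       /* decay applied when a page is not referenced */
#define EX_ACT_ADVANCE  3       /* boost applied when a page is referenced */
#define EX_ACT_MAX      100     /* upper bound on the usage counter */

struct ex_page {
        int     act_count;      /* usage counter aged by the scanner */
        int     referenced;     /* nonzero if a reference was observed */
};

/*
 * One scan step: returns 1 when the page should leave the active queue
 * (be deactivated or cached), 0 when it stays active.
 */
static int
ex_age_active_page(struct ex_page *p)
{
        int decline;

        if (p->referenced) {
                p->referenced = 0;
                if (p->act_count < EX_ACT_MAX)
                        p->act_count += EX_ACT_ADVANCE;
                return (0);
        }
        decline = p->act_count < EX_ACT_DECLINE ? p->act_count : EX_ACT_DECLINE;
        p->act_count -= decline;
        return (p->act_count == 0);
}

int
main(void)
{
        struct ex_page p = { 0, 1 };    /* referenced once, then left idle */
        int pass;

        for (pass = 1; pass <= 5; pass++) {
                int leave = ex_age_active_page(&p);

                printf("pass %d: leave_active=%d act_count=%d\n",
                    pass, leave, p.act_count);
        }
        return (0);
}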
@@ -417,11 +420,14 @@ vm_pageout_object_deactivate_pages(map, object, count) TAILQ_REMOVE(&object->memq, p, listq); TAILQ_INSERT_TAIL(&object->memq, p, listq); } + } else if ((p->flags & (PG_INACTIVE | PG_BUSY)) == PG_INACTIVE) { + pmap_page_protect(VM_PAGE_TO_PHYS(p), + VM_PROT_NONE); } - vm_page_unlock_queues(); p = next; } + vm_object_unlock(object); return dcount; } @@ -436,11 +442,12 @@ vm_pageout_map_deactivate_pages(map, entry, count, freeer) vm_map_t map; vm_map_entry_t entry; int *count; - int (*freeer)(vm_map_t, vm_object_t, int); + int (*freeer) (vm_map_t, vm_object_t, int); { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; + if (*count <= 0) return; vm_map_reference(map); @@ -451,18 +458,18 @@ vm_pageout_map_deactivate_pages(map, entry, count, freeer) if (entry == 0) { tmpe = map->header.next; while (tmpe != &map->header && *count > 0) { - vm_pageout_map_deactivate_pages(map, tmpe, count, freeer); + vm_pageout_map_deactivate_pages(map, tmpe, count, freeer, 0); tmpe = tmpe->next; }; } else if (entry->is_sub_map || entry->is_a_map) { tmpm = entry->object.share_map; tmpe = tmpm->header.next; while (tmpe != &tmpm->header && *count > 0) { - vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer); + vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer, 0); tmpe = tmpe->next; }; } else if ((obj = entry->object.vm_object) != 0) { - *count -= (*freeer)(map, obj, *count); + *count -= (*freeer) (map, obj, *count); } lock_read_done(&map->lock); vm_map_deallocate(map); @@ -470,248 +477,309 @@ vm_pageout_map_deactivate_pages(map, entry, count, freeer) } void -vm_req_vmdaemon() { - extern int ticks; - static lastrun = 0; - if( (ticks > (lastrun + hz/10)) || (ticks < lastrun)) { - wakeup((caddr_t) &vm_daemon_needed); +vm_req_vmdaemon() +{ + extern int ticks; + static int lastrun = 0; + + if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) { + wakeup((caddr_t) & vm_daemon_needed); lastrun = ticks; } } +void +vm_pageout_inactive_stats(int maxiscan) +{ + vm_page_t m; + int s; + + if (maxiscan > cnt.v_inactive_count) + maxiscan = cnt.v_inactive_count; + m = vm_page_queue_inactive.tqh_first; + while (m && (maxiscan-- > 0)) { + vm_page_t next; + + next = m->pageq.tqe_next; + + if (((m->flags & PG_REFERENCED) == 0) && + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + m->flags |= PG_REFERENCED; + } + if (m->object->ref_count == 0) { + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + } + if (m->flags & PG_REFERENCED) { + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + vm_page_activate(m); + /* + * heuristic alert -- if a page is being re-activated, + * it probably will be used one more time... + */ + ++m->act_count; + ++m->act_count; + } + m = next; + } +} + + /* * vm_pageout_scan does the dirty work for the pageout daemon. 
*/ int vm_pageout_scan() { - vm_page_t m; - int page_shortage, maxscan, maxlaunder; - int pages_freed; - int desired_free; - vm_page_t next; - struct proc *p, *bigproc; + vm_page_t m; + int page_shortage, maxscan, maxlaunder; + int pages_freed; + int desired_free; + vm_page_t next; + struct proc *p, *bigproc; vm_offset_t size, bigsize; - vm_object_t object; - int force_wakeup = 0; - int cache_size, orig_cache_size; + vm_object_t object; + int force_wakeup = 0; + int cache_size, orig_cache_size; + int minscan; + int mintofree; + +#ifdef LFS + lfs_reclaim_buffers(); +#endif /* calculate the total cached size */ - if( cnt.v_inactive_count < cnt.v_inactive_target) { + if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) < + (cnt.v_inactive_target + cnt.v_free_min)) { vm_req_vmdaemon(); } - -morefree: /* * now swap processes out if we are in low memory conditions */ - if ((cnt.v_free_count <= cnt.v_free_min) && !swap_pager_full && vm_swap_size&& vm_pageout_req_swapout == 0) { + if ((cnt.v_free_count <= cnt.v_free_min) && + !swap_pager_full && vm_swap_size && vm_pageout_req_swapout == 0) { vm_pageout_req_swapout = 1; vm_req_vmdaemon(); } - pages_freed = 0; desired_free = cnt.v_free_target; /* - * Start scanning the inactive queue for pages we can free. - * We keep scanning until we have enough free pages or - * we have scanned through the entire queue. If we - * encounter dirty pages, we start cleaning them. + * Start scanning the inactive queue for pages we can free. We keep + * scanning until we have enough free pages or we have scanned through + * the entire queue. If we encounter dirty pages, we start cleaning + * them. */ - maxlaunder = 128; - maxscan = cnt.v_inactive_count; + + vm_pageout_inactive_stats(MAXISCAN); + maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ? 
+ MAXLAUNDER : cnt.v_inactive_target; + rescan1: + maxscan = cnt.v_inactive_count; + mintofree = MINTOFREE; m = vm_page_queue_inactive.tqh_first; - while (m && (maxscan-- > 0) && - (cnt.v_free_count < desired_free) ) { - vm_page_t next; + while (m && + (maxscan-- > 0) && + (((cnt.v_free_count + cnt.v_cache_count) < desired_free) || + (--mintofree > 0))) { + vm_page_t next; cnt.v_pdpages++; next = m->pageq.tqe_next; - if( (m->flags & PG_INACTIVE) == 0) { +#if defined(VM_DIAGNOSE) + if ((m->flags & PG_INACTIVE) == 0) { printf("vm_pageout_scan: page not inactive?\n"); - continue; - } - - /* - * activate held pages - */ - if (m->hold_count != 0) { - vm_page_activate(m); - m = next; - continue; + break; } +#endif /* * dont mess with busy pages */ - if (m->busy || (m->flags & PG_BUSY)) { + if (m->hold_count || m->busy || (m->flags & PG_BUSY) || + m->bmapped != 0) { + TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); + TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m = next; continue; } - - if (((m->flags & PG_CLEAN) != 0) && pmap_is_modified(VM_PAGE_TO_PHYS(m))) { - m->flags &= ~PG_CLEAN; - m->flags |= PG_LAUNDRY; - } - - if (((m->flags & PG_REFERENCED) == 0) && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + if (((m->flags & PG_REFERENCED) == 0) && + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { m->flags |= PG_REFERENCED; + } + if (m->object->ref_count == 0) { + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + } + if ((m->flags & PG_REFERENCED) != 0) { + m->flags &= ~PG_REFERENCED; pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + vm_page_activate(m); + ++m->act_count; + ++m->act_count; + m = next; + continue; } + vm_page_test_dirty(m); - if (m->flags & PG_CLEAN) { - /* - * If we're not low on memory and the page has been reference, - * then reactivate the page. - */ - if ((cnt.v_free_count > vm_pageout_free_min) && - ((m->flags & PG_REFERENCED) != 0)) { - m->flags &= ~PG_REFERENCED; - vm_page_activate(m); - } else if (m->act_count == 0) { - pmap_page_protect(VM_PAGE_TO_PHYS(m), - VM_PROT_NONE); - vm_page_free(m); - ++cnt.v_dfree; - ++pages_freed; - } else { - m->act_count -= min(m->act_count, ACT_DECLINE); - TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); - TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); - } - } else if ((m->flags & PG_LAUNDRY) && maxlaunder > 0) { + if ((m->dirty & m->valid) == 0) { + if (((cnt.v_free_count + cnt.v_cache_count) < desired_free) || + (cnt.v_cache_count < cnt.v_cache_min)) + vm_page_cache(m); + } else if (maxlaunder > 0) { int written; - if ((m->flags & PG_REFERENCED) != 0) { - m->flags &= ~PG_REFERENCED; - vm_page_activate(m); + + TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); + TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); + + object = m->object; + if (!vm_object_lock_try(object)) { m = next; continue; } - /* - * If a page is dirty, then it is either - * being washed (but not yet cleaned) - * or it is still in the laundry. If it is - * still in the laundry, then we start the - * cleaning operation. + * If a page is dirty, then it is either being washed + * (but not yet cleaned) or it is still in the + * laundry. If it is still in the laundry, then we + * start the cleaning operation. 
*/ - written = vm_pageout_clean(m,0); - if (written) - maxlaunder -= written; + written = vm_pageout_clean(m, 0); + vm_object_unlock(object); - if (!next) + if (!next) { break; + } + maxlaunder -= written; /* - * if the next page has been re-activated, start scanning again + * if the next page has been re-activated, start + * scanning again */ - if ((written != 0) || ((next->flags & PG_INACTIVE) == 0)) + if ((next->flags & PG_INACTIVE) == 0) { goto rescan1; - } else if ((m->flags & PG_REFERENCED) != 0) { - m->flags &= ~PG_REFERENCED; - vm_page_activate(m); - } + } + } else { + TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); + TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); + } m = next; } /* - * Compute the page shortage. If we are still very low on memory - * be sure that we will move a minimal amount of pages from active - * to inactive. + * Compute the page shortage. If we are still very low on memory be + * sure that we will move a minimal amount of pages from active to + * inactive. */ - page_shortage = cnt.v_inactive_target - - (cnt.v_free_count + cnt.v_inactive_count); - + page_shortage = cnt.v_inactive_target - + (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count); if (page_shortage <= 0) { if (pages_freed == 0) { - if( cnt.v_free_count < cnt.v_free_min) { - page_shortage = cnt.v_free_min - cnt.v_free_count + 1; - } else if(((cnt.v_free_count + cnt.v_inactive_count) < - (cnt.v_free_min + cnt.v_inactive_target))) { - page_shortage = 1; - } else { - page_shortage = 0; + if ((cnt.v_free_count + cnt.v_cache_count) < desired_free) { + page_shortage = + desired_free - (cnt.v_free_count + cnt.v_cache_count); } } - } - maxscan = cnt.v_active_count; + minscan = cnt.v_active_count; + if (minscan > MAXSCAN) + minscan = MAXSCAN; m = vm_page_queue_active.tqh_first; - while (m && maxscan-- && (page_shortage > 0)) { + while (m && ((maxscan > 0 && (page_shortage > 0)) || minscan > 0)) { + if (maxscan) + --maxscan; + if (minscan) + --minscan; cnt.v_pdpages++; next = m->pageq.tqe_next; /* - * Don't deactivate pages that are busy. + * Don't deactivate pages that are busy. */ if ((m->busy != 0) || - (m->flags & PG_BUSY) || (m->hold_count != 0)) { + (m->flags & PG_BUSY) || + (m->hold_count != 0) || + (m->bmapped != 0)) { m = next; continue; } + if (m->object->ref_count && ((m->flags & PG_REFERENCED) || + pmap_is_referenced(VM_PAGE_TO_PHYS(m)))) { + int s; - if ((m->flags & PG_REFERENCED) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { pmap_clear_reference(VM_PAGE_TO_PHYS(m)); m->flags &= ~PG_REFERENCED; - if (m->act_count < ACT_MAX) + if (m->act_count < ACT_MAX) { m->act_count += ACT_ADVANCE; + } TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); + s = splhigh(); TAILQ_REMOVE(&m->object->memq, m, listq); TAILQ_INSERT_TAIL(&m->object->memq, m, listq); + splx(s); } else { m->act_count -= min(m->act_count, ACT_DECLINE); /* * if the page act_count is zero -- then we deactivate */ - if (!m->act_count) { - vm_page_deactivate(m); - --page_shortage; - /* - * else if on the next go-around we will deactivate the page - * we need to place the page on the end of the queue to age - * the other pages in memory. 
- */ + if (!m->act_count && (page_shortage > 0)) { + if (m->object->ref_count == 0) { + vm_page_test_dirty(m); + + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + + --page_shortage; + if ((m->dirty & m->valid) == 0) { + m->act_count = 0; + vm_page_cache(m); + } else { + vm_page_deactivate(m); + } + } else { + + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + + vm_page_deactivate(m); + --page_shortage; + } } else { TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); - TAILQ_REMOVE(&m->object->memq, m, listq); - TAILQ_INSERT_TAIL(&m->object->memq, m, listq); } } m = next; } /* - * if we have not freed any pages and we are desparate for memory - * then we keep trying until we get some (any) memory. + * We try to maintain some *really* free pages, this allows interrupt + * code to be guaranteed space. */ - - if (!force_wakeup && (swap_pager_full || !force_wakeup || - (pages_freed == 0 && (cnt.v_free_count < cnt.v_free_min)))){ - vm_pager_sync(); - force_wakeup = 1; - goto morefree; + while (cnt.v_free_count < MINFREE) { + m = vm_page_queue_cache.tqh_first; + if (!m) + break; + vm_page_free(m); } /* - * make sure that we have swap space -- if we are low on - * memory and swap -- then kill the biggest process. + * make sure that we have swap space -- if we are low on memory and + * swap -- then kill the biggest process. */ if ((vm_swap_size == 0 || swap_pager_full) && - (cnt.v_free_count < cnt.v_free_min)) { + ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) { bigproc = NULL; bigsize = 0; - for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { /* * if this is a system process, skip it */ @@ -719,7 +787,6 @@ rescan1: ((p->p_pid < 48) && (vm_swap_size != 0))) { continue; } - /* * if the process is in a non-running type state, * don't touch it. @@ -741,12 +808,12 @@ rescan1: } } if (bigproc != NULL) { - printf("Process %lu killed by vm_pageout -- out of swap\n", (u_long)bigproc->p_pid); + printf("Process %lu killed by vm_pageout -- out of swap\n", (u_long) bigproc->p_pid); psignal(bigproc, SIGKILL); bigproc->p_estcpu = 0; bigproc->p_nice = PRIO_MIN; resetpriority(bigproc); - wakeup( (caddr_t) &cnt.v_free_count); + wakeup((caddr_t) & cnt.v_free_count); } } vm_page_pagesfreed += pages_freed; @@ -762,78 +829,68 @@ vm_pageout() (void) spl0(); /* - * Initialize some paging parameters. + * Initialize some paging parameters. */ - cnt.v_free_min = 12; + if (cnt.v_page_count > 1024) + cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200; + else + cnt.v_free_min = 4; /* - * free_reserved needs to include enough for the largest - * swap pager structures plus enough for any pv_entry - * structs when paging. + * free_reserved needs to include enough for the largest swap pager + * structures plus enough for any pv_entry structs when paging. 
*/ - vm_pageout_free_min = 4 + cnt.v_page_count / 1024; - cnt.v_free_reserved = vm_pageout_free_min + 2; - if (cnt.v_free_min < 8) - cnt.v_free_min = 8; - if (cnt.v_free_min > 32) - cnt.v_free_min = 32; - cnt.v_free_target = 2*cnt.v_free_min + cnt.v_free_reserved; - cnt.v_inactive_target = cnt.v_free_count / 12; + cnt.v_pageout_free_min = 5 + cnt.v_page_count / 1024; + cnt.v_free_reserved = cnt.v_pageout_free_min + 2; + cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved; + cnt.v_inactive_target = cnt.v_free_count / 4; + if (cnt.v_inactive_target > 512) + cnt.v_inactive_target = 512; cnt.v_free_min += cnt.v_free_reserved; - vm_desired_cache_size = cnt.v_page_count / 3; + if (cnt.v_page_count > 1024) { + cnt.v_cache_max = (cnt.v_free_count - 1024) / 2; + cnt.v_cache_min = (cnt.v_free_count - 1024) / 20; + } else { + cnt.v_cache_min = 0; + cnt.v_cache_max = 0; + } - /* XXX does not really belong here */ + /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = cnt.v_free_count / 3; (void) swap_pager_alloc(0, 0, 0, 0); - /* - * The pageout daemon is never done, so loop - * forever. + * The pageout daemon is never done, so loop forever. */ while (TRUE) { - int force_wakeup; - - tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0); + tsleep((caddr_t) & vm_pages_needed, PVM, "psleep", 0); cnt.v_pdwakeups++; - vm_pager_sync(); - /* - * The force wakeup hack added to eliminate delays and potiential - * deadlock. It was possible for the page daemon to indefintely - * postpone waking up a process that it might be waiting for memory - * on. The putmulti stuff seems to have aggravated the situation. - */ - force_wakeup = vm_pageout_scan(); + vm_pageout_scan(); vm_pager_sync(); - if( force_wakeup) - wakeup( (caddr_t) &cnt.v_free_count); + wakeup((caddr_t) & cnt.v_free_count); wakeup((caddr_t) kmem_map); } } void -vm_daemon() { +vm_daemon() +{ int cache_size; vm_object_t object; struct proc *p; - while(TRUE) { - tsleep((caddr_t) &vm_daemon_needed, PUSER, "psleep", 0); - if( vm_pageout_req_swapout) { + + while (TRUE) { + tsleep((caddr_t) & vm_daemon_needed, PUSER, "psleep", 0); + swapout_threads(); /* - * swap out inactive processes + * scan the processes for exceeding their rlimits or if + * process is swapped out -- deactivate pages */ - swapout_threads(); - vm_pageout_req_swapout = 0; - } - /* - * scan the processes for exceeding their rlimits or if process - * is swapped out -- deactivate pages - */ - for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { int overage; quad_t limit; vm_offset_t size; @@ -842,77 +899,60 @@ vm_daemon() { * if this is a system process or if we have already * looked at this process, skip it. */ - if (p->p_flag & (P_SYSTEM|P_WEXIT)) { + if (p->p_flag & (P_SYSTEM | P_WEXIT)) { continue; } - - /* - * if the process is in a non-running type state, - * don't touch it. - */ + /* + * if the process is in a non-running type state, + * don't touch it. + */ if (p->p_stat != SRUN && p->p_stat != SSLEEP) { continue; } - - /* - * get a limit - */ + /* + * get a limit + */ limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur, - p->p_rlimit[RLIMIT_RSS].rlim_max); - - /* - * let processes that are swapped out really be swapped out - * set the limit to nothing (will force a swap-out.) - */ + p->p_rlimit[RLIMIT_RSS].rlim_max); + + /* + * let processes that are swapped out really be + * swapped out set the limit to nothing (will force a + * swap-out.) 
+ */ if ((p->p_flag & P_INMEM) == 0) - limit = 0; + limit = 0; /* XXX */ size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG; if (limit >= 0 && size >= limit) { overage = (size - limit) / NBPG; + if (limit == 0) + overage += 20; vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, - (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages); + (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages); } } + } /* - * We manage the cached memory by attempting to keep it - * at about the desired level. - * We deactivate the pages for the oldest cached objects - * first. This keeps pages that are "cached" from hogging - * physical memory. + * we remove cached objects that have no RSS... */ restart: - cache_size = 0; - object = vm_object_cached_list.tqh_first; - /* calculate the total cached size */ - while( object) { - cache_size += object->resident_page_count; - object = object->cached_list.tqe_next; - } - - vm_object_cache_lock(); - object = vm_object_cached_list.tqh_first; - while ( object) { - vm_object_cache_unlock(); + vm_object_cache_lock(); + object = vm_object_cached_list.tqh_first; + while (object) { + vm_object_cache_unlock(); /* * if there are no resident pages -- get rid of the object */ - if( object->resident_page_count == 0) { - if (object != vm_object_lookup(object->pager)) - panic("vm_object_cache_trim: I'm sooo confused."); - pager_cache(object, FALSE); - goto restart; - } else if( cache_size >= (vm_swap_size?vm_desired_cache_size:0)) { - /* - * if there are resident pages -- deactivate them - */ - vm_object_deactivate_pages(object); - cache_size -= object->resident_page_count; - } - object = object->cached_list.tqe_next; - vm_object_cache_lock(); + if (object->resident_page_count == 0) { + if (object != vm_object_lookup(object->pager)) + panic("vm_object_cache_trim: I'm sooo confused."); + pager_cache(object, FALSE); + goto restart; } - vm_object_cache_unlock(); + object = object->cached_list.tqe_next; + vm_object_cache_lock(); } + vm_object_cache_unlock(); } diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 77c43739f31b..562a2aceb496 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Author: Avadis Tevanian, Jr. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.h,v 1.5 1994/08/21 07:19:45 paul Exp $ + * $Id: vm_pageout.h,v 1.6 1994/10/09 01:52:16 phk Exp $ */ #ifndef _VM_VM_PAGEOUT_H_ @@ -75,8 +75,8 @@ * Exported data structures. 
*/ -extern int vm_pages_needed; /* should be some "event" structure */ -simple_lock_data_t vm_pages_needed_lock; +extern int vm_pages_needed; /* should be some "event" structure */ +simple_lock_data_t vm_pages_needed_lock; extern int vm_pageout_pages_needed; #define VM_PAGEOUT_ASYNC 0 @@ -93,26 +93,30 @@ extern int vm_pageout_pages_needed; #define VM_WAIT vm_wait() -inline static void vm_wait() { +inline static void +vm_wait() +{ int s; + s = splhigh(); if (curproc == pageproc) { vm_pageout_pages_needed = 1; - tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0); + tsleep((caddr_t) & vm_pageout_pages_needed, PSWP, "vmwait", 0); vm_pageout_pages_needed = 0; } else { - wakeup((caddr_t) &vm_pages_needed); - tsleep((caddr_t) &cnt.v_free_count, PVM, "vmwait", 0); + wakeup((caddr_t) & vm_pages_needed); + tsleep((caddr_t) & cnt.v_free_count, PVM, "vmwait", 0); } splx(s); } #ifdef KERNEL -int vm_pageout_scan __P((void)); -void vm_pageout_page __P((vm_page_t, vm_object_t)); -void vm_pageout_cluster __P((vm_page_t, vm_object_t)); -int vm_pageout_clean __P((vm_page_t, int)); +int vm_pageout_scan __P((void)); +void vm_pageout_page __P((vm_page_t, vm_object_t)); +void vm_pageout_cluster __P((vm_page_t, vm_object_t)); +int vm_pageout_clean __P((vm_page_t, int)); + #endif #endif diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index d82ec250b5e5..be57e5ec0fdf 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pager.c,v 1.9 1994/12/19 00:02:56 davidg Exp $ + * $Id: vm_pager.c,v 1.10 1994/12/23 04:56:51 davidg Exp $ */ /* @@ -89,7 +89,7 @@ struct pagerops *pagertab[] = { &vnodepagerops, /* PG_VNODE */ &devicepagerops, /* PG_DEV */ }; -int npagers = sizeof (pagertab) / sizeof (pagertab[0]); +int npagers = sizeof(pagertab) / sizeof(pagertab[0]); struct pagerops *dfltpagerops = NULL; /* default pager */ @@ -120,7 +120,7 @@ vm_pager_init() */ for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) if (pgops) - (*(*pgops)->pgo_init)(); + (*(*pgops)->pgo_init) (); if (dfltpagerops == NULL) panic("no default pager"); } @@ -130,6 +130,7 @@ vm_pager_bufferinit() { struct buf *bp; int i; + bp = swbuf; /* * Now set up swap and physical I/O buffer headers. 
@@ -143,8 +144,8 @@ vm_pager_bufferinit() bp->b_vnbufs.le_next = NOLIST; bp->b_actf = NULL; - swapbkva = kmem_alloc_pageable( pager_map, nswbuf * MAXPHYS); - if( !swapbkva) + swapbkva = kmem_alloc_pageable(pager_map, nswbuf * MAXPHYS); + if (!swapbkva) panic("Not enough pager_map VM space for physical buffers"); } @@ -165,34 +166,34 @@ vm_pager_allocate(type, handle, size, prot, off) ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type]; if (ops) - return ((*ops->pgo_alloc)(handle, size, prot, off)); + return ((*ops->pgo_alloc) (handle, size, prot, off)); return (NULL); } void vm_pager_deallocate(pager) - vm_pager_t pager; + vm_pager_t pager; { if (pager == NULL) panic("vm_pager_deallocate: null pager"); - (*pager->pg_ops->pgo_dealloc)(pager); + (*pager->pg_ops->pgo_dealloc) (pager); } int vm_pager_get_pages(pager, m, count, reqpage, sync) - vm_pager_t pager; - vm_page_t *m; - int count; - int reqpage; - boolean_t sync; + vm_pager_t pager; + vm_page_t *m; + int count; + int reqpage; + boolean_t sync; { int i; if (pager == NULL) { - for (i=0;i<count;i++) { - if( i != reqpage) { + for (i = 0; i < count; i++) { + if (i != reqpage) { PAGE_WAKEUP(m[i]); vm_page_free(m[i]); } @@ -200,35 +201,34 @@ vm_pager_get_pages(pager, m, count, reqpage, sync) vm_page_zero_fill(m[reqpage]); return VM_PAGER_OK; } - - if( pager->pg_ops->pgo_getpages == 0) { - for(i=0;i<count;i++) { - if( i != reqpage) { + if (pager->pg_ops->pgo_getpages == 0) { + for (i = 0; i < count; i++) { + if (i != reqpage) { PAGE_WAKEUP(m[i]); vm_page_free(m[i]); } } - return(VM_PAGER_GET(pager, m[reqpage], sync)); + return (VM_PAGER_GET(pager, m[reqpage], sync)); } else { - return(VM_PAGER_GET_MULTI(pager, m, count, reqpage, sync)); + return (VM_PAGER_GET_MULTI(pager, m, count, reqpage, sync)); } } int vm_pager_put_pages(pager, m, count, sync, rtvals) - vm_pager_t pager; - vm_page_t *m; - int count; - boolean_t sync; - int *rtvals; + vm_pager_t pager; + vm_page_t *m; + int count; + boolean_t sync; + int *rtvals; { int i; - if( pager->pg_ops->pgo_putpages) - return(VM_PAGER_PUT_MULTI(pager, m, count, sync, rtvals)); + if (pager->pg_ops->pgo_putpages) + return (VM_PAGER_PUT_MULTI(pager, m, count, sync, rtvals)); else { - for(i=0;i<count;i++) { - rtvals[i] = VM_PAGER_PUT( pager, m[i], sync); + for (i = 0; i < count; i++) { + rtvals[i] = VM_PAGER_PUT(pager, m[i], sync); } return rtvals[0]; } @@ -236,12 +236,12 @@ vm_pager_put_pages(pager, m, count, sync, rtvals) boolean_t vm_pager_has_page(pager, offset) - vm_pager_t pager; - vm_offset_t offset; + vm_pager_t pager; + vm_offset_t offset; { if (pager == NULL) panic("vm_pager_has_page: null pager"); - return ((*pager->pg_ops->pgo_haspage)(pager, offset)); + return ((*pager->pg_ops->pgo_haspage) (pager, offset)); } /* @@ -255,37 +255,37 @@ vm_pager_sync() for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) if (pgops) - (*(*pgops)->pgo_putpage)(NULL, NULL, 0); + (*(*pgops)->pgo_putpage) (NULL, NULL, 0); } #if 0 void vm_pager_cluster(pager, offset, loff, hoff) - vm_pager_t pager; - vm_offset_t offset; - vm_offset_t *loff; - vm_offset_t *hoff; + vm_pager_t pager; + vm_offset_t offset; + vm_offset_t *loff; + vm_offset_t *hoff; { if (pager == NULL) panic("vm_pager_cluster: null pager"); - return ((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff)); + return ((*pager->pg_ops->pgo_cluster) (pager, offset, loff, hoff)); } #endif vm_offset_t vm_pager_map_page(m) - vm_page_t m; + vm_page_t m; { vm_offset_t kva; kva = kmem_alloc_wait(pager_map, PAGE_SIZE); pmap_kenter(kva, 
VM_PAGE_TO_PHYS(m)); - return(kva); + return (kva); } void vm_pager_unmap_page(kva) - vm_offset_t kva; + vm_offset_t kva; { pmap_kremove(kva); kmem_free_wakeup(pager_map, kva, PAGE_SIZE); @@ -293,11 +293,11 @@ vm_pager_unmap_page(kva) vm_page_t vm_pager_atop(kva) - vm_offset_t kva; + vm_offset_t kva; { vm_offset_t pa; - pa = pmap_kextract( kva); + pa = pmap_kextract(kva); if (pa == 0) panic("vm_pager_atop"); return (PHYS_TO_VM_PAGE(pa)); @@ -322,8 +322,8 @@ vm_pager_lookup(pglist, handle) */ int pager_cache(object, should_cache) - vm_object_t object; - boolean_t should_cache; + vm_object_t object; + boolean_t should_cache; { if (object == NULL) return (KERN_INVALID_ARGUMENT); @@ -343,10 +343,11 @@ pager_cache(object, should_cache) } /* - * allocate a physical buffer + * allocate a physical buffer */ struct buf * -getpbuf() { +getpbuf() +{ int s; struct buf *bp; @@ -354,7 +355,7 @@ getpbuf() { /* get a bp from the swap buffer header pool */ while ((bp = bswlist.tqh_first) == NULL) { bswneeded = 1; - tsleep((caddr_t)&bswneeded, PVM, "wswbuf", 0); + tsleep((caddr_t) & bswneeded, PVM, "wswbuf", 0); } TAILQ_REMOVE(&bswlist, bp, b_freelist); splx(s); @@ -362,7 +363,7 @@ getpbuf() { bzero(bp, sizeof *bp); bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; - bp->b_data = (caddr_t) (MAXPHYS * (bp-swbuf)) + swapbkva; + bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva; bp->b_vnbufs.le_next = NOLIST; return bp; } @@ -371,7 +372,8 @@ getpbuf() { * allocate a physical buffer, if one is available */ struct buf * -trypbuf() { +trypbuf() +{ int s; struct buf *bp; @@ -386,7 +388,7 @@ trypbuf() { bzero(bp, sizeof *bp); bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; - bp->b_data = (caddr_t) (MAXPHYS * (bp-swbuf)) + swapbkva; + bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva; bp->b_vnbufs.le_next = NOLIST; return bp; } @@ -410,18 +412,17 @@ relpbuf(bp) crfree(bp->b_wcred); bp->b_wcred = NOCRED; } - if (bp->b_vp) - brelvp(bp); + pbrelvp(bp); if (bp->b_flags & B_WANTED) - wakeup((caddr_t)bp); + wakeup((caddr_t) bp); TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist); if (bswneeded) { bswneeded = 0; - wakeup((caddr_t)&bswlist); + wakeup((caddr_t) & bswlist); } splx(s); } diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index ea204a1b03bf..939b727bb4be 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -37,7 +37,7 @@ * SUCH DAMAGE. * * @(#)vm_pager.h 8.4 (Berkeley) 1/12/94 - * $Id: vm_pager.h,v 1.3 1994/08/02 07:55:36 davidg Exp $ + * $Id: vm_pager.h,v 1.4 1994/10/09 01:52:17 phk Exp $ */ /* @@ -50,13 +50,12 @@ TAILQ_HEAD(pagerlst, pager_struct); -struct pager_struct { +struct pager_struct { TAILQ_ENTRY(pager_struct) pg_list; /* links for list management */ - caddr_t pg_handle; /* ext. handle (vp, dev, fp) */ - int pg_type; /* type of pager */ - int pg_flags; /* flags */ - struct pagerops *pg_ops; /* pager operations */ - void *pg_data; /* private pager data */ + caddr_t pg_handle; /* ext. handle (vp, dev, fp) */ + int pg_type; /* type of pager */ + struct pagerops *pg_ops; /* pager operations */ + void *pg_data; /* private pager data */ }; /* pager types */ @@ -69,23 +68,15 @@ struct pager_struct { #define PG_CLUSTERGET 1 #define PG_CLUSTERPUT 2 -struct pagerops { - void (*pgo_init) /* Initialize pager. */ - __P((void)); - vm_pager_t (*pgo_alloc) /* Allocate pager. */ - __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); - void (*pgo_dealloc) /* Disassociate. */ - __P((vm_pager_t)); - int (*pgo_getpage) - __P((vm_pager_t, vm_page_t, boolean_t)); - int (*pgo_getpages) /* Get (read) page. 
*/ - __P((vm_pager_t, vm_page_t *, int, int, boolean_t)); - int (*pgo_putpage) - __P((vm_pager_t, vm_page_t, boolean_t)); - int (*pgo_putpages) /* Put (write) page. */ - __P((vm_pager_t, vm_page_t *, int, boolean_t, int *)); - boolean_t (*pgo_haspage) /* Does pager have page? */ - __P((vm_pager_t, vm_offset_t)); +struct pagerops { + void (*pgo_init) __P((void)); /* Initialize pager. */ + vm_pager_t(*pgo_alloc) __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); /* Allocate pager. */ + void (*pgo_dealloc) __P((vm_pager_t)); /* Disassociate. */ + int (*pgo_getpage) __P((vm_pager_t, vm_page_t, boolean_t)); + int (*pgo_getpages) __P((vm_pager_t, vm_page_t *, int, int, boolean_t)); /* Get (read) page. */ + int (*pgo_putpage) __P((vm_pager_t, vm_page_t, boolean_t)); + int (*pgo_putpages) __P((vm_pager_t, vm_page_t *, int, boolean_t, int *)); /* Put (write) page. */ + boolean_t(*pgo_haspage) __P((vm_pager_t, vm_offset_t)); /* Does pager have page? */ }; #define VM_PAGER_ALLOC(h, s, p, o) (*(pg)->pg_ops->pgo_alloc)(h, s, p, o) @@ -115,24 +106,19 @@ struct pagerops { #ifdef KERNEL extern struct pagerops *dfltpagerops; -vm_pager_t vm_pager_allocate - __P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); -vm_page_t vm_pager_atop __P((vm_offset_t)); -void vm_pager_deallocate __P((vm_pager_t)); -int vm_pager_get_pages - __P((vm_pager_t, vm_page_t *, int, int, boolean_t)); -boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t)); -void vm_pager_init __P((void)); -vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t)); -vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t)); -vm_offset_t vm_pager_map_page __P((vm_page_t)); -int vm_pager_put_pages - __P((vm_pager_t, vm_page_t *, int, boolean_t, int *)); -void vm_pager_sync __P((void)); -void vm_pager_unmap_pages __P((vm_offset_t, int)); -void vm_pager_unmap_page __P((vm_offset_t)); - -#define vm_pager_cancluster(p, b) ((p)->pg_flags & (b)) +vm_pager_t vm_pager_allocate __P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); +vm_page_t vm_pager_atop __P((vm_offset_t)); +void vm_pager_deallocate __P((vm_pager_t)); +int vm_pager_get_pages __P((vm_pager_t, vm_page_t *, int, int, boolean_t)); +boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t)); +void vm_pager_init __P((void)); +vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t)); +vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t)); +vm_offset_t vm_pager_map_page __P((vm_page_t)); +int vm_pager_put_pages __P((vm_pager_t, vm_page_t *, int, boolean_t, int *)); +void vm_pager_sync __P((void)); +void vm_pager_unmap_pages __P((vm_offset_t, int)); +void vm_pager_unmap_page __P((vm_offset_t)); /* * XXX compat with old interface @@ -154,4 +140,4 @@ void vm_pager_unmap_page __P((vm_offset_t)); }) #endif -#endif /* _VM_PAGER_ */ +#endif /* _VM_PAGER_ */ diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 4fc3449452e7..4b047f9e06ca 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: vm_param.h,v 1.3 1994/08/02 07:55:37 davidg Exp $ */ /* @@ -76,7 +76,8 @@ /* * This belongs in types.h, but breaks too many existing programs. */ -typedef int boolean_t; +typedef int boolean_t; + #define TRUE 1 #define FALSE 0 @@ -94,36 +95,51 @@ typedef int boolean_t; * we can easily make them constant if we so desire. */ #ifndef PAGE_SIZE -#define PAGE_SIZE cnt.v_page_size /* size of page */ +#define PAGE_SIZE cnt.v_page_size /* size of page */ #endif #ifndef PAGE_MASK -#define PAGE_MASK page_mask /* size of page - 1 */ +#define PAGE_MASK page_mask /* size of page - 1 */ #endif #ifndef PAGE_SHIFT -#define PAGE_SHIFT page_shift /* bits to shift for pages */ +#define PAGE_SHIFT page_shift /* bits to shift for pages */ #endif #endif #ifdef KERNEL -extern vm_size_t page_mask; -extern int page_shift; +extern vm_size_t page_mask; +extern int page_shift; + #endif /* * CTL_VM identifiers */ -#define VM_METER 1 /* struct vmmeter */ -#define VM_LOADAVG 2 /* struct loadavg */ -#define VM_MAXID 3 /* number of valid vm ids */ +#define VM_METER 1 /* struct vmmeter */ +#define VM_LOADAVG 2 /* struct loadavg */ +#define VM_V_FREE_MIN 3 /* cnt.v_free_min */ +#define VM_V_FREE_TARGET 4 /* cnt.v_free_target */ +#define VM_V_FREE_RESERVED 5 /* cnt.v_free_reserved */ +#define VM_V_INACTIVE_TARGET 6 /* cnt.v_inactive_target */ +#define VM_V_CACHE_MIN 7 /* cnt.v_cache_max */ +#define VM_V_CACHE_MAX 8 /* cnt.v_cache_min */ +#define VM_V_PAGEOUT_FREE_MIN 9 /* cnt.v_pageout_free_min */ +#define VM_MAXID 10 /* number of valid vm ids */ #define CTL_VM_NAMES { \ { 0, 0 }, \ { "vmmeter", CTLTYPE_STRUCT }, \ { "loadavg", CTLTYPE_STRUCT }, \ + { "v_free_min", CTLTYPE_INT }, \ + { "v_free_target", CTLTYPE_INT }, \ + { "v_free_reserved", CTLTYPE_INT }, \ + { "v_inactive_target", CTLTYPE_INT }, \ + { "v_cache_min", CTLTYPE_INT }, \ + { "v_cache_max", CTLTYPE_INT }, \ + { "v_pageout_free_min", CTLTYPE_INT}, \ } -/* +/* * Return values from the VM routines. */ #define KERN_SUCCESS 0 @@ -142,50 +158,12 @@ extern int page_shift; * No rounding is used. */ #ifdef KERNEL - -#if 0 - -#ifndef atop -#define atop(x) (((unsigned)(x)) >> PAGE_SHIFT) -#endif -#ifndef ptoa -#define ptoa(x) ((vm_offset_t)((x) << PAGE_SHIFT)) -#endif - -/* - * Round off or truncate to the nearest page. These will work - * for either addresses or counts (i.e., 1 byte rounds to 1 page). 
- */ -#ifndef round_page -#define round_page(x) \ - ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK)) -#endif -#ifndef trunc_page -#define trunc_page(x) \ - ((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK)) -#endif -#ifndef num_pages #define num_pages(x) \ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT)) -#endif - -#endif -#define num_pages(x) \ - ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT)) - -extern vm_size_t mem_size; /* size of physical memory (bytes) */ -extern vm_offset_t first_addr; /* first physical page */ -extern vm_offset_t last_addr; /* last physical page */ - -#else -#if 0 -/* out-of-kernel versions of round_page and trunc_page */ -#define round_page(x) \ - ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size) -#define trunc_page(x) \ - ((((vm_offset_t)(x)) / vm_page_size) * vm_page_size) -#endif -#endif /* KERNEL */ -#endif /* ASSEMBLER */ -#endif /* _VM_PARAM_ */ +extern vm_size_t mem_size; /* size of physical memory (bytes) */ +extern vm_offset_t first_addr; /* first physical page */ +extern vm_offset_t last_addr; /* last physical page */ +#endif /* KERNEL */ +#endif /* ASSEMBLER */ +#endif /* _VM_PARAM_ */ diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h index e34dc0e69545..36079a0c1ed9 100644 --- a/sys/vm/vm_prot.h +++ b/sys/vm/vm_prot.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: vm_prot.h,v 1.3 1994/08/02 07:55:38 davidg Exp $ */ /* @@ -77,7 +77,7 @@ * vm_prot_t VM protection values. */ -typedef u_char vm_prot_t; +typedef u_char vm_prot_t; /* * Protection values, defined as bits within the vm_prot_t type @@ -101,4 +101,4 @@ typedef u_char vm_prot_t; #define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) -#endif /* _VM_PROT_ */ +#endif /* _VM_PROT_ */ diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index be750b148059..3f59f2e903c6 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 - * $Id: vm_swap.c,v 1.11 1994/10/22 17:53:35 phk Exp $ + * $Id: vm_swap.c,v 1.12 1994/11/22 08:47:20 davidg Exp $ */ #include <sys/param.h> @@ -51,15 +51,18 @@ * Indirect driver for multi-controller paging. 
*/ -int nswap, nswdev; -int vm_swap_size; +int nswap, nswdev; +int vm_swap_size; + #ifdef SEQSWAP -int niswdev; /* number of interleaved swap devices */ -int niswap; /* size of interleaved swap area */ +int niswdev; /* number of interleaved swap devices */ +int niswap; /* size of interleaved swap area */ + #endif int bswneeded; vm_offset_t swapbkva; /* swap buffers kva */ + /* * Set up swap devices. * Initialize linked list of free swap @@ -75,12 +78,12 @@ swapinit() int error; /* - * Count swap devices, and adjust total swap space available. - * Some of the space will not be countable until later (dynamically + * Count swap devices, and adjust total swap space available. Some of + * the space will not be countable until later (dynamically * configurable devices) and some of the counted space will not be * available until a swapon() system call is issued, both usually * happen when the system goes multi-user. - * + * * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX */ #ifdef SEQSWAP @@ -104,7 +107,7 @@ swapinit() /* * The remainder must be sequential */ - for ( ; swp->sw_dev != NODEV; swp++) { + for (; swp->sw_dev != NODEV; swp++) { if ((swp->sw_flags & SW_SEQUENTIAL) == 0) panic("binit: mis-ordered swap devices"); nswdev++; @@ -136,13 +139,13 @@ swapinit() panic("swapvp"); #endif /* - * If there is no swap configured, tell the user. We don't automatically - * activate any swapspaces in the kernel; the user must explicitly use - * swapon to enable swaping on a device. + * If there is no swap configured, tell the user. We don't + * automatically activate any swapspaces in the kernel; the user must + * explicitly use swapon to enable swaping on a device. */ if (nswap == 0) printf("WARNING: no swap space found\n"); - for (swp = swdevt; ;swp++) { + for (swp = swdevt;; swp++) { if (swp->sw_dev == NODEV) { if (swp->sw_vp == NULL) break; @@ -151,8 +154,8 @@ swapinit() error = swfree(p, swp - swdevt); if (error) { printf( - "Couldn't enable swapspace %d, error = %d", - swp-swdevt,error); + "Couldn't enable swapspace %d, error = %d", + swp - swdevt, error); } } } @@ -168,10 +171,9 @@ swstrategy(bp) #ifdef GENERIC /* - * A mini-root gets copied into the front of the swap - * and we run over top of the swap area just long - * enough for us to do a mkfs and restor of the real - * root (sure beats rewriting standalone restor). + * A mini-root gets copied into the front of the swap and we run over + * top of the swap area just long enough for us to do a mkfs and + * restor of the real root (sure beats rewriting standalone restor). */ #define MINIROOTSIZE 4096 if (rootdev == dumpdev) @@ -189,7 +191,7 @@ swstrategy(bp) if (bp->b_blkno < niswap) { if (niswdev > 1) { off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { + if (off + sz > dmmax) { bp->b_error = EINVAL; bp->b_flags |= B_ERROR; biodone(bp); @@ -198,7 +200,7 @@ swstrategy(bp) seg = bp->b_blkno / dmmax; index = seg % niswdev; seg /= niswdev; - bp->b_blkno = seg*dmmax + off; + bp->b_blkno = seg * dmmax + off; } else index = 0; } else { @@ -206,16 +208,16 @@ swstrategy(bp) bp->b_blkno -= niswap; for (index = niswdev, swp = &swdevt[niswdev]; - swp->sw_dev != NODEV; - swp++, index++) { + swp->sw_dev != NODEV; + swp++, index++) { if (bp->b_blkno < swp->sw_nblks) break; bp->b_blkno -= swp->sw_nblks; } if (swp->sw_dev == NODEV || - bp->b_blkno+sz > swp->sw_nblks) { + bp->b_blkno + sz > swp->sw_nblks) { bp->b_error = swp->sw_dev == NODEV ? 
- ENODEV : EINVAL; + ENODEV : EINVAL; bp->b_flags |= B_ERROR; biodone(bp); return; @@ -223,7 +225,7 @@ swstrategy(bp) } #else off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { + if (off + sz > dmmax) { bp->b_error = EINVAL; bp->b_flags |= B_ERROR; biodone(bp); @@ -232,7 +234,7 @@ swstrategy(bp) seg = bp->b_blkno / dmmax; index = seg % nswdev; seg /= nswdev; - bp->b_blkno = seg*dmmax + off; + bp->b_blkno = seg * dmmax + off; #endif } else index = 0; @@ -252,13 +254,13 @@ swstrategy(bp) vp->v_numoutput--; if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); + wakeup((caddr_t) & vp->v_numoutput); } } sp->sw_vp->v_numoutput++; } if (bp->b_vp != NULL) - brelvp(bp); + pbrelvp(bp); bp->b_vp = sp->sw_vp; VOP_STRATEGY(bp); } @@ -269,8 +271,9 @@ swstrategy(bp) * if already swapping on this device. */ struct swapon_args { - char *name; + char *name; }; + /* ARGSUSED */ int swapon(p, uap, retval) @@ -296,7 +299,7 @@ swapon(p, uap, retval) vrele(vp); return (ENOTBLK); } - dev = (dev_t)vp->v_rdev; + dev = (dev_t) vp->v_rdev; if (major(dev) >= nblkdev) { vrele(vp); return (ENXIO); @@ -318,11 +321,11 @@ swapon(p, uap, retval) #ifdef SEQSWAP /* * If we have reached a non-freed sequential device without - * finding what we are looking for, it is an error. - * That is because all interleaved devices must come first - * and sequential devices must be freed in order. + * finding what we are looking for, it is an error. That is + * because all interleaved devices must come first and + * sequential devices must be freed in order. */ - if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL) + if ((sp->sw_flags & (SW_SEQUENTIAL | SW_FREED)) == SW_SEQUENTIAL) break; #endif } @@ -351,22 +354,22 @@ swfree(p, index) sp = &swdevt[index]; vp = sp->sw_vp; - error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p); + error = VOP_OPEN(vp, FREAD | FWRITE, p->p_ucred, p); if (error) return (error); sp->sw_flags |= SW_FREED; nblks = sp->sw_nblks; /* - * Some devices may not exist til after boot time. - * If so, their nblk count will be 0. + * Some devices may not exist til after boot time. If so, their nblk + * count will be 0. */ if (nblks <= 0) { int perdev; dev_t dev = sp->sw_dev; if (bdevsw[major(dev)].d_psize == 0 || - (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + (nblks = (*bdevsw[major(dev)].d_psize) (dev)) == -1) { + (void) VOP_CLOSE(vp, FREAD | FWRITE, p->p_ucred, p); sp->sw_flags &= ~SW_FREED; return (ENXIO); } @@ -388,7 +391,7 @@ swfree(p, index) sp->sw_nblks = nblks; } if (nblks == 0) { - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + (void) VOP_CLOSE(vp, FREAD | FWRITE, p->p_ucred, p); sp->sw_flags &= ~SW_FREED; return (0); /* XXX error? 
*/ } @@ -399,26 +402,26 @@ swfree(p, index) blk = niswap; for (swp = &swdevt[niswdev]; swp != sp; swp++) blk += swp->sw_nblks; - rlist_free(&swaplist, blk, blk + nblks - 1); + rlist_free(&swaplist, blk, blk + nblks - 1); vm_swap_size += nblks; return (0); } #endif for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) { blk = nblks - dvbase; - + #ifdef SEQSWAP - if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap) + if ((vsbase = index * dmmax + dvbase * niswdev) >= niswap) panic("swfree"); #else - if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap) + if ((vsbase = index * dmmax + dvbase * nswdev) >= nswap) panic("swfree"); #endif if (blk > dmmax) blk = dmmax; /* XXX -- we need to exclude the first cluster as above */ /* but for now, this will work fine... */ - rlist_free(&swaplist, vsbase, vsbase + blk - 1); + rlist_free(&swaplist, vsbase, vsbase + blk - 1); vm_swap_size += blk; } return (0); diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index d9216deef571..92cd3a48716c 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ * * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93 - * $Id$ + * $Id: vm_unix.c,v 1.3 1994/08/02 07:55:41 davidg Exp $ */ /* @@ -54,7 +54,7 @@ extern int swap_pager_full; struct obreak_args { - char *nsiz; + char *nsiz; }; /* ARGSUSED */ @@ -69,34 +69,34 @@ obreak(p, uap, retval) int rv; register int diff; - old = (vm_offset_t)vm->vm_daddr; + old = (vm_offset_t) vm->vm_daddr; new = round_page(uap->nsiz); - if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur) - return(ENOMEM); + if ((int) (new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur) + return (ENOMEM); old = round_page(old + ctob(vm->vm_dsize)); diff = new - old; if (diff > 0) { if (swap_pager_full) { - return(ENOMEM); + return (ENOMEM); } rv = vm_allocate(&vm->vm_map, &old, diff, FALSE); if (rv != KERN_SUCCESS) { - return(ENOMEM); + return (ENOMEM); } vm->vm_dsize += btoc(diff); } else if (diff < 0) { diff = -diff; rv = vm_deallocate(&vm->vm_map, new, diff); if (rv != KERN_SUCCESS) { - return(ENOMEM); + return (ENOMEM); } vm->vm_dsize -= btoc(diff); } - return(0); + return (0); } struct ovadvise_args { - int anom; + int anom; }; /* ARGSUSED */ diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c index d8c8e136780a..17ea45b5f35e 100644 --- a/sys/vm/vm_user.c +++ b/sys/vm/vm_user.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -40,17 +40,17 @@ * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_user.c,v 1.3 1994/08/02 07:55:42 davidg Exp $ + * $Id: vm_user.c,v 1.4 1994/10/15 10:28:47 davidg Exp $ */ /* @@ -74,7 +74,7 @@ #include <vm/vm.h> -simple_lock_data_t vm_alloc_lock; /* XXX */ +simple_lock_data_t vm_alloc_lock; /* XXX */ #ifdef MACHVMCOMPAT /* @@ -87,6 +87,7 @@ struct svm_allocate_args { vm_size_t size; boolean_t anywhere; }; + /* ARGSUSED */ int svm_allocate(p, uap, retval) @@ -97,17 +98,17 @@ svm_allocate(p, uap, retval) vm_offset_t addr; int rv; - uap->map = p->p_map; /* XXX */ + uap->map = p->p_map; /* XXX */ - if (copyin((caddr_t)uap->addr, (caddr_t)&addr, sizeof (addr))) + if (copyin((caddr_t) uap->addr, (caddr_t) & addr, sizeof(addr))) rv = KERN_INVALID_ARGUMENT; else rv = vm_allocate(uap->map, &addr, uap->size, uap->anywhere); if (rv == KERN_SUCCESS) { - if (copyout((caddr_t)&addr, (caddr_t)uap->addr, sizeof(addr))) + if (copyout((caddr_t) & addr, (caddr_t) uap->addr, sizeof(addr))) rv = KERN_INVALID_ARGUMENT; } - return((int)rv); + return ((int) rv); } struct svm_deallocate_args { @@ -115,6 +116,7 @@ struct svm_deallocate_args { vm_offset_t addr; vm_size_t size; }; + /* ARGSUSED */ int svm_deallocate(p, uap, retval) @@ -124,9 +126,9 @@ svm_deallocate(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = p->p_map; /* XXX */ rv = vm_deallocate(uap->map, uap->addr, uap->size); - return((int)rv); + return ((int) rv); } struct svm_inherit_args { @@ -135,6 +137,7 @@ struct svm_inherit_args { vm_size_t size; vm_inherit_t inherit; }; + /* ARGSUSED */ int svm_inherit(p, uap, retval) @@ -144,9 +147,9 @@ svm_inherit(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = p->p_map; /* XXX */ rv = vm_inherit(uap->map, uap->addr, uap->size, uap->inherit); - return((int)rv); + return ((int) rv); } struct svm_protect_args { @@ -156,6 +159,7 @@ struct svm_protect_args { boolean_t setmax; vm_prot_t prot; }; + /* ARGSUSED */ int svm_protect(p, uap, retval) @@ -165,9 +169,9 @@ svm_protect(p, uap, retval) { int rv; - uap->map = p->p_map; /* XXX */ + uap->map = p->p_map; /* XXX */ rv = vm_protect(uap->map, uap->addr, uap->size, uap->setmax, uap->prot); - return((int)rv); + return ((int) rv); } #endif @@ -177,15 +181,15 @@ svm_protect(p, uap, retval) */ int vm_inherit(map, start, size, new_inheritance) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; - vm_inherit_t new_inheritance; + register vm_map_t map; + vm_offset_t start; + vm_size_t size; + vm_inherit_t new_inheritance; { if (map == NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); - return(vm_map_inherit(map, trunc_page(start), round_page(start+size), new_inheritance)); + return (vm_map_inherit(map, trunc_page(start), round_page(start + size), new_inheritance)); } /* @@ -195,16 +199,16 @@ vm_inherit(map, start, size, new_inheritance) int vm_protect(map, start, size, set_maximum, new_protection) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; - boolean_t set_maximum; - vm_prot_t new_protection; + register vm_map_t map; + vm_offset_t start; + vm_size_t size; + boolean_t set_maximum; + vm_prot_t new_protection; { if (map == NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); - return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum)); + return (vm_map_protect(map, trunc_page(start), round_page(start + size), new_protection, set_maximum)); } /* @@ -213,20 +217,19 @@ vm_protect(map, start, size, set_maximum, new_protection) */ int vm_allocate(map, addr, size, anywhere) - 
register vm_map_t map; - register vm_offset_t *addr; - register vm_size_t size; - boolean_t anywhere; + register vm_map_t map; + register vm_offset_t *addr; + register vm_size_t size; + boolean_t anywhere; { - int result; + int result; if (map == NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); if (size == 0) { *addr = 0; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } - if (anywhere) *addr = vm_map_min(map); else @@ -235,7 +238,7 @@ vm_allocate(map, addr, size, anywhere) result = vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere); - return(result); + return (result); } /* @@ -244,17 +247,17 @@ vm_allocate(map, addr, size, anywhere) */ int vm_deallocate(map, start, size) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; + register vm_map_t map; + vm_offset_t start; + vm_size_t size; { if (map == NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); if (size == (vm_offset_t) 0) - return(KERN_SUCCESS); + return (KERN_SUCCESS); - return(vm_map_remove(map, trunc_page(start), round_page(start+size))); + return (vm_map_remove(map, trunc_page(start), round_page(start + size))); } #if 1 @@ -263,27 +266,26 @@ vm_deallocate(map, start, size) */ int vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal) - register vm_map_t map; - register vm_offset_t *addr; - register vm_size_t size; - boolean_t anywhere; - vm_pager_t pager; - vm_offset_t poffset; - boolean_t internal; + register vm_map_t map; + register vm_offset_t *addr; + register vm_size_t size; + boolean_t anywhere; + vm_pager_t pager; + vm_offset_t poffset; + boolean_t internal; { - register vm_object_t object; - register int result; + register vm_object_t object; + register int result; if (map == NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); *addr = trunc_page(*addr); size = round_page(size); /* - * Lookup the pager/paging-space in the object cache. - * If it's not there, then create a new object and cache - * it. + * Lookup the pager/paging-space in the object cache. If it's not + * there, then create a new object and cache it. */ object = vm_object_lookup(pager); if (object == NULL) { @@ -291,8 +293,8 @@ vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal) /* * From Mike Hibler: "unnamed anonymous objects should never * be on the hash list ... For now you can just change - * vm_allocate_with_pager to not do vm_object_enter if this - * is an internal object ..." + * vm_allocate_with_pager to not do vm_object_enter if this is + * an internal object ..." */ if (!internal) vm_object_enter(object, pager); @@ -309,6 +311,6 @@ vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal) vm_object_deallocate(object); else if (pager != NULL) vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE); - return(result); + return (result); } #endif diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 2768260d04ad..eedef9e7f107 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -37,7 +37,7 @@ * SUCH DAMAGE. 
* * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.17 1994/11/17 01:22:45 gibbs Exp $ + * $Id: vnode_pager.c,v 1.18 1994/11/24 14:43:22 davidg Exp $ */ /* @@ -68,6 +68,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/vnode.h> @@ -81,14 +82,14 @@ #include <sys/buf.h> #include <miscfs/specfs/specdev.h> -int vnode_pager_putmulti(); +int vnode_pager_putmulti(); -void vnode_pager_init(); +void vnode_pager_init(); vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); -void vnode_pager_dealloc(); -int vnode_pager_getpage(); -int vnode_pager_getmulti(); -int vnode_pager_putpage(); +void vnode_pager_dealloc(); +int vnode_pager_getpage(); +int vnode_pager_getmulti(); +int vnode_pager_putpage(); boolean_t vnode_pager_haspage(); struct pagerops vnodepagerops = { @@ -132,10 +133,11 @@ vnode_pager_alloc(handle, size, prot, offset) { register vm_pager_t pager; register vn_pager_t vnp; - vm_object_t object; + vm_object_t object, tobject; struct vattr vattr; struct vnode *vp; struct proc *p = curproc; /* XXX */ + int rtval; /* * Pageout to vnode, no can do yet. @@ -148,9 +150,11 @@ vnode_pager_alloc(handle, size, prot, offset) * with vm_pager_lookup. */ vp = (struct vnode *) handle; - object = (vm_object_t) vp->v_vmdata; + while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD)) + tsleep((caddr_t) object, PVM, "vadead", 0); + pager = NULL; - if( object != NULL) + if (object != NULL) pager = object->pager; if (pager == NULL) { @@ -165,15 +169,15 @@ vnode_pager_alloc(handle, size, prot, offset) free((caddr_t) pager, M_VMPAGER); return (NULL); } - /* * And an object of the appropriate size */ - if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { + if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) { object = vm_object_allocate(round_page(vattr.va_size)); vm_object_enter(object, pager); vm_object_setpager(object, pager, 0, TRUE); } else { + printf("Error in getattr: %d\n", rtval); free((caddr_t) vnp, M_VMPGDATA); free((caddr_t) pager, M_VMPAGER); return (NULL); @@ -210,11 +214,22 @@ vnode_pager_dealloc(pager) { register vn_pager_t vnp = (vn_pager_t) pager->pg_data; register struct vnode *vp; + vm_object_t object; vp = vnp->vnp_vp; if (vp) { + int s = splbio(); + + object = (vm_object_t) vp->v_vmdata; + if (object) { + while (object->paging_in_progress) { + tsleep(object, PVM, "vnpdea", 0); + } + } + splx(s); + vp->v_vmdata = NULL; - vp->v_flag &= ~(VTEXT|VVMIO); + vp->v_flag &= ~(VTEXT | VVMIO); vrele(vp); } TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); @@ -226,8 +241,8 @@ int vnode_pager_getmulti(pager, m, count, reqpage, sync) vm_pager_t pager; vm_page_t *m; - int count; - int reqpage; + int count; + int reqpage; boolean_t sync; { @@ -257,7 +272,7 @@ vnode_pager_putpage(pager, m, sync) boolean_t sync; { vm_page_t marray[1]; - int rtvals[1]; + int rtvals[1]; if (pager == NULL) return FALSE; @@ -270,9 +285,9 @@ int vnode_pager_putmulti(pager, m, c, sync, rtvals) vm_pager_t pager; vm_page_t *m; - int c; + int c; boolean_t sync; - int *rtvals; + int *rtvals; { return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); } @@ -286,12 +301,12 @@ vnode_pager_haspage(pager, offset) register vn_pager_t vnp = (vn_pager_t) pager->pg_data; register struct vnode *vp = vnp->vnp_vp; daddr_t bn; - int err; + int err; daddr_t block; /* - * If filesystem no longer mounted or offset beyond end of - * file we do not have the page. 
+ * If filesystem no longer mounted or offset beyond end of file we do + * not have the page. */ if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size)) return FALSE; @@ -306,13 +321,8 @@ vnode_pager_haspage(pager, offset) * Assumes that the vnode has whole page or nothing. */ err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0); -/* - printf("vnode_pager_haspage: (%d)0x%x: err: %d, bn: %d\n", - offset, offset, err, bn); -*/ - if (err) { + if (err) return (TRUE); - } return ((long) bn < 0 ? FALSE : TRUE); } @@ -328,7 +338,7 @@ vnode_pager_haspage(pager, offset) void vnode_pager_setsize(vp, nsize) struct vnode *vp; - u_long nsize; + u_long nsize; { register vn_pager_t vnp; register vm_object_t object; @@ -344,9 +354,9 @@ vnode_pager_setsize(vp, nsize) * Hasn't changed size */ object = (vm_object_t) vp->v_vmdata; - if( object == NULL) + if (object == NULL) return; - if( (pager = object->pager) == NULL) + if ((pager = object->pager) == NULL) return; vnp = (vn_pager_t) pager->pg_data; if (nsize == vnp->vnp_size) @@ -366,11 +376,12 @@ vnode_pager_setsize(vp, nsize) * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nsize < vnp->vnp_size) { - vm_object_lock(object); - vm_object_page_remove(object, - round_page((vm_offset_t) nsize), vnp->vnp_size); - vm_object_unlock(object); - + if (round_page((vm_offset_t) nsize) < vnp->vnp_size) { + vm_object_lock(object); + vm_object_page_remove(object, + round_page((vm_offset_t) nsize), vnp->vnp_size); + vm_object_unlock(object); + } /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode... @@ -383,29 +394,10 @@ vnode_pager_setsize(vp, nsize) if (m) { kva = vm_pager_map_page(m); bzero((caddr_t) kva + (nsize & PAGE_MASK), - round_page(nsize) - nsize); + round_page(nsize) - nsize); vm_pager_unmap_page(kva); } } - } else { - - /* - * this allows the filesystem and VM cache to stay in sync if - * the VM page hasn't been modified... After the page is - * removed -- it will be faulted back in from the filesystem - * cache. 
- */ - if (vnp->vnp_size & PAGE_MASK) { - vm_page_t m; - - m = vm_page_lookup(object, trunc_page(vnp->vnp_size)); - if (m && (m->flags & PG_CLEAN)) { - vm_object_lock(object); - vm_object_page_remove(object, - vnp->vnp_size, vnp->vnp_size); - vm_object_unlock(object); - } - } } vnp->vnp_size = (vm_offset_t) nsize; object->size = round_page(nsize); @@ -453,8 +445,9 @@ vnode_pager_uncache(vp) * Not a mapped vnode */ object = (vm_object_t) vp->v_vmdata; - if( object == NULL) - return(TRUE); + if (object == NULL) + return (TRUE); + pager = object->pager; if (pager == NULL) return (TRUE); @@ -501,12 +494,15 @@ vnode_pager_addr(vp, address) struct vnode *vp; vm_offset_t address; { - int rtaddress; - int bsize; + int rtaddress; + int bsize; vm_offset_t block; struct vnode *rtvp; - int err; - int vblock, voffset; + int err; + int vblock, voffset; + + if ((int) address < 0) + return -1; bsize = vp->v_mount->mnt_stat.f_iosize; vblock = address / bsize; @@ -531,7 +527,7 @@ vnode_pager_iodone(bp) { bp->b_flags |= B_DONE; wakeup((caddr_t) bp); - if( bp->b_flags & B_ASYNC) { + if (bp->b_flags & B_ASYNC) { vm_offset_t paddr; vm_page_t m; vm_object_t obj = 0; @@ -539,35 +535,32 @@ vnode_pager_iodone(bp) int npages; paddr = (vm_offset_t) bp->b_data; - if( bp->b_bufsize != bp->b_bcount) - bzero( bp->b_data + bp->b_bcount, - bp->b_bufsize - bp->b_bcount); + if (bp->b_bufsize != bp->b_bcount) + bzero(bp->b_data + bp->b_bcount, + bp->b_bufsize - bp->b_bcount); npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE; -/* - printf("bcount: %d, bufsize: %d, npages: %d\n", - bp->b_bcount, bp->b_bufsize, npages); -*/ - for( i = 0; i < npages; i++) { + for (i = 0; i < npages; i++) { m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE)); obj = m->object; - if( m) { - m->flags |= PG_CLEAN; - m->flags &= ~(PG_LAUNDRY|PG_FAKE); + if (m) { + m->dirty = 0; + m->valid = VM_PAGE_BITS_ALL; + if (m->flags & PG_WANTED) + m->flags |= PG_REFERENCED; PAGE_WAKEUP(m); } else { panic("vnode_pager_iodone: page is gone!!!"); } } - pmap_qremove( paddr, npages); - if( obj) { + pmap_qremove(paddr, npages); + if (obj) { --obj->paging_in_progress; - if( obj->paging_in_progress == 0) + if (obj->paging_in_progress == 0) wakeup((caddr_t) obj); } else { panic("vnode_pager_iodone: object is gone???"); } - HOLDRELE(bp->b_vp); relpbuf(bp); } } @@ -580,74 +573,31 @@ vnode_pager_input_smlfs(vnp, m) vn_pager_t vnp; vm_page_t m; { - int i; - int s; - vm_offset_t paging_offset; + int i; + int s; struct vnode *dp, *vp; struct buf *bp; - vm_offset_t foff; vm_offset_t kva; - int fileaddr; - int block; + int fileaddr; + int block; vm_offset_t bsize; - int error = 0; + int error = 0; - paging_offset = m->object->paging_offset; vp = vnp->vnp_vp; bsize = vp->v_mount->mnt_stat.f_iosize; - foff = m->offset + paging_offset; - VOP_BMAP(vp, foff, &dp, 0, 0); + VOP_BMAP(vp, 0, &dp, 0, 0); kva = vm_pager_map_page(m); for (i = 0; i < PAGE_SIZE / bsize; i++) { - /* - * calculate logical block and offset - */ - block = foff / bsize + i; - s = splbio(); - while ((bp = incore(vp, block)) != 0) { - int amount; - - /* - * wait until the buffer is avail or gone - */ - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - tsleep((caddr_t) bp, PVM, "vnwblk", 0); - continue; - } - amount = bsize; - if ((foff + bsize) > vnp->vnp_size) - amount = vnp->vnp_size - foff; + if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid)) + continue; - /* - * make sure that this page is in the buffer - */ - if ((amount > 0) && amount <= bp->b_bcount) { - bp->b_flags |= B_BUSY; - 
splx(s); - - /* - * copy the data from the buffer - */ - bcopy(bp->b_un.b_addr, (caddr_t) kva + i * bsize, amount); - if (amount < bsize) { - bzero((caddr_t) kva + amount, bsize - amount); - } - bp->b_flags &= ~B_BUSY; - wakeup((caddr_t) bp); - goto nextblock; - } - break; - } - splx(s); - fileaddr = vnode_pager_addr(vp, foff + i * bsize); + fileaddr = vnode_pager_addr(vp, m->offset + i * bsize); if (fileaddr != -1) { bp = getpbuf(); - VHOLD(vp); /* build a minimal buffer header */ bp->b_flags = B_BUSY | B_READ | B_CALL; @@ -660,7 +610,7 @@ vnode_pager_input_smlfs(vnp, m) crhold(bp->b_wcred); bp->b_un.b_addr = (caddr_t) kva + i * bsize; bp->b_blkno = fileaddr / DEV_BSIZE; - bgetvp(dp, bp); + pbgetvp(dp, bp); bp->b_bcount = bsize; bp->b_bufsize = bsize; @@ -684,18 +634,20 @@ vnode_pager_input_smlfs(vnp, m) HOLDRELE(vp); if (error) break; + + vm_page_set_clean(m, i * bsize, bsize); + vm_page_set_valid(m, i * bsize, bsize); } else { + vm_page_set_clean(m, i * bsize, bsize); bzero((caddr_t) kva + i * bsize, bsize); } nextblock: } vm_pager_unmap_page(kva); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); if (error) { return VM_PAGER_ERROR; } - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - m->flags |= PG_CLEAN; - m->flags &= ~PG_LAUNDRY; return VM_PAGER_OK; } @@ -711,33 +663,31 @@ vnode_pager_input_old(vnp, m) { struct uio auio; struct iovec aiov; - int error; - int size; - vm_offset_t foff; + int error; + int size; vm_offset_t kva; error = 0; - foff = m->offset + m->object->paging_offset; /* * Return failure if beyond current EOF */ - if (foff >= vnp->vnp_size) { + if (m->offset >= vnp->vnp_size) { return VM_PAGER_BAD; } else { size = PAGE_SIZE; - if (foff + size > vnp->vnp_size) - size = vnp->vnp_size - foff; -/* - * Allocate a kernel virtual address and initialize so that - * we can use VOP_READ/WRITE routines. - */ + if (m->offset + size > vnp->vnp_size) + size = vnp->vnp_size - m->offset; + /* + * Allocate a kernel virtual address and initialize so that + * we can use VOP_READ/WRITE routines. + */ kva = vm_pager_map_page(m); aiov.iov_base = (caddr_t) kva; aiov.iov_len = size; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_offset = foff; + auio.uio_offset = m->offset; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_resid = size; @@ -755,8 +705,7 @@ vnode_pager_input_old(vnp, m) vm_pager_unmap_page(kva); } pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - m->flags |= PG_CLEAN; - m->flags &= ~PG_LAUNDRY; + m->dirty = 0; return error ? VM_PAGER_ERROR : VM_PAGER_OK; } @@ -767,39 +716,30 @@ int vnode_pager_input(vnp, m, count, reqpage) register vn_pager_t vnp; vm_page_t *m; - int count, reqpage; + int count, reqpage; { - int i; + int i; vm_offset_t kva, foff; - int size, sizea; + int size, sizea; vm_object_t object; - vm_offset_t paging_offset; struct vnode *dp, *vp; - int bsize; + int bsize; - int first, last; - int reqaddr, firstaddr; - int block, offset; + int first, last; + int reqaddr, firstaddr; + int block, offset; struct buf *bp, *bpa; - int counta; - int s; - int failflag; + int counta; + int s; + int failflag; - int error = 0; + int error = 0; object = m[reqpage]->object; /* all vm_page_t items are in same * object */ - paging_offset = object->paging_offset; vp = vnp->vnp_vp; - - /* - * Make sure underlying filesystem is still mounted. 
- */ - if (vp->v_mount == NULL) - return VM_PAGER_FAIL; - bsize = vp->v_mount->mnt_stat.f_iosize; /* get the UNDERLYING device for the file with VOP_BMAP() */ @@ -808,12 +748,12 @@ vnode_pager_input(vnp, m, count, reqpage) * originally, we did not check for an error return value -- assuming * an fs always has a bmap entry point -- that assumption is wrong!!! */ - foff = m[reqpage]->offset + paging_offset; + foff = m[reqpage]->offset; /* * if we can't bmap, use old VOP code */ - if (VOP_BMAP(vp, foff, &dp, 0, 0)) { + if (VOP_BMAP(vp, 0, &dp, 0, 0)) { for (i = 0; i < count; i++) { if (i != reqpage) { vnode_pager_freepage(m[i]); @@ -829,7 +769,7 @@ vnode_pager_input(vnp, m, count, reqpage) * blocksize, but it can handle large reads itself. */ } else if ((PAGE_SIZE / bsize) > 1 && - (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { + (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { for (i = 0; i < count; i++) { if (i != reqpage) { @@ -840,111 +780,28 @@ vnode_pager_input(vnp, m, count, reqpage) cnt.v_vnodepgsin++; return vnode_pager_input_smlfs(vnp, m[reqpage]); } -/* - * here on direct device I/O - */ - - -#ifdef NOTYET - if( (vp->v_flag & VVMIO) == 0) { -#endif /* - * This pathetic hack gets data from the buffer cache, if it's there. - * I believe that this is not really necessary, and the ends can be - * gotten by defaulting to the normal vfs read behavior, but this - * might be more efficient, because the will NOT invoke read-aheads - * and one of the purposes of this code is to bypass the buffer cache - * and keep from flushing it by reading in a program. + * if ANY DEV_BSIZE blocks are valid on a large filesystem block + * then, the entire page is valid -- */ - - /* - * calculate logical block and offset - */ - block = foff / bsize; - offset = foff % bsize; - s = splbio(); - - /* - * if we have a buffer in core, then try to use it - */ - while ((bp = incore(vp, block)) != 0) { - int amount; - - /* - * wait until the buffer is avail or gone - */ - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - tsleep((caddr_t) bp, PVM, "vnwblk", 0); - continue; - } - amount = PAGE_SIZE; - if ((foff + amount) > vnp->vnp_size) - amount = vnp->vnp_size - foff; - - /* - * make sure that this page is in the buffer - */ - if ((amount > 0) && (offset + amount) <= bp->b_bcount) { - bp->b_flags |= B_BUSY; - splx(s); - kva = kmem_alloc_wait( pager_map, PAGE_SIZE); - - /* - * map the requested page - */ - pmap_qenter(kva, &m[reqpage], 1); - - /* - * copy the data from the buffer - */ - bcopy(bp->b_un.b_addr + offset, (caddr_t) kva, amount); - if (amount < PAGE_SIZE) { - bzero((caddr_t) kva + amount, PAGE_SIZE - amount); - } - - /* - * unmap the page and free the kva - */ - pmap_qremove( kva, 1); - kmem_free_wakeup(pager_map, kva, PAGE_SIZE); - - /* - * release the buffer back to the block subsystem - */ - bp->b_flags &= ~B_BUSY; - wakeup((caddr_t) bp); - - /* - * we did not have to do any work to get the requested - * page, the read behind/ahead does not justify a read - */ - for (i = 0; i < count; i++) { - if (i != reqpage) { - vnode_pager_freepage(m[i]); - } - } - count = 1; - reqpage = 0; - m[0] = m[reqpage]; - - /* - * sorry for the goto - */ - goto finishup; - } - - /* - * buffer is nowhere to be found, read from the disk - */ - break; + if (m[reqpage]->valid) { + m[reqpage]->valid = VM_PAGE_BITS_ALL; + for (i = 0; i < count; i++) { + if (i != reqpage) + vnode_pager_freepage(m[i]); } - splx(s); -#ifdef NOTYET + return VM_PAGER_OK; } -#endif + /* + * here on direct device I/O + */ reqaddr = 
vnode_pager_addr(vp, foff);
+	if (reqaddr == -1 && foff < vnp->vnp_size) {
+		printf("reqaddr: %d, foff: %d, vnp_size: %d\n",
+			reqaddr, foff, vnp->vnp_size);
+		Debugger("");
+	}
 	s = splbio();
 
 	/*
@@ -956,12 +813,7 @@ vnode_pager_input(vnp, m, count, reqpage)
 	first = reqpage;
 	for (i = reqpage - 1; i >= 0; --i) {
 		if (failflag ||
-#ifdef NOTYET
-		    ((vp->v_flag & VVMIO) == 0 && incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize)) ||
-#else
-		    (incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize)) ||
-#endif
-		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+		    (vnode_pager_addr(vp, m[i]->offset))
 		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
 			vnode_pager_freepage(m[i]);
 			failflag = 1;
@@ -978,12 +830,7 @@ vnode_pager_input(vnp, m, count, reqpage)
 	last = reqpage + 1;
 	for (i = reqpage + 1; i < count; i++) {
 		if (failflag ||
-#ifdef NOTYET
-		    ((vp->v_flag & VVMIO) == 0 && incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize)) ||
-#else
-		    (incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize)) ||
-#endif
-		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+		    (vnode_pager_addr(vp, m[i]->offset))
 		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
 			vnode_pager_freepage(m[i]);
 			failflag = 1;
@@ -1005,11 +852,10 @@ vnode_pager_input(vnp, m, count, reqpage)
 		count -= first;
 		reqpage -= first;
 	}
-
 	/*
 	 * calculate the file virtual address for the transfer
 	 */
-	foff = m[0]->offset + paging_offset;
+	foff = m[0]->offset;
 
 	/*
 	 * and get the disk physical address (in bytes)
@@ -1030,25 +876,23 @@ vnode_pager_input(vnp, m, count, reqpage)
 	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 
 	counta = 0;
-	if( count*PAGE_SIZE > bsize)
+	if (count * PAGE_SIZE > bsize)
 		counta = (count - reqpage) - 1;
 	bpa = 0;
 	sizea = 0;
-	if( counta) {
+	if (counta) {
 		bpa = getpbuf();
 		count -= counta;
-		sizea = size - count*PAGE_SIZE;
+		sizea = size - count * PAGE_SIZE;
 		size = count * PAGE_SIZE;
 	}
-
 	bp = getpbuf();
-	kva = (vm_offset_t)bp->b_data;
+	kva = (vm_offset_t) bp->b_data;
 
 	/*
 	 * and map the pages to be read into the kva
 	 */
 	pmap_qenter(kva, m, count);
-	VHOLD(vp);
 
 	/* build a minimal buffer header */
 	bp->b_flags = B_BUSY | B_READ | B_CALL;
@@ -1061,7 +905,7 @@ vnode_pager_input(vnp, m, count, reqpage)
 	if (bp->b_wcred != NOCRED)
 		crhold(bp->b_wcred);
 	bp->b_blkno = firstaddr / DEV_BSIZE;
-	bgetvp(dp, bp);
+	pbgetvp(dp, bp);
 	bp->b_bcount = size;
 	bp->b_bufsize = size;
 
@@ -1071,13 +915,12 @@ vnode_pager_input(vnp, m, count, reqpage)
 	/* do the input */
 	VOP_STRATEGY(bp);
 
-	if( counta) {
-		for(i=0;i<counta;i++) {
-			vm_page_deactivate(m[count+i]);
+	if (counta) {
+		for (i = 0; i < counta; i++) {
+			vm_page_deactivate(m[count + i]);
 		}
-		pmap_qenter((vm_offset_t)bpa->b_data, &m[count], counta);
+		pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta);
 		++m[count]->object->paging_in_progress;
-		VHOLD(vp);
 		bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC;
 		bpa->b_iodone = vnode_pager_iodone;
 		/* B_PHYS is not set, but it is nice to fill this in */
@@ -1088,14 +931,13 @@ vnode_pager_input(vnp, m, count, reqpage)
 		if (bpa->b_wcred != NOCRED)
 			crhold(bpa->b_wcred);
 		bpa->b_blkno = (firstaddr + count * PAGE_SIZE) / DEV_BSIZE;
-		bgetvp(dp, bpa);
+		pbgetvp(dp, bpa);
 		bpa->b_bcount = sizea;
-		bpa->b_bufsize = counta*PAGE_SIZE;
+		bpa->b_bufsize = counta * PAGE_SIZE;
 		cnt.v_vnodepgsin += counta;
 		VOP_STRATEGY(bpa);
 	}
-
 	s = splbio();
 	/* we definitely need to be at splbio here */
 
@@ -1110,7 +952,7 @@ vnode_pager_input(vnp, m, count, reqpage)
 		if (size != count * PAGE_SIZE)
 			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
 	}
-	pmap_qremove( kva, count);
+	pmap_qremove(kva, count);
 
 	/*
 	 * free the buffer header back to the swap buffer pool
@@ -1121,8 +963,8 @@ vnode_pager_input(vnp, m, count, reqpage)
 finishup:
 	for (i = 0; i < count; i++) {
 		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
-		m[i]->flags |= PG_CLEAN;
-		m[i]->flags &= ~PG_LAUNDRY;
+		m[i]->dirty = 0;
+		m[i]->valid = VM_PAGE_BITS_ALL;
 		if (i != reqpage) {
 
 			/*
@@ -1138,9 +980,11 @@ finishup:
 			 * now tell them that it is ok to use
 			 */
 			if (!error) {
-				vm_page_deactivate(m[i]);
+				if (i != reqpage - 1)
+					vm_page_deactivate(m[i]);
+				else
+					vm_page_activate(m[i]);
 				PAGE_WAKEUP(m[i]);
-				m[i]->flags &= ~PG_FAKE;
 			} else {
 				vnode_pager_freepage(m[i]);
 			}
@@ -1160,36 +1004,41 @@ vnode_pager_output_old(vnp, m)
 	register vn_pager_t vnp;
 	vm_page_t m;
 {
-	vm_offset_t foff;
-	vm_offset_t kva;
+	vm_offset_t kva, kva2;
 	vm_offset_t size;
 	struct iovec aiov;
 	struct uio auio;
 	struct vnode *vp;
-	int	error;
+	int error;
 
 	vp = vnp->vnp_vp;
-	foff = m->offset + m->object->paging_offset;
 
 	/*
-	 * Return failure if beyond current EOF
+	 * Dont return failure if beyond current EOF placate the VM system.
 	 */
-	if (foff >= vnp->vnp_size) {
-		return VM_PAGER_BAD;
+	if (m->offset >= vnp->vnp_size) {
+		return VM_PAGER_OK;
 	} else {
 		size = PAGE_SIZE;
-		if (foff + size > vnp->vnp_size)
-			size = vnp->vnp_size - foff;
-/*
- * Allocate a kernel virtual address and initialize so that
- * we can use VOP_WRITE routines.
- */
+		if (m->offset + size > vnp->vnp_size)
+			size = vnp->vnp_size - m->offset;
+
+		kva2 = kmem_alloc(pager_map, PAGE_SIZE);
+		/*
+		 * Allocate a kernel virtual address and initialize so that
+		 * we can use VOP_WRITE routines.
+		 */
 		kva = vm_pager_map_page(m);
-		aiov.iov_base = (caddr_t) kva;
+		bcopy((caddr_t) kva, (caddr_t) kva2, size);
+		vm_pager_unmap_page(kva);
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+		PAGE_WAKEUP(m);
+
+		aiov.iov_base = (caddr_t) kva2;
 		aiov.iov_len = size;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
-		auio.uio_offset = foff;
+		auio.uio_offset = m->offset;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_rw = UIO_WRITE;
 		auio.uio_resid = size;
@@ -1197,13 +1046,13 @@ vnode_pager_output_old(vnp, m)
 
 		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
 
+		kmem_free_wakeup(pager_map, kva2, PAGE_SIZE);
 		if (!error) {
 			if ((size - auio.uio_resid) == 0) {
 				error = EINVAL;
 			}
 		}
-		vm_pager_unmap_page(kva);
-		return error ? VM_PAGER_ERROR: VM_PAGER_OK;
+		return error ? VM_PAGER_ERROR : VM_PAGER_OK;
 	}
 }
 
@@ -1215,42 +1064,31 @@ vnode_pager_output_smlfs(vnp, m)
 	vn_pager_t vnp;
 	vm_page_t m;
 {
-	int	i;
-	int	s;
-	vm_offset_t paging_offset;
+	int i;
+	int s;
 	struct vnode *dp, *vp;
 	struct buf *bp;
-	vm_offset_t foff;
 	vm_offset_t kva;
-	int	fileaddr;
+	int fileaddr;
 	vm_offset_t bsize;
-	int	error = 0;
+	int error = 0;
 
-	paging_offset = m->object->paging_offset;
 	vp = vnp->vnp_vp;
 	bsize = vp->v_mount->mnt_stat.f_iosize;
-	foff = m->offset + paging_offset;
 
-	VOP_BMAP(vp, foff, &dp, 0, 0);
+	VOP_BMAP(vp, 0, &dp, 0, 0);
 
 	kva = vm_pager_map_page(m);
 
 	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {
+		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
+			continue;
 		/*
 		 * calculate logical block and offset
 		 */
-		fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize);
 		if (fileaddr != -1) {
-			s = splbio();
-			bp = incore(vp, (foff / bsize) + i);
-			if (bp) {
-				bp = getblk(vp, (foff / bsize) + i, bp->b_bufsize, 0, 0);
-				bp->b_flags |= B_INVAL;
-				brelse(bp);
-			}
-			splx(s);
 			bp = getpbuf();
-			VHOLD(vp);
 
 			/* build a minimal buffer header */
 			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
@@ -1263,7 +1101,7 @@ vnode_pager_output_smlfs(vnp, m)
 				crhold(bp->b_wcred);
 			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
 			bp->b_blkno = fileaddr / DEV_BSIZE;
-			bgetvp(dp, bp);
+			pbgetvp(dp, bp);
 			++dp->v_numoutput;
 			/* for NFS */
 			bp->b_dirtyoff = 0;
@@ -1284,6 +1122,7 @@ vnode_pager_output_smlfs(vnp, m)
 			if ((bp->b_flags & B_ERROR) != 0)
 				error = EIO;
 
+			vm_page_set_clean(m, i * bsize, bsize);
 			/*
 			 * free the buffer header back to the swap buffer pool
 			 */
@@ -1305,25 +1144,25 @@ int
 vnode_pager_output(vnp, m, count, rtvals)
 	vn_pager_t vnp;
 	vm_page_t *m;
-	int	count;
-	int	*rtvals;
+	int count;
+	int *rtvals;
 {
-	int	i, j;
+	int i, j;
 	vm_offset_t kva, foff;
-	int	size;
+	int size;
 	vm_object_t object;
-	vm_offset_t paging_offset;
 	struct vnode *dp, *vp;
 	struct buf *bp;
 	vm_offset_t reqaddr;
-	int	bsize;
-	int	s;
+	int bsize;
+	int s;
+	daddr_t block;
+	struct timeval tv;
 
-	int	error = 0;
+	int error = 0;
 
 retryoutput:
 	object = m[0]->object;	/* all vm_page_t items are in same object */
-	paging_offset = object->paging_offset;
 
 	vp = vnp->vnp_vp;
 
@@ -1338,20 +1177,27 @@ retryoutput:
 	for (i = 0; i < count; i++)
 		rtvals[i] = VM_PAGER_AGAIN;
 
+	if ((int) m[0]->offset < 0) {
+		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset);
+		m[0]->dirty = 0;
+		rtvals[0] = VM_PAGER_OK;
+		return VM_PAGER_OK;
+	}
 	/*
 	 * if the filesystem does not have a bmap, then use the old code
 	 */
-	if (VOP_BMAP(vp, m[0]->offset + paging_offset, &dp, 0, 0)) {
+	if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
+	    (block == -1)) {
 
 		rtvals[0] = vnode_pager_output_old(vnp, m[0]);
 
-		pmap_clear_modify(VM_PAGE_TO_PHYS(m[0]));
-		m[0]->flags |= PG_CLEAN;
-		m[0]->flags &= ~PG_LAUNDRY;
+		m[0]->dirty = 0;
 		cnt.v_vnodeout++;
 		cnt.v_vnodepgsout++;
 		return rtvals[0];
 	}
+	tv = time;
+	VOP_UPDATE(vp, &tv, &tv, 0);
 
 	/*
 	 * if the filesystem has a small blocksize, then use the small block
@@ -1364,17 +1210,14 @@ retryoutput:
 			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
 			if (rtvals[i] == VM_PAGER_OK) {
 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
-				m[i]->flags |= PG_CLEAN;
-				m[i]->flags &= ~PG_LAUNDRY;
 			}
 		}
 		cnt.v_vnodeout++;
 		cnt.v_vnodepgsout += count;
 		return rtvals[0];
 	}
-
 	for (i = 0; i < count; i++) {
-		foff = m[i]->offset + paging_offset;
+		foff = m[i]->offset;
 		if (foff >= vnp->vnp_size) {
 			for (j = i; j < count; j++)
 				rtvals[j] = VM_PAGER_BAD;
@@ -1385,7 +1228,7 @@ retryoutput:
 	if (count == 0) {
 		return rtvals[0];
 	}
-	foff = m[0]->offset + paging_offset;
+	foff = m[0]->offset;
 	reqaddr = vnode_pager_addr(vp, foff);
 
 	/*
@@ -1393,7 +1236,7 @@ retryoutput:
 	 * for a page being in buffer cache.
 	 */
 	for (i = 1; i < count; i++) {
-		if (vnode_pager_addr(vp, m[i]->offset + paging_offset)
+		if (vnode_pager_addr(vp, m[i]->offset)
 		    != reqaddr + i * PAGE_SIZE) {
 			count = i;
 			break;
@@ -1414,50 +1257,12 @@ retryoutput:
 	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 
 	bp = getpbuf();
-	kva = (vm_offset_t)bp->b_data;
+	kva = (vm_offset_t) bp->b_data;
 
 	/*
 	 * and map the pages to be read into the kva
 	 */
 	pmap_qenter(kva, m, count);
-#if 0
-	printf("vnode: writing foff: %d, devoff: %d, size: %d\n",
-	    foff, reqaddr, size);
-#endif
-
-	/*
-	 * next invalidate the incore vfs_bio data
-	 */
-#ifdef NOTYET
-	if( (vp->v_flag & VVMIO) == 0) {
-#endif
-		for (i = 0; i < count; i++) {
-			int filblock = (foff + i * PAGE_SIZE) / bsize;
-			struct buf *fbp;
-
-			s = splbio();
-			fbp = incore(vp, filblock);
-			if (fbp) {
-				fbp = getblk(vp, filblock, fbp->b_bufsize, 0, 0);
-				if (fbp->b_flags & B_DELWRI) {
-					if (fbp->b_bufsize <= PAGE_SIZE)
-						fbp->b_flags &= ~B_DELWRI;
-					else {
-						bwrite(fbp);
-						fbp = getblk(vp, filblock,
-						    fbp->b_bufsize, 0, 0);
-					}
-				}
-				fbp->b_flags |= B_INVAL;
-				brelse(fbp);
-			}
-			splx(s);
-		}
-#ifdef NOTYET
-	}
-#endif
-
-
-	VHOLD(vp);
 	/* build a minimal buffer header */
 	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
 	bp->b_iodone = vnode_pager_iodone;
@@ -1470,7 +1275,7 @@ retryoutput:
 	if (bp->b_wcred != NOCRED)
 		crhold(bp->b_wcred);
 	bp->b_blkno = reqaddr / DEV_BSIZE;
-	bgetvp(dp, bp);
+	pbgetvp(dp, bp);
 	++dp->v_numoutput;
 
 	/* for NFS */
@@ -1498,7 +1303,7 @@ retryoutput:
 
 	if ((bp->b_flags & B_ERROR) != 0)
 		error = EIO;
-	pmap_qremove( kva, count);
+	pmap_qremove(kva, count);
 
 	/*
 	 * free the buffer header back to the swap buffer pool
@@ -1509,8 +1314,7 @@ retryoutput:
 	if (!error) {
 		for (i = 0; i < count; i++) {
 			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
-			m[i]->flags |= PG_CLEAN;
-			m[i]->flags &= ~PG_LAUNDRY;
+			m[i]->dirty = 0;
 			rtvals[i] = VM_PAGER_OK;
 		}
 	} else if (count != 1) {
@@ -1521,5 +1325,5 @@ retryoutput:
 	if (error) {
 		printf("vnode_pager_output: I/O write error\n");
 	}
-	return (error ? VM_PAGER_ERROR: VM_PAGER_OK);
+	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
index 9270b2c153ca..381af99feb69 100644
--- a/sys/vm/vnode_pager.h
+++ b/sys/vm/vnode_pager.h
@@ -36,7 +36,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)vnode_pager.h	8.1 (Berkeley) 6/11/93
- *	$Id$
+ *	$Id: vnode_pager.h,v 1.3 1994/08/02 07:55:43 davidg Exp $
  */
 
 #ifndef	_VNODE_PAGER_
@@ -46,12 +46,12 @@ /*
  * VNODE pager private data.
  */
 struct vnpager {
-	int	vnp_flags;	/* flags */
-	struct vnode	*vnp_vp;	/* vnode */
-	vm_size_t	vnp_size;	/* vnode current size */
+	int vnp_flags;		/* flags */
+	struct vnode *vnp_vp;	/* vnode */
+	vm_size_t vnp_size;	/* vnode current size */
 };
-typedef struct vnpager	*vn_pager_t;
+typedef struct vnpager *vn_pager_t;
 
 #define VN_PAGER_NULL	((vn_pager_t)0)
 
-#endif /* _VNODE_PAGER_ */
+#endif				/* _VNODE_PAGER_ */