Diffstat (limited to 'sys/vm/vm_pageout.c')
 sys/vm/vm_pageout.c | 269 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 198 insertions(+), 71 deletions(-)
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 606981f819e4..06f24d63479e 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -65,7 +65,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_pageout.c,v 1.128 1998/10/25 17:44:59 phk Exp $
+ * $Id: vm_pageout.c,v 1.129 1998/10/31 17:21:31 peter Exp $
*/
/*
@@ -211,13 +211,10 @@ void pmap_collect(void);
* Clean the page and remove it from the laundry.
*
* We set the busy bit to cause potential page faults on this page to
- * block.
- *
- * And we set pageout-in-progress to keep the object from disappearing
- * during pageout. This guarantees that the page won't move from the
- * inactive queue. (However, any other page on the inactive queue may
- * move!)
+ * block.  Note the careful timing, however: the busy bit isn't set until
+ * late, and until it is we cannot do anything that will mess with the page.
*/
+
static int
vm_pageout_clean(m)
vm_page_t m;
@@ -231,12 +228,23 @@ vm_pageout_clean(m)
object = m->object;
/*
+ * It doesn't cost us anything to page out OBJT_DEFAULT or OBJT_SWAP
+ * objects with the new swapper, but we could have serious problems
+ * paging out other object types if there is insufficient memory.
+ *
+ * Unfortunately, checking free memory here is far too late, so the
+ * check has been moved up a procedural level (see the object-type
+ * test added to vm_pageout_scan() below).
+ */
+
+#if 0
+ /*
* If not OBJT_SWAP, additional memory may be needed to do the pageout.
* Try to avoid the deadlock.
*/
if ((object->type == OBJT_DEFAULT) &&
((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min))
return 0;
+#endif
/*
* Don't mess with the page if it's busy.
@@ -245,12 +253,21 @@ vm_pageout_clean(m)
((m->busy != 0) || (m->flags & PG_BUSY)))
return 0;
+#if 0
+ /*
+ * XXX REMOVED XXX. vm_object_collapse() can block, which can
+ * change the page state. Calling vm_object_collapse() might also
+ * destroy or rename the page because we have not busied it yet!!!
+ * So this code segment is removed.
+ */
/*
- * Try collapsing before it's too late.
+ * Try collapsing before it's too late. XXX huh? Why are we doing
+ * this here?
*/
if (object->backing_object) {
vm_object_collapse(object);
}
+#endif
mc[vm_pageout_page_count] = m;
pageout_count = 1;
@@ -351,6 +368,16 @@ do_backward:
return vm_pageout_flush(&mc[page_base], pageout_count, 0);
}
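For orientation, here is a minimal sketch of the clustering idea that feeds
vm_pageout_flush() above. The real forward/do_backward: scans elided by this
hunk carry more guards (page_base handling, object bounds), so the variable
names and exact tests here are illustrative only:

	vm_page_t p;
	int pageout_count = 1;

	mc[0] = m;		/* the anchor page */
	while (pageout_count < vm_pageout_page_count &&
	    (p = vm_page_lookup(object, m->pindex + pageout_count)) != NULL &&
	    p->dirty != 0 && p->busy == 0 && (p->flags & PG_BUSY) == 0 &&
	    p->queue == PQ_INACTIVE) {
		/* extend the run with an adjacent dirty, idle page */
		mc[pageout_count++] = p;
	}
	return vm_pageout_flush(mc, pageout_count, 0);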
+/*
+ * vm_pageout_flush() - launder the given pages
+ *
+ * The given pages are laundered.  Note that we set up for the start of
+ * I/O ( i.e. busy the page ), mark it read-only, and bump the object
+ * reference count all in here rather than in the parent.  If we want
+ * the parent to do more sophisticated things we may have to change
+ * the ordering.
+ */
+
int
vm_pageout_flush(mc, count, flags)
vm_page_t *mc;
@@ -362,6 +389,14 @@ vm_pageout_flush(mc, count, flags)
int numpagedout = 0;
int i;
+ /*
+ * Initiate I/O. Bump the vm_page_t->busy counter and
+ * mark the pages read-only.
+ *
+ * We do not have to fix up the clean/dirty bits here... we can
+ * allow the pager to do it after the I/O completes.
+ */
+
for (i = 0; i < count; i++) {
vm_page_io_start(mc[i]);
vm_page_protect(mc[i], VM_PROT_READ);
@@ -585,25 +620,24 @@ vm_pageout_map_deactivate_pages(map, desired)
}
#endif
+/*
+ * Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
+ * to vnode deadlocks.  We only take the temporary object reference for
+ * OBJT_DEFAULT and OBJT_SWAP objects, which we know can be trivially
+ * freed; the reference keeps the object from being torn down while we
+ * free what may be its last resident page.
+ */
+
void
vm_pageout_page_free(vm_page_t m) {
- struct vnode *vp;
- vm_object_t object;
-
- object = m->object;
- object->ref_count++;
-
- if (object->type == OBJT_VNODE) {
- vp = object->handle;
- vp->v_usecount++;
- if (VSHOULDBUSY(vp))
- vbusy(vp);
- }
+ vm_object_t object = m->object;
+ int type = object->type;
+ if (type == OBJT_SWAP || type == OBJT_DEFAULT)
+ vm_object_reference(object);
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
- vm_object_deallocate(object);
+ if (type == OBJT_SWAP || type == OBJT_DEFAULT)
+ vm_object_deallocate(object);
}
/*
@@ -613,9 +647,10 @@ static int
vm_pageout_scan()
{
vm_page_t m, next;
- int page_shortage, addl_page_shortage, maxscan, pcount;
+ int page_shortage, maxscan, pcount;
+ int addl_page_shortage, addl_page_shortage_init;
int maxlaunder;
- int pages_freed;
+ int launder_loop = 0;
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
vm_object_t object;
@@ -629,31 +664,53 @@ vm_pageout_scan()
*/
pmap_collect();
- /*
- * Start scanning the inactive queue for pages we can free. We keep
- * scanning until we have enough free pages or we have scanned through
- * the entire queue. If we encounter dirty pages, we start cleaning
- * them.
- */
-
- pages_freed = 0;
- addl_page_shortage = vm_pageout_deficit;
+ addl_page_shortage_init = vm_pageout_deficit;
vm_pageout_deficit = 0;
if (max_page_launder == 0)
max_page_launder = 1;
- maxlaunder = (cnt.v_inactive_target > max_page_launder) ?
- max_page_launder : cnt.v_inactive_target;
-rescan0:
- maxscan = cnt.v_inactive_count;
- for( m = TAILQ_FIRST(&vm_page_queue_inactive);
+ /*
+ * Calculate the number of pages we want to either free or move
+ * to the cache.
+ */
+
+ page_shortage = (cnt.v_free_target + cnt.v_cache_min) -
+ (cnt.v_free_count + cnt.v_cache_count);
+ page_shortage += addl_page_shortage_init;
+
+ /*
+ * Figure out what to do with dirty pages when they are encountered.
+ * Assume that 1/3 of the pages on the inactive list are clean. If
+ * we think we can reach our target, disable laundering (do not
+ * clean any dirty pages). If we miss the target we will loop back
+ * up and do a laundering run.
+ */
- (m != NULL) && (maxscan-- > 0) &&
- ((cnt.v_cache_count + cnt.v_free_count) <
- (cnt.v_cache_min + cnt.v_free_target));
+ if (cnt.v_inactive_count / 3 > page_shortage) {
+ maxlaunder = 0;
+ launder_loop = 0;
+ } else {
+ maxlaunder =
+ (cnt.v_inactive_target > max_page_launder) ?
+ max_page_launder : cnt.v_inactive_target;
+ launder_loop = 1;
+ }
- m = next) {
+ /*
+ * Start scanning the inactive queue for pages we can move to the
+ * cache or free. The scan will stop when the target is reached or
+ * we have scanned the entire inactive queue.
+ */
+
+rescan0:
+ addl_page_shortage = addl_page_shortage_init;
+ maxscan = cnt.v_inactive_count;
+ for (
+ m = TAILQ_FIRST(&vm_page_queue_inactive);
+ m != NULL && maxscan-- > 0 && page_shortage > 0;
+ m = next
+ ) {
cnt.v_pdpages++;
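A worked example of the laundering heuristic above, with made-up counter
values:

	/*
	 * Suppose v_free_target = 1000, v_cache_min = 500,
	 * v_free_count = 600, v_cache_count = 400, and no carried-over
	 * deficit:
	 *
	 *	page_shortage = (1000 + 500) - (600 + 400) = 500
	 *
	 * With v_inactive_count = 3000 the clean-page estimate is
	 * 3000 / 3 = 1000 > 500, so the first pass runs with
	 * maxlaunder = 0 and only frees or caches clean pages.  A
	 * laundering pass happens only if the shortage survives the
	 * first scan.
	 */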
@@ -681,19 +738,21 @@ rescan0:
}
/*
- * If the object is not being used, we ignore previous references.
+ * If the object is not being used, we ignore previous
+ * references.
*/
if (m->object->ref_count == 0) {
vm_page_flag_clear(m, PG_REFERENCED);
pmap_clear_reference(VM_PAGE_TO_PHYS(m));
/*
- * Otherwise, if the page has been referenced while in the inactive
- * queue, we bump the "activation count" upwards, making it less
- * likely that the page will be added back to the inactive queue
- * prematurely again. Here we check the page tables (or emulated
- * bits, if any), given the upper level VM system not knowing anything
- * about existing references.
+ * Otherwise, if the page has been referenced while in the
+ * inactive queue, we bump the "activation count" upwards,
+ * making it less likely that the page will be added back to
+ * the inactive queue prematurely again. Here we check the
+ * page tables (or emulated bits, if any), since the upper
+ * level VM system knows nothing about existing
+ * references.
*/
} else if (((m->flags & PG_REFERENCED) == 0) &&
(actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)))) {
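The body of this branch is elided by the hunk; presumably it reactivates the
page and credits the hardware-observed references, roughly:

			vm_page_activate(m);
			m->act_count += (actcount + ACT_ADVANCE);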
@@ -703,10 +762,10 @@ rescan0:
}
/*
- * If the upper level VM system knows about any page references,
- * we activate the page. We also set the "activation count" higher
- * than normal so that we will less likely place pages back onto the
- * inactive queue again.
+ * If the upper level VM system knows about any page
+ * references, we activate the page.  We also set the
+ * "activation count" higher than normal so that we are less
+ * likely to place pages back onto the inactive queue again.
*/
if ((m->flags & PG_REFERENCED) != 0) {
vm_page_flag_clear(m, PG_REFERENCED);
@@ -717,9 +776,10 @@ rescan0:
}
/*
- * If the upper level VM system doesn't know anything about the
- * page being dirty, we have to check for it again. As far as the
- * VM code knows, any partially dirty pages are fully dirty.
+ * If the upper level VM system doesn't know anything about
+ * the page being dirty, we have to check for it again. As
+ * far as the VM code knows, any partially dirty pages are
+ * fully dirty.
*/
if (m->dirty == 0) {
vm_page_test_dirty(m);
@@ -733,14 +793,14 @@ rescan0:
if (m->valid == 0) {
vm_pageout_page_free(m);
cnt.v_dfree++;
- pages_freed++;
+ --page_shortage;
/*
* Clean pages can be placed onto the cache queue.
*/
} else if (m->dirty == 0) {
vm_page_cache(m);
- pages_freed++;
+ --page_shortage;
/*
* Dirty pages need to be paged out. Note that we clean
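The dirty-page path is cut off at this hunk boundary. The gist, hedged, is
that laundering is attempted only while the maxlaunder budget computed
earlier lasts, along the lines of:

		} else if (maxlaunder > 0) {
			int written;

			/* ... the object and vnode checks shown in the
			   following hunks happen here ... */

			written = vm_pageout_clean(m);
			maxlaunder -= written;
		}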
@@ -763,8 +823,8 @@ rescan0:
}
/*
- * We don't bother paging objects that are "dead". Those
- * objects are in a "rundown" state.
+ * We don't bother paging objects that are "dead".
+ * Those objects are in a "rundown" state.
*/
if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
s = splvm();
@@ -774,10 +834,61 @@ rescan0:
continue;
}
- if ((object->type == OBJT_VNODE) &&
- (object->flags & OBJ_DEAD) == 0) {
+ /*
+ * For now we protect against potential memory
+ * deadlocks by requiring significant memory to be
+ * free if the object is not OBJT_DEFAULT or OBJT_SWAP.
+ * We do not 'trust' any other object type to operate
+ * with low memory, not even OBJT_DEVICE. The VM
+ * allocator will special-case allocations done by
+ * the pageout daemon so the check below actually
+ * does have some hysteresis in it. It isn't the best
+ * solution, though.
+ */
+
+ if (
+ object->type != OBJT_DEFAULT &&
+ object->type != OBJT_SWAP &&
+ cnt.v_free_count < cnt.v_free_reserved
+ ) {
+ s = splvm();
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ splx(s);
+ continue;
+ }
+
+ /*
+ * Presumably we have sufficient free memory to do
+ * the more sophisticated checks and locking required
+ * for vnodes.
+ *
+ * The object is already known NOT to be dead. The
+ * vget() may still block, though, because
+ * VOP_ISLOCKED() doesn't check to see if an inode
+ * (v_data) is associated with the vnode. If it isn't,
+ * vget() will load it in from disk.  Worse, vget()
+ * may actually get stuck waiting on "inode" if another
+ * process is already busy bringing the inode in.
+ * This is bad news for us either way.
+ *
+ * So for the moment we check v_data == NULL as a
+ * workaround. This means that vnodes which do not
+ * use v_data in the way we expect probably will not
+ * wind up being paged out by the pager and it will be
+ * up to the syncer to get them. That's better than
+ * us blocking here.
+ *
+ * This whole code section is bogus - we need to fix
+ * the vnode pager to handle vm_page_t's without us
+ * having to do any sophisticated VOP tests.
+ */
+
+ if (object->type == OBJT_VNODE) {
vp = object->handle;
+
if (VOP_ISLOCKED(vp) ||
+ vp->v_data == NULL ||
vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
if ((m->queue == PQ_INACTIVE) &&
(m->hold_count == 0) &&
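The rest of this failure branch is elided by the hunk; presumably it follows
the requeue idiom used earlier in this function, roughly:

			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
			splx(s);
			continue;

i.e. rotate the page to the tail of the inactive queue and move on rather
than letting the daemon block on the vnode.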
@@ -844,19 +955,34 @@ rescan0:
}
/*
- * Compute the page shortage. If we are still very low on memory be
- * sure that we will move a minimal amount of pages from active to
- * inactive.
+ * If we still have a page shortage and we didn't launder anything,
+ * run the inactive scan again and launder something this time.
+ */
+
+ if (launder_loop == 0 && page_shortage > 0) {
+ launder_loop = 1;
+ maxlaunder =
+ (cnt.v_inactive_target > max_page_launder) ?
+ max_page_launder : cnt.v_inactive_target;
+ goto rescan0;
+ }
+
+ /*
+ * Compute the page shortage from the point of view of having to
+ * move pages from the active queue to the inactive queue.
*/
+
page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
(cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
page_shortage += addl_page_shortage;
- if (page_shortage <= 0) {
- page_shortage = 0;
- }
+
+ /*
+ * Scan the active queue for things we can deactivate
+ */
pcount = cnt.v_active_count;
m = TAILQ_FIRST(&vm_page_queue_active);
+
while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
/*
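The body of this while loop is elided by the diff. As a hedged sketch of the
deactivation logic (the real code also handles busy pages, wirings, and the
PG_REFERENCED flag), the per-page work is roughly:

		vm_page_t next = TAILQ_NEXT(m, pageq);
		int refs = pmap_ts_referenced(VM_PAGE_TO_PHYS(m));

		if (refs) {
			/* recently used: credit the page, keep it active */
			m->act_count += ACT_ADVANCE + refs;
			if (m->act_count > ACT_MAX)
				m->act_count = ACT_MAX;
		} else if (m->act_count > 0) {
			/* idle: decay the activation count */
			m->act_count -= ACT_DECLINE;
		} else {
			/* no activity credit left: feed the inactive queue */
			vm_page_deactivate(m);
			--page_shortage;
		}
		m = next;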
@@ -943,10 +1069,14 @@ rescan0:
}
s = splvm();
+
/*
* We try to maintain some *really* free pages, this allows interrupt
- * code to be guaranteed space.
+ * code to be guaranteed space. Since both cache and free queues
+ * are considered basically 'free', moving pages from cache to free
+ * does not affect other calculations.
*/
+
while (cnt.v_free_count < cnt.v_free_reserved) {
static int cache_rover = 0;
m = vm_page_list_find(PQ_CACHE, cache_rover);
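The loop body is elided here; a plausible completion, assuming the
PQ_PRIME2/PQ_L2_MASK striding constants this era of vm_page.h provides:

		if (m == NULL)
			break;
		/* spread successive frees across the PQ_CACHE queues */
		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
		vm_pageout_page_free(m);
		cnt.v_dfree++;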
@@ -995,7 +1125,6 @@ rescan0:
#endif
}
-
/*
* make sure that we have swap space -- if we are low on memory and
* swap -- then kill the biggest process.
@@ -1242,10 +1371,8 @@ vm_pageout()
cnt.v_pdwakeups++;
vm_pages_needed = 0;
splx(s);
- vm_pager_sync();
vm_pageout_scan();
vm_pageout_deficit = 0;
- vm_pager_sync();
wakeup(&cnt.v_free_count);
}
}