author    Kirk McKusick <mckusick@FreeBSD.org>  2000-01-10 00:24:24 +0000
committer Kirk McKusick <mckusick@FreeBSD.org>  2000-01-10 00:24:24 +0000
commit    cf60e8e4bf442e10aeb65803cfcbdb1cd3a875e3 (patch)
tree      126ab6feda3d7d9183f59410c61d778b2e490d46
parent    bd5f5da94da66c03392e82dcb9631879023c437e (diff)
-rw-r--r--  sys/contrib/softupdates/ffs_softdep.c  | 242
-rw-r--r--  sys/kern/vfs_bio.c                     | 149
-rw-r--r--  sys/sys/bio.h                          |   7
-rw-r--r--  sys/sys/buf.h                          |   7
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c                |   7
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c              | 242
-rw-r--r--  sys/ufs/ffs/ffs_softdep_stub.c         |   4
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c               |  13
-rw-r--r--  sys/ufs/ffs/ffs_vnops.c                |  49
-rw-r--r--  sys/ufs/ufs/ufs_extern.h               |   2
-rw-r--r--  sys/ufs/ufs/ufs_lookup.c               |  17
-rw-r--r--  sys/ufs/ufs/ufs_vnops.c                |  66
12 files changed, 626 insertions(+), 179 deletions(-)
diff --git a/sys/contrib/softupdates/ffs_softdep.c b/sys/contrib/softupdates/ffs_softdep.c
index 14e1bb244153..dee1891d7199 100644
--- a/sys/contrib/softupdates/ffs_softdep.c
+++ b/sys/contrib/softupdates/ffs_softdep.c
@@ -52,7 +52,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00
+ * from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00
* $FreeBSD$
*/
@@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *));
static void softdep_deallocate_dependencies __P((struct buf *));
static int softdep_fsync __P((struct vnode *));
static int softdep_process_worklist __P((struct mount *));
+static void softdep_move_dependencies __P((struct buf *, struct buf *));
+static int softdep_count_dependencies __P((struct buf *bp, int));
struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
@@ -219,6 +221,8 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_fsync, /* io_fsync */
softdep_process_worklist, /* io_sync */
+ softdep_move_dependencies, /* io_movedeps */
+ softdep_count_dependencies, /* io_countdeps */
};
/*
@@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
-#if defined(__FreeBSD__)
SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
@@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0
SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
-#else /* !__FreeBSD__ */
-struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
-struct ctldebug debug21 = { "tickdelay", &tickdelay };
-struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
-struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
-struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
-struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
-struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
-struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
-struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
-struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
-#endif /* !__FreeBSD__ */
-
#endif /* DEBUG */
/*
@@ -637,6 +627,31 @@ softdep_process_worklist(matchmnt)
}
/*
+ * Move dependencies from one buffer to another.
+ */
+static void
+softdep_move_dependencies(oldbp, newbp)
+ struct buf *oldbp;
+ struct buf *newbp;
+{
+ struct worklist *wk, *wktail;
+
+ if (LIST_FIRST(&newbp->b_dep) != NULL)
+ panic("softdep_move_dependencies: need merge code");
+ wktail = 0;
+ ACQUIRE_LOCK(&lk);
+ while (wk = LIST_FIRST(&oldbp->b_dep)) {
+ LIST_REMOVE(wk, wk_list);
+ if (wktail == 0)
+ LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
+ else
+ LIST_INSERT_AFTER(wktail, wk, wk_list);
+ wktail = wk;
+ }
+ FREE_LOCK(&lk);
+}
+
+/*
* Purge the work list of all items associated with a particular mount point.
*/
int
@@ -1633,11 +1648,6 @@ softdep_setup_freeblocks(ip, length)
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
- * Add the freeblks structure to the list of operations that
- * must await the zero'ed inode being written to disk.
- */
- WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
- /*
* Because the file length has been truncated to zero, any
* pending block allocation dependency structures associated
* with this inode are obsolete and can simply be de-allocated.
@@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length)
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, 1);
+ /*
+ * Add the freeblks structure to the list of operations that
+ * must await the zero'ed inode being written to disk. If we
+ * still have a bitmap dependency, then the inode has never been
+ * written to disk, so we can process the freeblks immediately.
+ */
+ if ((inodedep->id_state & DEPCOMPLETE) == 0)
+ handle_workitem_freeblocks(freeblks);
+ else
+ WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
FREE_LOCK(&lk);
bdwrite(bp);
/*
@@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode)
*/
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) {
- add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
+ handle_workitem_freefile(freefile);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
- * necessary since the inode is being deallocated. We could process
- * the freefile immediately, but then we would have to clear the
- * id_inowait dependencies here and it is easier just to let the
- * zero'ed inode be written and let them be cleaned up in the
- * normal followup actions that follow the inode write.
+ * necessary since the inode is being deallocated. We set the
+ * ALLCOMPLETE flags since the bitmap now properly shows that the
+ * inode is not allocated. Even if the inode is actively being
+ * written, it has been rolled back to its zero'ed state, so we
+ * are ensured that a zero inode is what is on the disk. For short
+ * lived files, this change will usually result in removing all the
+ * dependencies from the inode so that it can be freed immediately.
*/
- if ((inodedep->id_state & DEPCOMPLETE) == 0) {
- inodedep->id_state |= DEPCOMPLETE;
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
+ WORKLIST_REMOVE(&inodedep->id_list);
}
- /*
- * If the inodedep has no dependencies associated with it,
- * then we must free it here and free the file immediately.
- * This case arises when an early allocation fails (for
- * example, the user is over their file quota).
- */
- if (free_inodedep(inodedep) == 0)
+ if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
- else
- add_to_worklist(&freefile->fx_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(&lk);
+ } else {
+ FREE_LOCK(&lk);
+ handle_workitem_freefile(freefile);
+ }
}
/*
@@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir)
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
+ FREE_LOCK(&lk);
} else {
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
- add_to_worklist(&dirrem->dm_list);
+ FREE_LOCK(&lk);
+ handle_workitem_remove(dirrem);
}
- FREE_LOCK(&lk);
}
/*
@@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
/*
- * Called whenever the link count on an inode is increased.
+ * Called whenever the link count on an inode is changed.
* It creates an inode dependency so that the new reference(s)
* to the inode cannot be committed to disk until the updated
* inode has been written.
*/
void
-softdep_increase_linkcnt(ip)
+softdep_change_linkcnt(ip)
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
+ if (ip->i_nlink < ip->i_effnlink)
+ panic("softdep_change_linkcnt: bad delta");
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
}
@@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem)
return;
}
ip = VTOI(vp);
+ ACQUIRE_LOCK(&lk);
+ if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
+ panic("handle_workitem_remove: lost inodedep 1");
/*
* Normal file deletion.
*/
if ((dirrem->dm_state & RMDIR) == 0) {
ip->i_nlink--;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad file delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
vput(vp);
num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
@@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem)
* the parent decremented to account for the loss of "..".
*/
ip->i_nlink -= 2;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad dir delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
softdep_error("handle_workitem_remove: truncate", error);
/*
@@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem)
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
+ /*
+ * If we still have a bitmap dependency, then the inode has never
+ * been written to disk. Drop the dependency as it is no longer
+ * necessary since the inode is being deallocated. We set the
+ * ALLCOMPLETE flags since the bitmap now properly shows that the
+ * inode is not allocated. Even if the inode is actively being
+ * written, it has been rolled back to its zero'ed state, so we
+ * are ensured that a zero inode is what is on the disk. For short
+ * lived files, this change will usually result in removing all the
+ * dependencies from the inode so that it can be freed immediately.
+ */
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC,
- &inodedep);
+ if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
+ panic("handle_workitem_remove: lost inodedep 2");
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ inodedep->id_state |= ALLCOMPLETE;
+ LIST_REMOVE(inodedep, id_deps);
+ inodedep->id_buf = NULL;
+ WORKLIST_REMOVE(&inodedep->id_list);
+ }
dirrem->dm_state = 0;
dirrem->dm_oldinum = dirrem->dm_dirinum;
- WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
- FREE_LOCK(&lk);
- vput(vp);
+ if (free_inodedep(inodedep) == 0) {
+ WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
+ FREE_LOCK(&lk);
+ vput(vp);
+ } else {
+ FREE_LOCK(&lk);
+ vput(vp);
+ handle_workitem_remove(dirrem);
+ }
}
/*
@@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip)
FREE_LOCK(&lk);
return;
}
- if (inodedep->id_nlinkdelta != 0) {
- ip->i_effnlink -= inodedep->id_nlinkdelta;
- ip->i_flag |= IN_MODIFIED;
- inodedep->id_nlinkdelta = 0;
- (void) free_inodedep(inodedep);
- }
+ ip->i_effnlink -= inodedep->id_nlinkdelta;
FREE_LOCK(&lk);
}
@@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor)
FREE_LOCK(&lk);
return;
}
- if (ip->i_nlink < ip->i_effnlink)
+ if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
- inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
/*
* Changes have been initiated. Anything depending on these
* changes cannot occur until this inode has been written.
@@ -4405,6 +4452,87 @@ clear_inodedeps(p)
}
/*
+ * Function to determine if the buffer has outstanding dependencies
+ * that will cause a roll-back if the buffer is written. If wantcount
+ * is set, return number of dependencies, otherwise just yes or no.
+ */
+static int
+softdep_count_dependencies(bp, wantcount)
+ struct buf *bp;
+ int wantcount;
+{
+ struct worklist *wk;
+ struct inodedep *inodedep;
+ struct indirdep *indirdep;
+ struct allocindir *aip;
+ struct pagedep *pagedep;
+ struct diradd *dap;
+ int i, retval;
+
+ retval = 0;
+ ACQUIRE_LOCK(&lk);
+ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) {
+ switch (wk->wk_type) {
+
+ case D_INODEDEP:
+ inodedep = WK_INODEDEP(wk);
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ /* bitmap allocation dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
+ /* direct block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_INDIRDEP:
+ indirdep = WK_INDIRDEP(wk);
+ for (aip = LIST_FIRST(&indirdep->ir_deplisthd);
+ aip; aip = LIST_NEXT(aip, ai_next)) {
+ /* indirect block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_PAGEDEP:
+ pagedep = WK_PAGEDEP(wk);
+ for (i = 0; i < DAHASHSZ; i++) {
+ for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]);
+ dap; dap = LIST_NEXT(dap, da_pdlist)) {
+ /* directory entry dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ }
+ continue;
+
+ case D_BMSAFEMAP:
+ case D_ALLOCDIRECT:
+ case D_ALLOCINDIR:
+ case D_MKDIR:
+ /* never a dependency on these blocks */
+ continue;
+
+ default:
+ panic("softdep_check_for_rollback: Unexpected type %s",
+ TYPENAME(wk->wk_type));
+ /* NOTREACHED */
+ }
+ }
+out:
+ FREE_LOCK(&lk);
+ return retval;
+}
+
+/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.
* Return 1 if buffer was acquired.
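The softdep_move_dependencies() routine added above splices the b_dep worklist of one buffer onto another while preserving the order of the items, using a running tail pointer. Below is a minimal, compilable userspace sketch of that splice pattern; the types here (struct depitem, struct fakebuf) are illustrative stand-ins for the kernel's worklist and buf, and the ACQUIRE_LOCK/FREE_LOCK pair around the loop is omitted.

/*
 * Sketch only: move every dependency item from oldbp to newbp,
 * unhooking from the old head and appending after a tail pointer so
 * that the relative order is preserved, as softdep_move_dependencies()
 * does for struct worklist.
 */
#include <stdio.h>
#include <sys/queue.h>

struct depitem {
	int			d_id;
	LIST_ENTRY(depitem)	d_list;
};
LIST_HEAD(dephead, depitem);

struct fakebuf {
	struct dephead	b_dep;		/* stands in for buf's b_dep list */
};

static void
move_dependencies(struct fakebuf *oldbp, struct fakebuf *newbp)
{
	struct depitem *wk, *wktail;

	wktail = NULL;
	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
		LIST_REMOVE(wk, d_list);
		if (wktail == NULL)
			LIST_INSERT_HEAD(&newbp->b_dep, wk, d_list);
		else
			LIST_INSERT_AFTER(wktail, wk, d_list);
		wktail = wk;
	}
}

int
main(void)
{
	struct fakebuf oldbp, newbp;
	struct depitem items[3] = { { 1 }, { 2 }, { 3 } };
	struct depitem *wk;
	int i;

	LIST_INIT(&oldbp.b_dep);
	LIST_INIT(&newbp.b_dep);
	for (i = 2; i >= 0; i--)		/* head-insert 3,2,1 -> list order 1,2,3 */
		LIST_INSERT_HEAD(&oldbp.b_dep, &items[i], d_list);
	move_dependencies(&oldbp, &newbp);
	LIST_FOREACH(wk, &newbp.b_dep, d_list)	/* prints "1 2 3": order kept */
		printf("%d ", wk->d_id);
	printf("\n");
	return (0);
}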
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 9d2b5c27f978..f12316ba6a8f 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -68,6 +68,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
static void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
+static void vfs_backgroundwritedone(struct buf *bp);
static int flushbufqueues(void);
static int bd_request;
@@ -349,7 +350,7 @@ bufinit(void)
* buffer cache operation.
*/
maxbufspace = (nbuf + 8) * DFLTBSIZE;
- hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 5);
+ hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
/*
* Limit the amount of malloc memory since it is wired permanently into
* the kernel space. Even though this is accounted for in the buffer
@@ -593,6 +594,7 @@ int
bwrite(struct buf * bp)
{
int oldflags, s;
+ struct buf *newbp;
if (bp->b_flags & B_INVAL) {
brelse(bp);
@@ -606,8 +608,66 @@ bwrite(struct buf * bp)
panic("bwrite: buffer is not busy???");
#endif
s = splbio();
+ /*
+ * If a background write is already in progress, delay
+ * writing this block if it is asynchronous. Otherwise
+ * wait for the background write to complete.
+ */
+ if (bp->b_xflags & BX_BKGRDINPROG) {
+ if (bp->b_flags & B_ASYNC) {
+ splx(s);
+ bdwrite(bp);
+ return (0);
+ }
+ bp->b_xflags |= BX_BKGRDWAIT;
+ tsleep(&bp->b_xflags, PRIBIO, "biord", 0);
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("bwrite: still writing");
+ }
+
+ /* Mark the buffer clean */
bundirty(bp);
+ /*
+ * If this buffer is marked for background writing and we
+ * do not have to wait for it, make a copy and write the
+ * copy so as to leave this buffer ready for further use.
+ */
+ if ((bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC)) {
+ if (bp->b_flags & B_CALL)
+ panic("bwrite: need chained iodone");
+
+ /* get a new block */
+ newbp = geteblk(bp->b_bufsize);
+
+ /* set it to be identical to the old block */
+ memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
+ bgetvp(bp->b_vp, newbp);
+ newbp->b_lblkno = bp->b_lblkno;
+ newbp->b_blkno = bp->b_blkno;
+ newbp->b_offset = bp->b_offset;
+ newbp->b_iodone = vfs_backgroundwritedone;
+ newbp->b_flags |= B_ASYNC | B_CALL;
+ newbp->b_flags &= ~B_INVAL;
+
+ /* move over the dependencies */
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
+ (*bioops.io_movedeps)(bp, newbp);
+
+ /*
+ * Initiate write on the copy, release the original to
+ * the B_LOCKED queue so that it cannot go away until
+ * the background write completes. If not locked it could go
+ * away and then be reconstituted while it was being written.
+ * If the reconstituted buffer were written, we could end up
+ * with two background copies being written at the same time.
+ */
+ bp->b_xflags |= BX_BKGRDINPROG;
+ bp->b_flags |= B_LOCKED;
+ bqrelse(bp);
+ bp = newbp;
+ }
+
bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
bp->b_flags |= B_WRITEINPROG | B_CACHE;
@@ -630,6 +690,56 @@ bwrite(struct buf * bp)
}
/*
+ * Complete a background write started from bwrite.
+ */
+static void
+vfs_backgroundwritedone(bp)
+ struct buf *bp;
+{
+ struct buf *origbp;
+
+ /*
+ * Find the original buffer that we are writing.
+ */
+ if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
+ panic("backgroundwritedone: lost buffer");
+ /*
+ * Process dependencies then return any unfinished ones.
+ */
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
+ (*bioops.io_complete)(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
+ (*bioops.io_movedeps)(bp, origbp);
+ /*
+ * Clear the BX_BKGRDINPROG flag in the original buffer
+ * and awaken it if it is waiting for the write to complete.
+ */
+ origbp->b_xflags &= ~BX_BKGRDINPROG;
+ if (origbp->b_xflags & BX_BKGRDWAIT) {
+ origbp->b_xflags &= ~BX_BKGRDWAIT;
+ wakeup(&origbp->b_xflags);
+ }
+ /*
+ * Clear the B_LOCKED flag and remove it from the locked
+ * queue if it currently resides there.
+ */
+ origbp->b_flags &= ~B_LOCKED;
+ if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+ bremfree(origbp);
+ bqrelse(origbp);
+ }
+ /*
+ * This buffer is marked B_NOCACHE, so when it is released
+ * by biodone, it will be tossed. We mark it with B_READ
+ * to avoid biodone doing a second vwakeup.
+ */
+ bp->b_flags |= B_NOCACHE | B_READ;
+ bp->b_flags &= ~(B_CACHE | B_CALL | B_DONE);
+ bp->b_iodone = 0;
+ biodone(bp);
+}
+
+/*
* Delayed write. (Buffer is marked dirty). Do not bother writing
* anything if the buffer is marked invalid.
*
@@ -757,6 +867,10 @@ bundirty(bp)
--numdirtybuffers;
numdirtywakeup();
}
+ /*
+ * Since it is now being written, we can clear its deferred write flag.
+ */
+ bp->b_flags &= ~B_DEFERRED;
}
/*
@@ -895,12 +1009,16 @@ brelse(struct buf * bp)
*
* Normally we can do this whether a buffer is B_DELWRI or not. If
* the buffer is an NFS buffer, it is tracking piecemeal writes or
- * the commit state and we cannot afford to lose the buffer.
+ * the commit state and we cannot afford to lose the buffer. If the
+ * buffer has a background write in progress, we need to keep it
+ * around to prevent it from being reconstituted and starting a second
+ * background write.
*/
if ((bp->b_flags & B_VMIO)
&& !(bp->b_vp->v_tag == VT_NFS &&
!vn_isdisk(bp->b_vp) &&
- (bp->b_flags & B_DELWRI))
+ (bp->b_flags & B_DELWRI) &&
+ (bp->b_xflags & BX_BKGRDINPROG))
) {
int i, j, resid;
@@ -997,6 +1115,9 @@ brelse(struct buf * bp)
/* buffers with no memory */
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
+ bp->b_xflags &= ~BX_BKGRDWRITE;
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 1");
if (bp->b_kvasize) {
bp->b_qindex = QUEUE_EMPTYKVA;
kvawakeup = 1;
@@ -1011,6 +1132,9 @@ brelse(struct buf * bp)
/* buffers with junk contents */
} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
bp->b_flags |= B_INVAL;
+ bp->b_xflags &= ~BX_BKGRDWRITE;
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
if (bp->b_kvasize)
kvawakeup = 1;
@@ -1501,6 +1625,8 @@ restart:
}
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 3");
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
@@ -1508,6 +1634,7 @@ restart:
allocbuf(bp, 0);
bp->b_flags = 0;
+ bp->b_xflags = 0;
bp->b_dev = NODEV;
bp->b_vp = NULL;
bp->b_blkno = bp->b_lblkno = 0;
@@ -1761,7 +1888,8 @@ flushbufqueues(void)
while (bp) {
KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp));
- if ((bp->b_flags & B_DELWRI) != 0) {
+ if ((bp->b_flags & B_DELWRI) != 0 &&
+ (bp->b_xflags & BX_BKGRDINPROG) == 0) {
if (bp->b_flags & B_INVAL) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
panic("flushbufqueues: locked buf");
@@ -1770,13 +1898,24 @@ flushbufqueues(void)
++r;
break;
}
+ if (LIST_FIRST(&bp->b_dep) != NULL &&
+ bioops.io_countdeps &&
+ (bp->b_flags & B_DEFERRED) == 0 &&
+ (*bioops.io_countdeps)(bp, 0)) {
+ TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY],
+ bp, b_freelist);
+ TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY],
+ bp, b_freelist);
+ bp->b_flags |= B_DEFERRED;
+ continue;
+ }
vfs_bio_awrite(bp);
++r;
break;
}
bp = TAILQ_NEXT(bp, b_freelist);
}
- return(r);
+ return (r);
}
/*
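The vfs_bio.c changes above introduce background writes: when a buffer marked BX_BKGRDWRITE is written asynchronously, bwrite() clones it, hands the dependencies to the clone, pins the original (BX_BKGRDINPROG, B_LOCKED) so it cannot be recycled, and writes the clone; vfs_backgroundwritedone() later unpins the original and wakes any waiter. Below is a compilable userspace sketch of that control flow; fakebuf, start_io() and the flag values are illustrative stand-ins, not the kernel's, and the dependency hand-off, wakeup and error paths are omitted.

/*
 * Sketch only: write a copy of the buffer in the "background" and keep
 * the original pinned until the copy's I/O completes, mirroring the
 * bwrite()/vfs_backgroundwritedone() pair above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_ASYNC	0x01	/* caller does not wait for the write */
#define BUF_BKGRDWRITE	0x02	/* buffer may be written in background */
#define BUF_BKGRDINPROG	0x04	/* a background copy is being written */
#define BUF_LOCKED	0x08	/* keep the buffer; do not recycle it */

struct fakebuf {
	int	 b_flags;
	size_t	 b_bufsize;
	char	*b_data;
	struct fakebuf *b_orig;	/* copy -> original, for completion */
};

static void
start_io(struct fakebuf *bp)
{
	/* stand-in for issuing the disk write */
	printf("writing %zu bytes: %s\n", bp->b_bufsize, bp->b_data);
}

/* completion side: what vfs_backgroundwritedone() does for the original */
static void
background_write_done(struct fakebuf *copy)
{
	copy->b_orig->b_flags &= ~(BUF_BKGRDINPROG | BUF_LOCKED);
	free(copy->b_data);
	free(copy);
}

static void
write_buf(struct fakebuf *bp)
{
	struct fakebuf *newbp;

	if ((bp->b_flags & BUF_BKGRDWRITE) && (bp->b_flags & BUF_ASYNC)) {
		/* clone the data so the original stays usable */
		newbp = malloc(sizeof(*newbp));
		newbp->b_bufsize = bp->b_bufsize;
		newbp->b_data = malloc(bp->b_bufsize);
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_orig = bp;
		newbp->b_flags = BUF_ASYNC;
		/* pin the original until the copy is on disk */
		bp->b_flags |= BUF_BKGRDINPROG | BUF_LOCKED;
		start_io(newbp);
		background_write_done(newbp);	/* normally runs from biodone */
		return;
	}
	start_io(bp);
}

int
main(void)
{
	char data[] = "cylinder group block";
	struct fakebuf bp = { BUF_ASYNC | BUF_BKGRDWRITE, sizeof(data), data, NULL };

	write_buf(&bp);
	return (0);
}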
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 7168a894e993..f38bf4510138 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -65,6 +65,8 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
int (*io_fsync) __P((struct vnode *));
int (*io_sync) __P((struct mount *));
+ void (*io_movedeps) __P((struct buf *, struct buf *));
+ int (*io_countdeps) __P((struct buf *, int));
} bioops;
struct iodone_chain {
@@ -194,7 +196,7 @@ struct buf {
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_UNUSED0 0x00000008 /* Old B_BAD */
-#define B_UNUSED1 0x00000010 /* Old B_BUSY */
+#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
@@ -235,6 +237,9 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
+#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
+#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
+#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 7168a894e993..f38bf4510138 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -65,6 +65,8 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
int (*io_fsync) __P((struct vnode *));
int (*io_sync) __P((struct mount *));
+ void (*io_movedeps) __P((struct buf *, struct buf *));
+ int (*io_countdeps) __P((struct buf *, int));
} bioops;
struct iodone_chain {
@@ -194,7 +196,7 @@ struct buf {
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_UNUSED0 0x00000008 /* Old B_BAD */
-#define B_UNUSED1 0x00000010 /* Old B_BUSY */
+#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
@@ -235,6 +237,9 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
+#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
+#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
+#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index c3e1c3172743..bdd00aa5565e 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -836,6 +836,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize)
brelse(bp);
return (0);
}
+ bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
bno = dtogd(fs, bprev);
for (i = numfrags(fs, osize); i < frags; i++)
@@ -903,6 +904,7 @@ ffs_alloccg(ip, cg, bpref, size)
brelse(bp);
return (0);
}
+ bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
if (size == fs->fs_bsize) {
bno = ffs_alloccgblk(ip, bp, bpref);
@@ -1113,6 +1115,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp))
goto fail;
+ bp->b_xflags |= BX_BKGRDWRITE;
/*
* Check to see if a cluster of the needed size (or bigger) is
* available in this cylinder group.
@@ -1227,6 +1230,7 @@ ffs_nodealloccg(ip, cg, ipref, mode)
brelse(bp);
return (0);
}
+ bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
if (ipref) {
ipref %= fs->fs_ipg;
@@ -1322,6 +1326,7 @@ ffs_blkfree(ip, bno, size)
brelse(bp);
return;
}
+ bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
bno = dtogd(fs, bno);
if (size == fs->fs_bsize) {
@@ -1419,6 +1424,7 @@ ffs_checkblk(ip, bno, size)
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp))
panic("ffs_checkblk: cg magic mismatch");
+ bp->b_xflags |= BX_BKGRDWRITE;
bno = dtogd(fs, bno);
if (size == fs->fs_bsize) {
free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
@@ -1484,6 +1490,7 @@ ffs_vfree( pvp, ino, mode)
brelse(bp);
return (0);
}
+ bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
ino %= fs->fs_ipg;
if (isclr(cg_inosused(cgp), ino)) {
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 14e1bb244153..dee1891d7199 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -52,7 +52,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00
+ * from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00
* $FreeBSD$
*/
@@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *));
static void softdep_deallocate_dependencies __P((struct buf *));
static int softdep_fsync __P((struct vnode *));
static int softdep_process_worklist __P((struct mount *));
+static void softdep_move_dependencies __P((struct buf *, struct buf *));
+static int softdep_count_dependencies __P((struct buf *bp, int));
struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
@@ -219,6 +221,8 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_fsync, /* io_fsync */
softdep_process_worklist, /* io_sync */
+ softdep_move_dependencies, /* io_movedeps */
+ softdep_count_dependencies, /* io_countdeps */
};
/*
@@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
-#if defined(__FreeBSD__)
SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
@@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0
SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
-#else /* !__FreeBSD__ */
-struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
-struct ctldebug debug21 = { "tickdelay", &tickdelay };
-struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
-struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
-struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
-struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
-struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
-struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
-struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
-struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
-#endif /* !__FreeBSD__ */
-
#endif /* DEBUG */
/*
@@ -637,6 +627,31 @@ softdep_process_worklist(matchmnt)
}
/*
+ * Move dependencies from one buffer to another.
+ */
+static void
+softdep_move_dependencies(oldbp, newbp)
+ struct buf *oldbp;
+ struct buf *newbp;
+{
+ struct worklist *wk, *wktail;
+
+ if (LIST_FIRST(&newbp->b_dep) != NULL)
+ panic("softdep_move_dependencies: need merge code");
+ wktail = 0;
+ ACQUIRE_LOCK(&lk);
+ while (wk = LIST_FIRST(&oldbp->b_dep)) {
+ LIST_REMOVE(wk, wk_list);
+ if (wktail == 0)
+ LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
+ else
+ LIST_INSERT_AFTER(wktail, wk, wk_list);
+ wktail = wk;
+ }
+ FREE_LOCK(&lk);
+}
+
+/*
* Purge the work list of all items associated with a particular mount point.
*/
int
@@ -1633,11 +1648,6 @@ softdep_setup_freeblocks(ip, length)
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
- * Add the freeblks structure to the list of operations that
- * must await the zero'ed inode being written to disk.
- */
- WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
- /*
* Because the file length has been truncated to zero, any
* pending block allocation dependency structures associated
* with this inode are obsolete and can simply be de-allocated.
@@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length)
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, 1);
+ /*
+ * Add the freeblks structure to the list of operations that
+ * must await the zero'ed inode being written to disk. If we
+ * still have a bitmap dependency, then the inode has never been
+ * written to disk, so we can process the freeblks immediately.
+ */
+ if ((inodedep->id_state & DEPCOMPLETE) == 0)
+ handle_workitem_freeblocks(freeblks);
+ else
+ WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
FREE_LOCK(&lk);
bdwrite(bp);
/*
@@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode)
*/
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) {
- add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
+ handle_workitem_freefile(freefile);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
- * necessary since the inode is being deallocated. We could process
- * the freefile immediately, but then we would have to clear the
- * id_inowait dependencies here and it is easier just to let the
- * zero'ed inode be written and let them be cleaned up in the
- * normal followup actions that follow the inode write.
+ * necessary since the inode is being deallocated. We set the
+ * ALLCOMPLETE flags since the bitmap now properly shows that the
+ * inode is not allocated. Even if the inode is actively being
+ * written, it has been rolled back to its zero'ed state, so we
+ * are ensured that a zero inode is what is on the disk. For short
+ * lived files, this change will usually result in removing all the
+ * dependencies from the inode so that it can be freed immediately.
*/
- if ((inodedep->id_state & DEPCOMPLETE) == 0) {
- inodedep->id_state |= DEPCOMPLETE;
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
+ WORKLIST_REMOVE(&inodedep->id_list);
}
- /*
- * If the inodedep has no dependencies associated with it,
- * then we must free it here and free the file immediately.
- * This case arises when an early allocation fails (for
- * example, the user is over their file quota).
- */
- if (free_inodedep(inodedep) == 0)
+ if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
- else
- add_to_worklist(&freefile->fx_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(&lk);
+ } else {
+ FREE_LOCK(&lk);
+ handle_workitem_freefile(freefile);
+ }
}
/*
@@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir)
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
+ FREE_LOCK(&lk);
} else {
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
- add_to_worklist(&dirrem->dm_list);
+ FREE_LOCK(&lk);
+ handle_workitem_remove(dirrem);
}
- FREE_LOCK(&lk);
}
/*
@@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
/*
- * Called whenever the link count on an inode is increased.
+ * Called whenever the link count on an inode is changed.
* It creates an inode dependency so that the new reference(s)
* to the inode cannot be committed to disk until the updated
* inode has been written.
*/
void
-softdep_increase_linkcnt(ip)
+softdep_change_linkcnt(ip)
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
+ if (ip->i_nlink < ip->i_effnlink)
+ panic("softdep_change_linkcnt: bad delta");
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
}
@@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem)
return;
}
ip = VTOI(vp);
+ ACQUIRE_LOCK(&lk);
+ if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
+ panic("handle_workitem_remove: lost inodedep 1");
/*
* Normal file deletion.
*/
if ((dirrem->dm_state & RMDIR) == 0) {
ip->i_nlink--;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad file delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
vput(vp);
num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
@@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem)
* the parent decremented to account for the loss of "..".
*/
ip->i_nlink -= 2;
+ ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad dir delta");
- ip->i_flag |= IN_CHANGE;
+ inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
+ FREE_LOCK(&lk);
if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
softdep_error("handle_workitem_remove: truncate", error);
/*
@@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem)
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
+ /*
+ * If we still have a bitmap dependency, then the inode has never
+ * been written to disk. Drop the dependency as it is no longer
+ * necessary since the inode is being deallocated. We set the
+ * ALLCOMPLETE flags since the bitmap now properly shows that the
+ * inode is not allocated. Even if the inode is actively being
+ * written, it has been rolled back to its zero'ed state, so we
+ * are ensured that a zero inode is what is on the disk. For short
+ * lived files, this change will usually result in removing all the
+ * dependencies from the inode so that it can be freed immediately.
+ */
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC,
- &inodedep);
+ if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
+ panic("handle_workitem_remove: lost inodedep 2");
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ inodedep->id_state |= ALLCOMPLETE;
+ LIST_REMOVE(inodedep, id_deps);
+ inodedep->id_buf = NULL;
+ WORKLIST_REMOVE(&inodedep->id_list);
+ }
dirrem->dm_state = 0;
dirrem->dm_oldinum = dirrem->dm_dirinum;
- WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
- FREE_LOCK(&lk);
- vput(vp);
+ if (free_inodedep(inodedep) == 0) {
+ WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
+ FREE_LOCK(&lk);
+ vput(vp);
+ } else {
+ FREE_LOCK(&lk);
+ vput(vp);
+ handle_workitem_remove(dirrem);
+ }
}
/*
@@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip)
FREE_LOCK(&lk);
return;
}
- if (inodedep->id_nlinkdelta != 0) {
- ip->i_effnlink -= inodedep->id_nlinkdelta;
- ip->i_flag |= IN_MODIFIED;
- inodedep->id_nlinkdelta = 0;
- (void) free_inodedep(inodedep);
- }
+ ip->i_effnlink -= inodedep->id_nlinkdelta;
FREE_LOCK(&lk);
}
@@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor)
FREE_LOCK(&lk);
return;
}
- if (ip->i_nlink < ip->i_effnlink)
+ if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
- inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
/*
* Changes have been initiated. Anything depending on these
* changes cannot occur until this inode has been written.
@@ -4405,6 +4452,87 @@ clear_inodedeps(p)
}
/*
+ * Function to determine if the buffer has outstanding dependencies
+ * that will cause a roll-back if the buffer is written. If wantcount
+ * is set, return number of dependencies, otherwise just yes or no.
+ */
+static int
+softdep_count_dependencies(bp, wantcount)
+ struct buf *bp;
+ int wantcount;
+{
+ struct worklist *wk;
+ struct inodedep *inodedep;
+ struct indirdep *indirdep;
+ struct allocindir *aip;
+ struct pagedep *pagedep;
+ struct diradd *dap;
+ int i, retval;
+
+ retval = 0;
+ ACQUIRE_LOCK(&lk);
+ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) {
+ switch (wk->wk_type) {
+
+ case D_INODEDEP:
+ inodedep = WK_INODEDEP(wk);
+ if ((inodedep->id_state & DEPCOMPLETE) == 0) {
+ /* bitmap allocation dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
+ /* direct block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_INDIRDEP:
+ indirdep = WK_INDIRDEP(wk);
+ for (aip = LIST_FIRST(&indirdep->ir_deplisthd);
+ aip; aip = LIST_NEXT(aip, ai_next)) {
+ /* indirect block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_PAGEDEP:
+ pagedep = WK_PAGEDEP(wk);
+ for (i = 0; i < DAHASHSZ; i++) {
+ for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]);
+ dap; dap = LIST_NEXT(dap, da_pdlist)) {
+ /* directory entry dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ }
+ continue;
+
+ case D_BMSAFEMAP:
+ case D_ALLOCDIRECT:
+ case D_ALLOCINDIR:
+ case D_MKDIR:
+ /* never a dependency on these blocks */
+ continue;
+
+ default:
+ panic("softdep_check_for_rollback: Unexpected type %s",
+ TYPENAME(wk->wk_type));
+ /* NOTREACHED */
+ }
+ }
+out:
+ FREE_LOCK(&lk);
+ return retval;
+}
+
+/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.
* Return 1 if buffer was acquired.
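softdep_count_dependencies() above follows a small convention worth noting: when wantcount is zero it only answers whether any roll-back-causing dependency exists, bailing out at the first hit, and otherwise it walks the whole list and returns the total. A minimal sketch of that "count or just detect" pattern follows; the plain int array stands in for the classified worklist items, and the lock and type switch are omitted.

/*
 * Sketch only: return the number of outstanding dependencies, or stop
 * at the first one when the caller merely asked "any at all?".
 */
#include <stdio.h>

static int
count_dependencies(const int *deps, int ndeps, int wantcount)
{
	int i, retval;

	retval = 0;
	for (i = 0; i < ndeps; i++) {
		if (deps[i] == 0)	/* no roll-back needed for this item */
			continue;
		retval++;
		if (!wantcount)		/* caller only wanted yes/no */
			break;
	}
	return (retval);
}

int
main(void)
{
	int deps[] = { 0, 1, 0, 1, 1 };

	printf("any: %d, total: %d\n",
	    count_dependencies(deps, 5, 0),
	    count_dependencies(deps, 5, 1));	/* prints "any: 1, total: 3" */
	return (0);
}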
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index 72f819b23cde..4b8411d32353 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -210,11 +210,11 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
void
-softdep_increase_linkcnt(ip)
+softdep_change_linkcnt(ip)
struct inode *ip;
{
- panic("softdep_increase_linkcnt called");
+ panic("softdep_change_linkcnt called");
}
void
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 18fb15396b96..77e821f53a9a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -671,10 +671,6 @@ ffs_mountfs(devvp, mp, p, malloctype)
bp = NULL;
fs = ump->um_fs;
fs->fs_ronly = ronly;
- if (ronly == 0) {
- fs->fs_fmod = 1;
- fs->fs_clean = 0;
- }
size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
@@ -747,6 +743,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
free(base, M_UFSMNT);
goto out;
}
+ fs->fs_fmod = 1;
fs->fs_clean = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
@@ -964,9 +961,9 @@ loop:
simple_lock(&vp->v_interlock);
nvp = vp->v_mntvnodes.le_next;
ip = VTOI(vp);
- if ((vp->v_type == VNON) || (((ip->i_flag &
- (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
- (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) {
+ if (vp->v_type == VNON || ((ip->i_flag &
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+ TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
simple_unlock(&vp->v_interlock);
continue;
}
@@ -1080,7 +1077,7 @@ restart:
return (error);
}
bzero((caddr_t)ip, sizeof(struct inode));
- lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
+ lockinit(&ip->i_lock, PINOD, "inode", 0, LK_CANRECURSE);
vp->v_data = ip;
ip->i_vnode = vp;
ip->i_fs = fs = ump->um_fs;
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index eb99b2c7c3ae..6087d81dbdd3 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -123,10 +123,11 @@ ffs_fsync(ap)
struct vnode *vp = ap->a_vp;
struct buf *bp;
struct buf *nbp;
- int s, error, passes, skipmeta;
+ int s, error, wait, passes, skipmeta;
daddr_t lbn;
+ wait = (ap->a_waitfor == MNT_WAIT);
if (vn_isdisk(vp)) {
lbn = INT_MAX;
if (vp->v_specmountpoint != NULL &&
@@ -143,7 +144,7 @@ ffs_fsync(ap)
*/
passes = NIADDR + 1;
skipmeta = 0;
- if (ap->a_waitfor == MNT_WAIT)
+ if (wait)
skipmeta = 1;
s = splbio();
loop:
@@ -153,33 +154,43 @@ loop:
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
/*
- * First time through on a synchronous call,
- * or if it's already scheduled, skip to the next
- * buffer
+ * Reasons to skip this buffer: it has already been considered
+ * on this pass, this pass is the first time through on a
+ * synchronous flush request and the buffer being considered
+ * is metadata, the buffer has dependencies that will cause
+ * it to be redirtied and it has not already been deferred,
+ * or it is already being written.
*/
- if ((bp->b_flags & B_SCANNED) ||
- ((skipmeta == 1) && (bp->b_lblkno < 0)) ||
- BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
+ if ((bp->b_flags & B_SCANNED) != 0)
+ continue;
+ bp->b_flags |= B_SCANNED;
+ if ((skipmeta == 1 && bp->b_lblkno < 0))
+ continue;
+ if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
+ (bp->b_flags & B_DEFERRED) == 0 &&
+ bioops.io_countdeps && (*bioops.io_countdeps)(bp, 0)) {
+ bp->b_flags |= B_DEFERRED;
+ continue;
+ }
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
continue;
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
+ if (vp != bp->b_vp)
+ panic("ffs_fsync: vp != vp->b_vp");
/*
- * If data is outstanding to another vnode, or we were
- * asked to wait for everything, or it's not a file or BDEV,
- * start the IO on this buffer immediatly.
+ * If this is a synchronous flush request, or it is not a
+ * file or device, start the write on this buffer immediately.
*/
- bp->b_flags |= B_SCANNED;
- if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) ||
- ((vp->v_type != VREG) && (vp->v_type != VBLK))) {
+ if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
/*
* On our final pass through, do all I/O synchronously
* so that we can find out if our flush is failing
* because of write errors.
*/
- if (passes > 0 || (ap->a_waitfor != MNT_WAIT)) {
- if ((bp->b_flags & B_CLUSTEROK) &&
- ap->a_waitfor != MNT_WAIT) {
+ if (passes > 0 || !wait) {
+ if ((bp->b_flags & B_CLUSTEROK) && !wait) {
BUF_UNLOCK(bp);
(void) vfs_bio_awrite(bp);
} else {
@@ -224,7 +235,7 @@ loop:
goto loop;
}
- if (ap->a_waitfor == MNT_WAIT) {
+ if (wait) {
while (vp->v_numoutput) {
vp->v_flag |= VBWAIT;
(void) tsleep((caddr_t)&vp->v_numoutput,
@@ -260,5 +271,5 @@ loop:
}
}
splx(s);
- return (UFS_UPDATE(vp, ap->a_waitfor == MNT_WAIT));
+ return (UFS_UPDATE(vp, wait));
}
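The rewritten ffs_fsync() loop spells out when a dirty buffer is skipped: it was already scanned on this pass, it is metadata on the first pass of a synchronous flush, or (for a non-waiting flush) it carries dependencies that would only roll it back and redirty it, in which case it is tagged B_DEFERRED for a later pass. A compilable sketch of that decision follows; struct fakebuf and its ndeps field are illustrative stand-ins for struct buf, b_dep and the (*bioops.io_countdeps)() hook.

/*
 * Sketch only: decide whether a dirty buffer should be written now or
 * skipped/deferred, mirroring the skip reasons in ffs_fsync() above.
 */
#include <stdio.h>

#define FB_SCANNED	0x01	/* already looked at on this pass */
#define FB_DEFERRED	0x02	/* skipped: a write would roll back */
#define FB_METADATA	0x04	/* indirect block (negative lblkno) */

struct fakebuf {
	const char *name;
	int	    flags;
	int	    ndeps;	/* outstanding dependencies */
};

static int
should_write(struct fakebuf *bp, int wait, int skipmeta)
{
	if (bp->flags & FB_SCANNED)
		return (0);
	bp->flags |= FB_SCANNED;
	if (skipmeta && (bp->flags & FB_METADATA))
		return (0);
	if (!wait && bp->ndeps != 0 && (bp->flags & FB_DEFERRED) == 0) {
		bp->flags |= FB_DEFERRED;	/* try again on a later pass */
		return (0);
	}
	return (1);
}

int
main(void)
{
	struct fakebuf bufs[] = {
		{ "data, no deps",   0, 0 },
		{ "data, has deps",  0, 2 },
		{ "indirect block",  FB_METADATA, 0 },
	};
	int i;

	for (i = 0; i < 3; i++)
		printf("%-16s -> %s\n", bufs[i].name,
		    should_write(&bufs[i], 0, 1) ? "write" : "skip");
	return (0);
}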
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index feec939d5842..d576be977abc 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -102,6 +102,6 @@ void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *,
int));
void softdep_setup_directory_change __P((struct buf *, struct inode *,
struct inode *, long, int));
-void softdep_increase_linkcnt __P((struct inode *));
+void softdep_change_linkcnt __P((struct inode *));
#endif /* !_UFS_UFS_EXTERN_H_ */
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 77c0151abd49..7a0232d83f03 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -899,17 +899,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
ep->d_reclen += dp->i_reclen;
}
out:
- if (ip) {
- ip->i_effnlink--;
- ip->i_flag |= IN_CHANGE;
- }
if (DOINGSOFTDEP(dvp)) {
- if (ip)
+ if (ip) {
+ ip->i_effnlink--;
+ softdep_change_linkcnt(ip);
softdep_setup_remove(bp, dp, ip, isrmdir);
+ }
bdwrite(bp);
} else {
- if (ip)
+ if (ip) {
+ ip->i_effnlink--;
ip->i_nlink--;
+ ip->i_flag |= IN_CHANGE;
+ }
if (flags & DOWHITEOUT)
error = VOP_BWRITE(bp->b_vp, bp);
else if (DOINGASYNC(dvp) && dp->i_count != 0) {
@@ -946,12 +948,13 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
if (!OFSFMT(vdp))
ep->d_type = newtype;
oip->i_effnlink--;
- oip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vdp)) {
+ softdep_change_linkcnt(oip);
softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
bdwrite(bp);
} else {
oip->i_nlink--;
+ oip->i_flag |= IN_CHANGE;
if (DOINGASYNC(vdp)) {
bdwrite(bp);
error = 0;
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 9adae8ca5947..9616b3792bd8 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -754,7 +754,7 @@ ufs_link(ap)
ip->i_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
if (!error) {
ufs_makedirentry(ip, cnp, &newdir);
@@ -765,6 +765,8 @@ ufs_link(ap)
ip->i_effnlink--;
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(vp))
+ softdep_change_linkcnt(ip);
}
out1:
if (tdvp != vp)
@@ -1014,7 +1016,7 @@ abortit:
ip->i_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
DOINGASYNC(fvp)))) != 0) {
VOP_UNLOCK(fvp, 0, p);
@@ -1079,7 +1081,7 @@ abortit:
dp->i_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
- softdep_increase_linkcnt(dp);
+ softdep_change_linkcnt(dp);
error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
DOINGASYNC(tdvp)));
if (error)
@@ -1092,6 +1094,8 @@ abortit:
dp->i_effnlink--;
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(dp);
(void)UFS_UPDATE(tdvp, 1);
}
goto bad;
@@ -1146,10 +1150,12 @@ abortit:
if (doingdirectory) {
if (!newparent) {
dp->i_effnlink--;
- dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(dp);
}
xp->i_effnlink--;
- xp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(xp);
}
VN_POLLEVENT(tdvp, POLLWRITE);
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
@@ -1164,9 +1170,12 @@ abortit:
* disk, so when running with that code we avoid doing
* them now.
*/
- if (!newparent)
+ if (!newparent) {
dp->i_nlink--;
+ dp->i_flag |= IN_CHANGE;
+ }
xp->i_nlink--;
+ xp->i_flag |= IN_CHANGE;
ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_proc)) != 0)
@@ -1247,6 +1256,8 @@ out:
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
ip->i_flag &= ~IN_RENAME;
+ if (DOINGSOFTDEP(fvp))
+ softdep_change_linkcnt(ip);
vput(fvp);
} else
vrele(fvp);
@@ -1359,7 +1370,7 @@ ufs_mkdir(ap)
ip->i_effnlink = 2;
ip->i_nlink = 2;
if (DOINGSOFTDEP(tvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
if (cnp->cn_flags & ISWHITEOUT)
ip->i_flags |= UF_OPAQUE;
@@ -1372,7 +1383,7 @@ ufs_mkdir(ap)
dp->i_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
- softdep_increase_linkcnt(dp);
+ softdep_change_linkcnt(dp);
error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
if (error)
goto bad;
@@ -1440,6 +1451,8 @@ bad:
dp->i_effnlink--;
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(dvp))
+ softdep_change_linkcnt(dp);
/*
* No need to do an explicit VOP_TRUNCATE here, vrele will
* do this for us because we set the link count to 0.
@@ -1447,6 +1460,8 @@ bad:
ip->i_effnlink = 0;
ip->i_nlink = 0;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(ip);
vput(tvp);
}
out:
@@ -1505,29 +1520,36 @@ ufs_rmdir(ap)
* inode. If we crash in between, the directory
* will be reattached to lost+found,
*/
+ dp->i_effnlink--;
+ ip->i_effnlink--;
+ if (DOINGSOFTDEP(vp)) {
+ softdep_change_linkcnt(dp);
+ softdep_change_linkcnt(ip);
+ }
error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
- if (error)
+ if (error) {
+ dp->i_effnlink++;
+ ip->i_effnlink++;
+ if (DOINGSOFTDEP(vp)) {
+ softdep_change_linkcnt(dp);
+ softdep_change_linkcnt(ip);
+ }
goto out;
+ }
VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK);
cache_purge(dvp);
/*
* Truncate inode. The only stuff left in the directory is "." and
* "..". The "." reference is inconsequential since we are quashing
- * it. We have removed the "." reference and the reference in the
- * parent directory, but there may be other hard links. So,
- * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no
- * new entries are made. The soft dependency code will arrange to
- * do these operations after the parent directory entry has been
- * deleted on disk, so when running with that code we avoid doing
- * them now.
+ * it. The soft dependency code will arrange to do these operations
+ * after the parent directory entry has been deleted on disk, so
+ * when running with that code we avoid doing them now.
*/
- dp->i_effnlink--;
- dp->i_flag |= IN_CHANGE;
- ip->i_effnlink--;
- ip->i_flag |= IN_CHANGE;
if (!DOINGSOFTDEP(vp)) {
dp->i_nlink--;
+ dp->i_flag |= IN_CHANGE;
ip->i_nlink--;
+ ip->i_flag |= IN_CHANGE;
ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
cnp->cn_proc);
@@ -2119,7 +2141,7 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_effnlink = 1;
ip->i_nlink = 1;
if (DOINGSOFTDEP(tvp))
- softdep_increase_linkcnt(ip);
+ softdep_change_linkcnt(ip);
if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
suser_xxx(cnp->cn_cred, 0, 0))
ip->i_mode &= ~ISGID;
@@ -2148,6 +2170,8 @@ bad:
ip->i_effnlink = 0;
ip->i_nlink = 0;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(ip);
vput(tvp);
return (error);
}
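Throughout the ufs_lookup.c and ufs_vnops.c changes the pattern is the same: i_effnlink is adjusted as soon as the name change is made in memory, softdep_change_linkcnt() records the resulting delta (which must never be negative), and i_nlink catches up only when the directory update may safely reach the disk. A small sketch of that bookkeeping follows; struct fakeinode and record_linkcnt() are illustrative stand-ins for the inode fields and the softdep hook.

/*
 * Sketch only: track the gap between the on-disk link count and the
 * effective (in-memory) link count, as softdep_change_linkcnt() does.
 */
#include <assert.h>
#include <stdio.h>

struct fakeinode {
	int	i_nlink;	/* link count as it will appear on disk */
	int	i_effnlink;	/* link count counting completed removals */
	int	id_nlinkdelta;	/* what softdep_change_linkcnt() records */
};

static void
record_linkcnt(struct fakeinode *ip)
{
	assert(ip->i_nlink >= ip->i_effnlink);	/* "bad delta" panic otherwise */
	ip->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
}

int
main(void)
{
	struct fakeinode ip = { 2, 2, 0 };

	/* name removed from the directory: effective count drops first */
	ip.i_effnlink--;
	record_linkcnt(&ip);
	printf("after remove: nlink=%d effnlink=%d delta=%d\n",
	    ip.i_nlink, ip.i_effnlink, ip.id_nlinkdelta);

	/* directory update safely on disk: on-disk count catches up */
	ip.i_nlink--;
	record_linkcnt(&ip);
	printf("after commit: nlink=%d effnlink=%d delta=%d\n",
	    ip.i_nlink, ip.i_effnlink, ip.id_nlinkdelta);
	return (0);
}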