| field | value | date |
|---|---|---|
| author | Kirk McKusick <mckusick@FreeBSD.org> | 2000-01-10 00:24:24 +0000 |
| committer | Kirk McKusick <mckusick@FreeBSD.org> | 2000-01-10 00:24:24 +0000 |
| commit | cf60e8e4bf442e10aeb65803cfcbdb1cd3a875e3 (patch) | |
| tree | 126ab6feda3d7d9183f59410c61d778b2e490d46 | |
| parent | bd5f5da94da66c03392e82dcb9631879023c437e (diff) | |
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | sys/contrib/softupdates/ffs_softdep.c | 242 |
| -rw-r--r-- | sys/kern/vfs_bio.c | 149 |
| -rw-r--r-- | sys/sys/bio.h | 7 |
| -rw-r--r-- | sys/sys/buf.h | 7 |
| -rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 7 |
| -rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 242 |
| -rw-r--r-- | sys/ufs/ffs/ffs_softdep_stub.c | 4 |
| -rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 13 |
| -rw-r--r-- | sys/ufs/ffs/ffs_vnops.c | 49 |
| -rw-r--r-- | sys/ufs/ufs/ufs_extern.h | 2 |
| -rw-r--r-- | sys/ufs/ufs/ufs_lookup.c | 17 |
| -rw-r--r-- | sys/ufs/ufs/ufs_vnops.c | 66 |

12 files changed, 626 insertions, 179 deletions
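The headline change is in bwrite(): a buffer marked BX_BKGRDWRITE (the cylinder-group blocks, per the ffs_alloc.c hunks below) that is being written asynchronously gets copied, the copy is written, and the original remains available for further updates while the copy is in flight. Below is a minimal userland sketch of that decision path, using toy types and flag names (toy_buf, TB_*) as stand-ins for the real struct buf and kernel flags; it is an illustration of the idea, not the kernel code.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins -- not the real struct buf or its flag values. */
#define TB_ASYNC        0x01  /* caller will not wait for the write */
#define TB_BKGRDWRITE   0x02  /* eligible for background writing */
#define TB_BKGRDINPROG  0x04  /* a background copy is being written */

struct toy_buf {
    int     flags;
    size_t  size;
    char   *data;
};

/* Stand-in for handing a buffer to the disk driver. */
static void toy_start_io(struct toy_buf *bp)
{
    printf("start I/O: %zu bytes, flags 0x%x\n", bp->size, bp->flags);
}

/*
 * Model of the new bwrite() path: for an asynchronous write of a buffer
 * marked for background writing, copy the data into a fresh buffer and
 * write the copy, leaving the original free for further modification.
 */
static void toy_bwrite(struct toy_buf *bp)
{
    if ((bp->flags & TB_BKGRDWRITE) && (bp->flags & TB_ASYNC)) {
        struct toy_buf *newbp = malloc(sizeof(*newbp));
        char *copy = newbp ? malloc(bp->size) : NULL;

        if (copy != NULL) {
            memcpy(copy, bp->data, bp->size);
            newbp->data = copy;
            newbp->size = bp->size;
            newbp->flags = TB_ASYNC;
            /* The original is pinned until the copy's write completes. */
            bp->flags |= TB_BKGRDINPROG;
            toy_start_io(newbp);
            /* In the kernel this clearing happens in the completion
               handler (vfs_backgroundwritedone); our toy I/O is synchronous. */
            bp->flags &= ~TB_BKGRDINPROG;
            free(newbp->data);
            free(newbp);
            return;
        }
        free(newbp);            /* allocation failed: write the original */
    }
    toy_start_io(bp);
}

int main(void)
{
    char block[512] = "cylinder group bitmap";
    struct toy_buf bp = { TB_ASYNC | TB_BKGRDWRITE, sizeof(block), block };

    toy_bwrite(&bp);
    printf("original still usable, flags 0x%x\n", bp.flags);
    return 0;
}
```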
diff --git a/sys/contrib/softupdates/ffs_softdep.c b/sys/contrib/softupdates/ffs_softdep.c index 14e1bb244153..dee1891d7199 100644 --- a/sys/contrib/softupdates/ffs_softdep.c +++ b/sys/contrib/softupdates/ffs_softdep.c @@ -52,7 +52,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00 + * from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00 * $FreeBSD$ */ @@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *)); static void softdep_deallocate_dependencies __P((struct buf *)); static int softdep_fsync __P((struct vnode *)); static int softdep_process_worklist __P((struct mount *)); +static void softdep_move_dependencies __P((struct buf *, struct buf *)); +static int softdep_count_dependencies __P((struct buf *bp, int)); struct bio_ops bioops = { softdep_disk_io_initiation, /* io_start */ @@ -219,6 +221,8 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_fsync, /* io_fsync */ softdep_process_worklist, /* io_sync */ + softdep_move_dependencies, /* io_movedeps */ + softdep_count_dependencies, /* io_countdeps */ }; /* @@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ #ifdef DEBUG #include <vm/vm.h> #include <sys/sysctl.h> -#if defined(__FreeBSD__) SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); @@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0 SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); -#else /* !__FreeBSD__ */ -struct ctldebug debug20 = { "max_softdeps", &max_softdeps }; -struct ctldebug debug21 = { "tickdelay", &tickdelay }; -struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push }; -struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push }; -struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit }; -struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit }; -struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs }; -struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap }; -struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs }; -struct ctldebug debug30 = { "dir_entry", &stat_dir_entry }; -#endif /* !__FreeBSD__ */ - #endif /* DEBUG */ /* @@ -637,6 +627,31 @@ softdep_process_worklist(matchmnt) } /* + * Move dependencies from one buffer to another. + */ +static void +softdep_move_dependencies(oldbp, newbp) + struct buf *oldbp; + struct buf *newbp; +{ + struct worklist *wk, *wktail; + + if (LIST_FIRST(&newbp->b_dep) != NULL) + panic("softdep_move_dependencies: need merge code"); + wktail = 0; + ACQUIRE_LOCK(&lk); + while (wk = LIST_FIRST(&oldbp->b_dep)) { + LIST_REMOVE(wk, wk_list); + if (wktail == 0) + LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); + else + LIST_INSERT_AFTER(wktail, wk, wk_list); + wktail = wk; + } + FREE_LOCK(&lk); +} + +/* * Purge the work list of all items associated with a particular mount point. 
*/ int @@ -1633,11 +1648,6 @@ softdep_setup_freeblocks(ip, length) if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); /* - * Add the freeblks structure to the list of operations that - * must await the zero'ed inode being written to disk. - */ - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); - /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated * with this inode are obsolete and can simply be de-allocated. @@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length) merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, 1); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency, then the inode has never been + * written to disk, so we can process the freeblks immediately. + */ + if ((inodedep->id_state & DEPCOMPLETE) == 0) + handle_workitem_freeblocks(freeblks); + else + WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); FREE_LOCK(&lk); bdwrite(bp); /* @@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode) */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) { - add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); + handle_workitem_freefile(freefile); return; } /* * If we still have a bitmap dependency, then the inode has never * been written to disk. Drop the dependency as it is no longer - * necessary since the inode is being deallocated. We could process - * the freefile immediately, but then we would have to clear the - * id_inowait dependencies here and it is easier just to let the - * zero'ed inode be written and let them be cleaned up in the - * normal followup actions that follow the inode write. + * necessary since the inode is being deallocated. We set the + * ALLCOMPLETE flags since the bitmap now properly shows that the + * inode is not allocated. Even if the inode is actively being + * written, it has been rolled back to its zero'ed state, so we + * are ensured that a zero inode is what is on the disk. For short + * lived files, this change will usually result in removing all the + * depedencies from the inode so that it can be freed immediately. */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - inodedep->id_state |= DEPCOMPLETE; + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + inodedep->id_state |= ALLCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; + WORKLIST_REMOVE(&inodedep->id_list); } - /* - * If the inodedep has no dependencies associated with it, - * then we must free it here and free the file immediately. - * This case arises when an early allocation fails (for - * example, the user is over their file quota). 
- */ - if (free_inodedep(inodedep) == 0) + if (free_inodedep(inodedep) == 0) { WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); - else - add_to_worklist(&freefile->fx_list); - FREE_LOCK(&lk); + FREE_LOCK(&lk); + } else { + FREE_LOCK(&lk); + handle_workitem_freefile(freefile); + } } /* @@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir) if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); + FREE_LOCK(&lk); } else { dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + FREE_LOCK(&lk); + handle_workitem_remove(dirrem); } - FREE_LOCK(&lk); } /* @@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) } /* - * Called whenever the link count on an inode is increased. + * Called whenever the link count on an inode is changed. * It creates an inode dependency so that the new reference(s) * to the inode cannot be committed to disk until the updated * inode has been written. */ void -softdep_increase_linkcnt(ip) +softdep_change_linkcnt(ip) struct inode *ip; /* the inode with the increased link count */ { struct inodedep *inodedep; ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); + if (ip->i_nlink < ip->i_effnlink) + panic("softdep_change_linkcnt: bad delta"); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; FREE_LOCK(&lk); } @@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem) return; } ip = VTOI(vp); + ACQUIRE_LOCK(&lk); + if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) + panic("handle_workitem_remove: lost inodedep 1"); /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { ip->i_nlink--; + ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); - ip->i_flag |= IN_CHANGE; + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + FREE_LOCK(&lk); vput(vp); num_dirrem -= 1; WORKITEM_FREE(dirrem, D_DIRREM); @@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem) * the parent decremented to account for the loss of "..". */ ip->i_nlink -= 2; + ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); - ip->i_flag |= IN_CHANGE; + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + FREE_LOCK(&lk); if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* @@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem) WORKITEM_FREE(dirrem, D_DIRREM); return; } + /* + * If we still have a bitmap dependency, then the inode has never + * been written to disk. Drop the dependency as it is no longer + * necessary since the inode is being deallocated. We set the + * ALLCOMPLETE flags since the bitmap now properly shows that the + * inode is not allocated. Even if the inode is actively being + * written, it has been rolled back to its zero'ed state, so we + * are ensured that a zero inode is what is on the disk. For short + * lived files, this change will usually result in removing all the + * depedencies from the inode so that it can be freed immediately. 
+ */ ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC, - &inodedep); + if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) + panic("handle_workitem_remove: lost inodedep 2"); + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + inodedep->id_state |= ALLCOMPLETE; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_buf = NULL; + WORKLIST_REMOVE(&inodedep->id_list); + } dirrem->dm_state = 0; dirrem->dm_oldinum = dirrem->dm_dirinum; - WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); - FREE_LOCK(&lk); - vput(vp); + if (free_inodedep(inodedep) == 0) { + WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); + FREE_LOCK(&lk); + vput(vp); + } else { + FREE_LOCK(&lk); + vput(vp); + handle_workitem_remove(dirrem); + } } /* @@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip) FREE_LOCK(&lk); return; } - if (inodedep->id_nlinkdelta != 0) { - ip->i_effnlink -= inodedep->id_nlinkdelta; - ip->i_flag |= IN_MODIFIED; - inodedep->id_nlinkdelta = 0; - (void) free_inodedep(inodedep); - } + ip->i_effnlink -= inodedep->id_nlinkdelta; FREE_LOCK(&lk); } @@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor) FREE_LOCK(&lk); return; } - if (ip->i_nlink < ip->i_effnlink) + if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); - inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. @@ -4405,6 +4452,87 @@ clear_inodedeps(p) } /* + * Function to determine if the buffer has outstanding dependencies + * that will cause a roll-back if the buffer is written. If wantcount + * is set, return number of dependencies, otherwise just yes or no. + */ +static int +softdep_count_dependencies(bp, wantcount) + struct buf *bp; + int wantcount; +{ + struct worklist *wk; + struct inodedep *inodedep; + struct indirdep *indirdep; + struct allocindir *aip; + struct pagedep *pagedep; + struct diradd *dap; + int i, retval; + + retval = 0; + ACQUIRE_LOCK(&lk); + for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { + switch (wk->wk_type) { + + case D_INODEDEP: + inodedep = WK_INODEDEP(wk); + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + /* bitmap allocation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + if (TAILQ_FIRST(&inodedep->id_inoupdt)) { + /* direct block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + for (aip = LIST_FIRST(&indirdep->ir_deplisthd); + aip; aip = LIST_NEXT(aip, ai_next)) { + /* indirect block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_PAGEDEP: + pagedep = WK_PAGEDEP(wk); + for (i = 0; i < DAHASHSZ; i++) { + for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); + dap; dap = LIST_NEXT(dap, da_pdlist)) { + /* directory entry dependency */ + retval += 1; + if (!wantcount) + goto out; + } + } + continue; + + case D_BMSAFEMAP: + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + case D_MKDIR: + /* never a dependency on these blocks */ + continue; + + default: + panic("softdep_check_for_rollback: Unexpected type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } +out: + FREE_LOCK(&lk); + return retval; +} + +/* * Acquire exclusive access to a buffer. * Must be called with splbio blocked. * Return 1 if buffer was acquired. 
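The new io_countdeps hook above gives the buffer-flushing code a cheap way to ask whether writing a buffer now would simply be rolled back by soft updates. The sketch below shows the shape of the consumer side, modeled loosely on the flushbufqueues() change in vfs_bio.c further down: a dirty buffer with unresolved dependencies is tagged as deferred and skipped (the kernel also requeues it at the tail of the dirty queue). The queue layout, names, and TB_* flags are illustrative stand-ins, not kernel code.

```c
#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for a dirty-buffer queue entry. */
#define TB_DELWRI    0x01   /* buffer holds a delayed write */
#define TB_DEFERRED  0x02   /* already skipped once because of dependencies */

struct toy_buf {
    int         flags;
    int         ndeps;      /* stand-in for what io_countdeps would report */
    const char *name;
};

/* Stand-in for (*bioops.io_countdeps)(bp, 0): nonzero means a write would roll back. */
static int toy_countdeps(struct toy_buf *bp)
{
    return bp->ndeps;
}

/*
 * Model of the flushbufqueues() change: a dirty buffer whose write would
 * immediately be rolled back is tagged TB_DEFERRED and skipped this pass;
 * a buffer already deferred once is written rather than counted again.
 */
static int toy_flushqueue(struct toy_buf **queue, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        struct toy_buf *bp = queue[i];

        if ((bp->flags & TB_DELWRI) == 0)
            continue;
        if ((bp->flags & TB_DEFERRED) == 0 && toy_countdeps(bp) != 0) {
            bp->flags |= TB_DEFERRED;
            printf("deferring %s (%d dependencies)\n", bp->name, bp->ndeps);
            continue;
        }
        printf("writing %s\n", bp->name);
        return 1;                       /* one buffer flushed */
    }
    return 0;
}

int main(void)
{
    struct toy_buf a = { TB_DELWRI, 2, "dir block" };
    struct toy_buf b = { TB_DELWRI, 0, "data block" };
    struct toy_buf *queue[] = { &a, &b };

    toy_flushqueue(queue, 2);    /* defers a, writes b */
    toy_flushqueue(queue, 2);    /* a was already deferred, so it gets written */
    return 0;
}
```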
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 9d2b5c27f978..f12316ba6a8f 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -68,6 +68,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); +static void vfs_backgroundwritedone(struct buf *bp); static int flushbufqueues(void); static int bd_request; @@ -349,7 +350,7 @@ bufinit(void) * buffer cache operation. */ maxbufspace = (nbuf + 8) * DFLTBSIZE; - hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 5); + hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10); /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer @@ -593,6 +594,7 @@ int bwrite(struct buf * bp) { int oldflags, s; + struct buf *newbp; if (bp->b_flags & B_INVAL) { brelse(bp); @@ -606,8 +608,66 @@ bwrite(struct buf * bp) panic("bwrite: buffer is not busy???"); #endif s = splbio(); + /* + * If a background write is already in progress, delay + * writing this block if it is asynchronous. Otherwise + * wait for the background write to complete. + */ + if (bp->b_xflags & BX_BKGRDINPROG) { + if (bp->b_flags & B_ASYNC) { + splx(s); + bdwrite(bp); + return (0); + } + bp->b_xflags |= BX_BKGRDWAIT; + tsleep(&bp->b_xflags, PRIBIO, "biord", 0); + if (bp->b_xflags & BX_BKGRDINPROG) + panic("bwrite: still writing"); + } + + /* Mark the buffer clean */ bundirty(bp); + /* + * If this buffer is marked for background writing and we + * do not have to wait for it, make a copy and write the + * copy so as to leave this buffer ready for further use. + */ + if ((bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC)) { + if (bp->b_flags & B_CALL) + panic("bwrite: need chained iodone"); + + /* get a new block */ + newbp = geteblk(bp->b_bufsize); + + /* set it to be identical to the old block */ + memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); + bgetvp(bp->b_vp, newbp); + newbp->b_lblkno = bp->b_lblkno; + newbp->b_blkno = bp->b_blkno; + newbp->b_offset = bp->b_offset; + newbp->b_iodone = vfs_backgroundwritedone; + newbp->b_flags |= B_ASYNC | B_CALL; + newbp->b_flags &= ~B_INVAL; + + /* move over the dependencies */ + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps) + (*bioops.io_movedeps)(bp, newbp); + + /* + * Initiate write on the copy, release the original to + * the B_LOCKED queue so that it cannot go away until + * the background write completes. If not locked it could go + * away and then be reconstituted while it was being written. + * If the reconstituted buffer were written, we could end up + * with two background copies being written at the same time. + */ + bp->b_xflags |= BX_BKGRDINPROG; + bp->b_flags |= B_LOCKED; + bqrelse(bp); + bp = newbp; + } + bp->b_flags &= ~(B_READ | B_DONE | B_ERROR); bp->b_flags |= B_WRITEINPROG | B_CACHE; @@ -630,6 +690,56 @@ bwrite(struct buf * bp) } /* + * Complete a background write started from bwrite. + */ +static void +vfs_backgroundwritedone(bp) + struct buf *bp; +{ + struct buf *origbp; + + /* + * Find the original buffer that we are writing. + */ + if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL) + panic("backgroundwritedone: lost buffer"); + /* + * Process dependencies then return any unfinished ones. 
+ */ + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps) + (*bioops.io_movedeps)(bp, origbp); + /* + * Clear the BX_BKGRDINPROG flag in the original buffer + * and awaken it if it is waiting for the write to complete. + */ + origbp->b_xflags &= ~BX_BKGRDINPROG; + if (origbp->b_xflags & BX_BKGRDWAIT) { + origbp->b_xflags &= ~BX_BKGRDWAIT; + wakeup(&origbp->b_xflags); + } + /* + * Clear the B_LOCKED flag and remove it from the locked + * queue if it currently resides there. + */ + origbp->b_flags &= ~B_LOCKED; + if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { + bremfree(origbp); + bqrelse(origbp); + } + /* + * This buffer is marked B_NOCACHE, so when it is released + * by biodone, it will be tossed. We mark it with B_READ + * to avoid biodone doing a second vwakeup. + */ + bp->b_flags |= B_NOCACHE | B_READ; + bp->b_flags &= ~(B_CACHE | B_CALL | B_DONE); + bp->b_iodone = 0; + biodone(bp); +} + +/* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. * @@ -757,6 +867,10 @@ bundirty(bp) --numdirtybuffers; numdirtywakeup(); } + /* + * Since it is now being written, we can clear its deferred write flag. + */ + bp->b_flags &= ~B_DEFERRED; } /* @@ -895,12 +1009,16 @@ brelse(struct buf * bp) * * Normally we can do this whether a buffer is B_DELWRI or not. If * the buffer is an NFS buffer, it is tracking piecemeal writes or - * the commit state and we cannot afford to lose the buffer. + * the commit state and we cannot afford to lose the buffer. If the + * buffer has a background write in progress, we need to keep it + * around to prevent it from being reconstituted and starting a second + * background write. 
*/ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_tag == VT_NFS && !vn_isdisk(bp->b_vp) && - (bp->b_flags & B_DELWRI)) + (bp->b_flags & B_DELWRI) && + (bp->b_xflags & BX_BKGRDINPROG)) ) { int i, j, resid; @@ -997,6 +1115,9 @@ brelse(struct buf * bp) /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_flags |= B_INVAL; + bp->b_xflags &= ~BX_BKGRDWRITE; + if (bp->b_xflags & BX_BKGRDINPROG) + panic("losing buffer 1"); if (bp->b_kvasize) { bp->b_qindex = QUEUE_EMPTYKVA; kvawakeup = 1; @@ -1011,6 +1132,9 @@ brelse(struct buf * bp) /* buffers with junk contents */ } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { bp->b_flags |= B_INVAL; + bp->b_xflags &= ~BX_BKGRDWRITE; + if (bp->b_xflags & BX_BKGRDINPROG) + panic("losing buffer 2"); bp->b_qindex = QUEUE_CLEAN; if (bp->b_kvasize) kvawakeup = 1; @@ -1501,6 +1625,8 @@ restart: } if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) (*bioops.io_deallocate)(bp); + if (bp->b_xflags & BX_BKGRDINPROG) + panic("losing buffer 3"); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); @@ -1508,6 +1634,7 @@ restart: allocbuf(bp, 0); bp->b_flags = 0; + bp->b_xflags = 0; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; @@ -1761,7 +1888,8 @@ flushbufqueues(void) while (bp) { KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp)); - if ((bp->b_flags & B_DELWRI) != 0) { + if ((bp->b_flags & B_DELWRI) != 0 && + (bp->b_xflags & BX_BKGRDINPROG) == 0) { if (bp->b_flags & B_INVAL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0) panic("flushbufqueues: locked buf"); @@ -1770,13 +1898,24 @@ flushbufqueues(void) ++r; break; } + if (LIST_FIRST(&bp->b_dep) != NULL && + bioops.io_countdeps && + (bp->b_flags & B_DEFERRED) == 0 && + (*bioops.io_countdeps)(bp, 0)) { + TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY], + bp, b_freelist); + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], + bp, b_freelist); + bp->b_flags |= B_DEFERRED; + continue; + } vfs_bio_awrite(bp); ++r; break; } bp = TAILQ_NEXT(bp, b_freelist); } - return(r); + return (r); } /* diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 7168a894e993..f38bf4510138 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -65,6 +65,8 @@ extern struct bio_ops { void (*io_deallocate) __P((struct buf *)); int (*io_fsync) __P((struct vnode *)); int (*io_sync) __P((struct mount *)); + void (*io_movedeps) __P((struct buf *, struct buf *)); + int (*io_countdeps) __P((struct buf *, int)); } bioops; struct iodone_chain { @@ -194,7 +196,7 @@ struct buf { #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_UNUSED0 0x00000008 /* Old B_BAD */ -#define B_UNUSED1 0x00000010 /* Old B_BUSY */ +#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. 
*/ @@ -235,6 +237,9 @@ struct buf { */ #define BX_VNDIRTY 0x00000001 /* On vnode dirty list */ #define BX_VNCLEAN 0x00000002 /* On vnode clean list */ +#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */ +#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */ +#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */ #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 7168a894e993..f38bf4510138 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -65,6 +65,8 @@ extern struct bio_ops { void (*io_deallocate) __P((struct buf *)); int (*io_fsync) __P((struct vnode *)); int (*io_sync) __P((struct mount *)); + void (*io_movedeps) __P((struct buf *, struct buf *)); + int (*io_countdeps) __P((struct buf *, int)); } bioops; struct iodone_chain { @@ -194,7 +196,7 @@ struct buf { #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_UNUSED0 0x00000008 /* Old B_BAD */ -#define B_UNUSED1 0x00000010 /* Old B_BUSY */ +#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ @@ -235,6 +237,9 @@ struct buf { */ #define BX_VNDIRTY 0x00000001 /* On vnode dirty list */ #define BX_VNCLEAN 0x00000002 /* On vnode clean list */ +#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */ +#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */ +#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */ #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index c3e1c3172743..bdd00aa5565e 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -836,6 +836,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) brelse(bp); return (0); } + bp->b_xflags |= BX_BKGRDWRITE; cgp->cg_time = time_second; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) @@ -903,6 +904,7 @@ ffs_alloccg(ip, cg, bpref, size) brelse(bp); return (0); } + bp->b_xflags |= BX_BKGRDWRITE; cgp->cg_time = time_second; if (size == fs->fs_bsize) { bno = ffs_alloccgblk(ip, bp, bpref); @@ -1113,6 +1115,7 @@ ffs_clusteralloc(ip, cg, bpref, len) cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) goto fail; + bp->b_xflags |= BX_BKGRDWRITE; /* * Check to see if a cluster of the needed size (or bigger) is * available in this cylinder group. 
@@ -1227,6 +1230,7 @@ ffs_nodealloccg(ip, cg, ipref, mode) brelse(bp); return (0); } + bp->b_xflags |= BX_BKGRDWRITE; cgp->cg_time = time_second; if (ipref) { ipref %= fs->fs_ipg; @@ -1322,6 +1326,7 @@ ffs_blkfree(ip, bno, size) brelse(bp); return; } + bp->b_xflags |= BX_BKGRDWRITE; cgp->cg_time = time_second; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { @@ -1419,6 +1424,7 @@ ffs_checkblk(ip, bno, size) cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) panic("ffs_checkblk: cg magic mismatch"); + bp->b_xflags |= BX_BKGRDWRITE; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); @@ -1484,6 +1490,7 @@ ffs_vfree( pvp, ino, mode) brelse(bp); return (0); } + bp->b_xflags |= BX_BKGRDWRITE; cgp->cg_time = time_second; ino %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ino)) { diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 14e1bb244153..dee1891d7199 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -52,7 +52,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00 + * from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00 * $FreeBSD$ */ @@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *)); static void softdep_deallocate_dependencies __P((struct buf *)); static int softdep_fsync __P((struct vnode *)); static int softdep_process_worklist __P((struct mount *)); +static void softdep_move_dependencies __P((struct buf *, struct buf *)); +static int softdep_count_dependencies __P((struct buf *bp, int)); struct bio_ops bioops = { softdep_disk_io_initiation, /* io_start */ @@ -219,6 +221,8 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_fsync, /* io_fsync */ softdep_process_worklist, /* io_sync */ + softdep_move_dependencies, /* io_movedeps */ + softdep_count_dependencies, /* io_countdeps */ }; /* @@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ #ifdef DEBUG #include <vm/vm.h> #include <sys/sysctl.h> -#if defined(__FreeBSD__) SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); @@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0 SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); -#else /* !__FreeBSD__ */ -struct ctldebug debug20 = { "max_softdeps", &max_softdeps }; -struct ctldebug debug21 = { "tickdelay", &tickdelay }; -struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push }; -struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push }; -struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit }; -struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit }; -struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs }; -struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap }; -struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs }; -struct ctldebug debug30 = { "dir_entry", &stat_dir_entry }; -#endif /* !__FreeBSD__ */ - #endif /* DEBUG */ /* @@ -637,6 +627,31 @@ softdep_process_worklist(matchmnt) } /* + * 
Move dependencies from one buffer to another. + */ +static void +softdep_move_dependencies(oldbp, newbp) + struct buf *oldbp; + struct buf *newbp; +{ + struct worklist *wk, *wktail; + + if (LIST_FIRST(&newbp->b_dep) != NULL) + panic("softdep_move_dependencies: need merge code"); + wktail = 0; + ACQUIRE_LOCK(&lk); + while (wk = LIST_FIRST(&oldbp->b_dep)) { + LIST_REMOVE(wk, wk_list); + if (wktail == 0) + LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); + else + LIST_INSERT_AFTER(wktail, wk, wk_list); + wktail = wk; + } + FREE_LOCK(&lk); +} + +/* * Purge the work list of all items associated with a particular mount point. */ int @@ -1633,11 +1648,6 @@ softdep_setup_freeblocks(ip, length) if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); /* - * Add the freeblks structure to the list of operations that - * must await the zero'ed inode being written to disk. - */ - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); - /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated * with this inode are obsolete and can simply be de-allocated. @@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length) merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, 1); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency, then the inode has never been + * written to disk, so we can process the freeblks immediately. + */ + if ((inodedep->id_state & DEPCOMPLETE) == 0) + handle_workitem_freeblocks(freeblks); + else + WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); FREE_LOCK(&lk); bdwrite(bp); /* @@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode) */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) { - add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); + handle_workitem_freefile(freefile); return; } /* * If we still have a bitmap dependency, then the inode has never * been written to disk. Drop the dependency as it is no longer - * necessary since the inode is being deallocated. We could process - * the freefile immediately, but then we would have to clear the - * id_inowait dependencies here and it is easier just to let the - * zero'ed inode be written and let them be cleaned up in the - * normal followup actions that follow the inode write. + * necessary since the inode is being deallocated. We set the + * ALLCOMPLETE flags since the bitmap now properly shows that the + * inode is not allocated. Even if the inode is actively being + * written, it has been rolled back to its zero'ed state, so we + * are ensured that a zero inode is what is on the disk. For short + * lived files, this change will usually result in removing all the + * depedencies from the inode so that it can be freed immediately. */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - inodedep->id_state |= DEPCOMPLETE; + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + inodedep->id_state |= ALLCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; + WORKLIST_REMOVE(&inodedep->id_list); } - /* - * If the inodedep has no dependencies associated with it, - * then we must free it here and free the file immediately. - * This case arises when an early allocation fails (for - * example, the user is over their file quota). 
- */ - if (free_inodedep(inodedep) == 0) + if (free_inodedep(inodedep) == 0) { WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); - else - add_to_worklist(&freefile->fx_list); - FREE_LOCK(&lk); + FREE_LOCK(&lk); + } else { + FREE_LOCK(&lk); + handle_workitem_freefile(freefile); + } } /* @@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir) if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); + FREE_LOCK(&lk); } else { dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + FREE_LOCK(&lk); + handle_workitem_remove(dirrem); } - FREE_LOCK(&lk); } /* @@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) } /* - * Called whenever the link count on an inode is increased. + * Called whenever the link count on an inode is changed. * It creates an inode dependency so that the new reference(s) * to the inode cannot be committed to disk until the updated * inode has been written. */ void -softdep_increase_linkcnt(ip) +softdep_change_linkcnt(ip) struct inode *ip; /* the inode with the increased link count */ { struct inodedep *inodedep; ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); + if (ip->i_nlink < ip->i_effnlink) + panic("softdep_change_linkcnt: bad delta"); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; FREE_LOCK(&lk); } @@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem) return; } ip = VTOI(vp); + ACQUIRE_LOCK(&lk); + if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) + panic("handle_workitem_remove: lost inodedep 1"); /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { ip->i_nlink--; + ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); - ip->i_flag |= IN_CHANGE; + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + FREE_LOCK(&lk); vput(vp); num_dirrem -= 1; WORKITEM_FREE(dirrem, D_DIRREM); @@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem) * the parent decremented to account for the loss of "..". */ ip->i_nlink -= 2; + ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); - ip->i_flag |= IN_CHANGE; + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + FREE_LOCK(&lk); if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* @@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem) WORKITEM_FREE(dirrem, D_DIRREM); return; } + /* + * If we still have a bitmap dependency, then the inode has never + * been written to disk. Drop the dependency as it is no longer + * necessary since the inode is being deallocated. We set the + * ALLCOMPLETE flags since the bitmap now properly shows that the + * inode is not allocated. Even if the inode is actively being + * written, it has been rolled back to its zero'ed state, so we + * are ensured that a zero inode is what is on the disk. For short + * lived files, this change will usually result in removing all the + * depedencies from the inode so that it can be freed immediately. 
+ */ ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC, - &inodedep); + if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) + panic("handle_workitem_remove: lost inodedep 2"); + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + inodedep->id_state |= ALLCOMPLETE; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_buf = NULL; + WORKLIST_REMOVE(&inodedep->id_list); + } dirrem->dm_state = 0; dirrem->dm_oldinum = dirrem->dm_dirinum; - WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); - FREE_LOCK(&lk); - vput(vp); + if (free_inodedep(inodedep) == 0) { + WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); + FREE_LOCK(&lk); + vput(vp); + } else { + FREE_LOCK(&lk); + vput(vp); + handle_workitem_remove(dirrem); + } } /* @@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip) FREE_LOCK(&lk); return; } - if (inodedep->id_nlinkdelta != 0) { - ip->i_effnlink -= inodedep->id_nlinkdelta; - ip->i_flag |= IN_MODIFIED; - inodedep->id_nlinkdelta = 0; - (void) free_inodedep(inodedep); - } + ip->i_effnlink -= inodedep->id_nlinkdelta; FREE_LOCK(&lk); } @@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor) FREE_LOCK(&lk); return; } - if (ip->i_nlink < ip->i_effnlink) + if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); - inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. @@ -4405,6 +4452,87 @@ clear_inodedeps(p) } /* + * Function to determine if the buffer has outstanding dependencies + * that will cause a roll-back if the buffer is written. If wantcount + * is set, return number of dependencies, otherwise just yes or no. + */ +static int +softdep_count_dependencies(bp, wantcount) + struct buf *bp; + int wantcount; +{ + struct worklist *wk; + struct inodedep *inodedep; + struct indirdep *indirdep; + struct allocindir *aip; + struct pagedep *pagedep; + struct diradd *dap; + int i, retval; + + retval = 0; + ACQUIRE_LOCK(&lk); + for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { + switch (wk->wk_type) { + + case D_INODEDEP: + inodedep = WK_INODEDEP(wk); + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + /* bitmap allocation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + if (TAILQ_FIRST(&inodedep->id_inoupdt)) { + /* direct block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + for (aip = LIST_FIRST(&indirdep->ir_deplisthd); + aip; aip = LIST_NEXT(aip, ai_next)) { + /* indirect block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_PAGEDEP: + pagedep = WK_PAGEDEP(wk); + for (i = 0; i < DAHASHSZ; i++) { + for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); + dap; dap = LIST_NEXT(dap, da_pdlist)) { + /* directory entry dependency */ + retval += 1; + if (!wantcount) + goto out; + } + } + continue; + + case D_BMSAFEMAP: + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + case D_MKDIR: + /* never a dependency on these blocks */ + continue; + + default: + panic("softdep_check_for_rollback: Unexpected type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } +out: + FREE_LOCK(&lk); + return retval; +} + +/* * Acquire exclusive access to a buffer. * Must be called with splbio blocked. * Return 1 if buffer was acquired. 
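The softdep_increase_linkcnt() to softdep_change_linkcnt() rename in the hunks that follow reflects the new bookkeeping: every link-count change records the delta between the on-disk count (i_nlink) and the effective count (i_effnlink) in id_nlinkdelta, and softdep_load_inodeblock() now applies that delta unconditionally when an inode is reloaded. Here is a minimal model of that idea using toy structures; the field names echo the kernel's but these are not the real struct inode or inodedep.

```c
#include <assert.h>
#include <stdio.h>

/*
 * Simplified stand-ins: i_nlink is the link count as it will be written
 * to disk, i_effnlink is the count as seen by the running system, and
 * id_nlinkdelta records the gap between the two.
 */
struct toy_inode {
    int i_nlink;
    int i_effnlink;
};

struct toy_inodedep {
    int id_nlinkdelta;
};

/* Model of softdep_change_linkcnt(): record the current delta. */
static void change_linkcnt(struct toy_inode *ip, struct toy_inodedep *dep)
{
    assert(ip->i_nlink >= ip->i_effnlink);   /* "bad delta" panic in the kernel */
    dep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
}

/* Model of softdep_load_inodeblock(): apply the delta when the inode is read. */
static void load_inodeblock(struct toy_inode *ip, struct toy_inodedep *dep)
{
    ip->i_effnlink -= dep->id_nlinkdelta;
}

int main(void)
{
    struct toy_inode ip = { .i_nlink = 2, .i_effnlink = 2 };
    struct toy_inodedep dep = { 0 };

    /* A directory entry is removed: the effective count drops at once,
       but the on-disk count stays put until the dependencies resolve. */
    ip.i_effnlink--;
    change_linkcnt(&ip, &dep);
    printf("nlink=%d effnlink=%d delta=%d\n",
           ip.i_nlink, ip.i_effnlink, dep.id_nlinkdelta);

    /* Re-reading the inode from its disk block restores the effective view. */
    ip.i_effnlink = ip.i_nlink;      /* value as stored on disk */
    load_inodeblock(&ip, &dep);
    assert(ip.i_effnlink == 1);
    return 0;
}
```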
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c index 72f819b23cde..4b8411d32353 100644 --- a/sys/ufs/ffs/ffs_softdep_stub.c +++ b/sys/ufs/ffs/ffs_softdep_stub.c @@ -210,11 +210,11 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) } void -softdep_increase_linkcnt(ip) +softdep_change_linkcnt(ip) struct inode *ip; { - panic("softdep_increase_linkcnt called"); + panic("softdep_change_linkcnt called"); } void diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 18fb15396b96..77e821f53a9a 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -671,10 +671,6 @@ ffs_mountfs(devvp, mp, p, malloctype) bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; - if (ronly == 0) { - fs->fs_fmod = 1; - fs->fs_clean = 0; - } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) @@ -747,6 +743,7 @@ ffs_mountfs(devvp, mp, p, malloctype) free(base, M_UFSMNT); goto out; } + fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } @@ -964,9 +961,9 @@ loop: simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if ((vp->v_type == VNON) || (((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && - (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) { + if (vp->v_type == VNON || ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + TAILQ_EMPTY(&vp->v_dirtyblkhd))) { simple_unlock(&vp->v_interlock); continue; } @@ -1080,7 +1077,7 @@ restart: return (error); } bzero((caddr_t)ip, sizeof(struct inode)); - lockinit(&ip->i_lock, PINOD, "inode", 0, 0); + lockinit(&ip->i_lock, PINOD, "inode", 0, LK_CANRECURSE); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index eb99b2c7c3ae..6087d81dbdd3 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -123,10 +123,11 @@ ffs_fsync(ap) struct vnode *vp = ap->a_vp; struct buf *bp; struct buf *nbp; - int s, error, passes, skipmeta; + int s, error, wait, passes, skipmeta; daddr_t lbn; + wait = (ap->a_waitfor == MNT_WAIT); if (vn_isdisk(vp)) { lbn = INT_MAX; if (vp->v_specmountpoint != NULL && @@ -143,7 +144,7 @@ ffs_fsync(ap) */ passes = NIADDR + 1; skipmeta = 0; - if (ap->a_waitfor == MNT_WAIT) + if (wait) skipmeta = 1; s = splbio(); loop: @@ -153,33 +154,43 @@ loop: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* - * First time through on a synchronous call, - * or if it's already scheduled, skip to the next - * buffer + * Reasons to skip this buffer: it has already been considered + * on this pass, this pass is the first time through on a + * synchronous flush request and the buffer being considered + * is metadata, the buffer has dependencies that will cause + * it to be redirtied and it has not already been deferred, + * or it is already being written. 
*/ - if ((bp->b_flags & B_SCANNED) || - ((skipmeta == 1) && (bp->b_lblkno < 0)) || - BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) + if ((bp->b_flags & B_SCANNED) != 0) + continue; + bp->b_flags |= B_SCANNED; + if ((skipmeta == 1 && bp->b_lblkno < 0)) + continue; + if (!wait && LIST_FIRST(&bp->b_dep) != NULL && + (bp->b_flags & B_DEFERRED) == 0 && + bioops.io_countdeps && (*bioops.io_countdeps)(bp, 0)) { + bp->b_flags |= B_DEFERRED; + continue; + } + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); + if (vp != bp->b_vp) + panic("ffs_fsync: vp != vp->b_vp"); /* - * If data is outstanding to another vnode, or we were - * asked to wait for everything, or it's not a file or BDEV, - * start the IO on this buffer immediatly. + * If this is a synchronous flush request, or it is not a + * file or device, start the write on this buffer immediatly. */ - bp->b_flags |= B_SCANNED; - if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || - ((vp->v_type != VREG) && (vp->v_type != VBLK))) { + if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) { /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ - if (passes > 0 || (ap->a_waitfor != MNT_WAIT)) { - if ((bp->b_flags & B_CLUSTEROK) && - ap->a_waitfor != MNT_WAIT) { + if (passes > 0 || !wait) { + if ((bp->b_flags & B_CLUSTEROK) && !wait) { BUF_UNLOCK(bp); (void) vfs_bio_awrite(bp); } else { @@ -224,7 +235,7 @@ loop: goto loop; } - if (ap->a_waitfor == MNT_WAIT) { + if (wait) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, @@ -260,5 +271,5 @@ loop: } } splx(s); - return (UFS_UPDATE(vp, ap->a_waitfor == MNT_WAIT)); + return (UFS_UPDATE(vp, wait)); } diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index feec939d5842..d576be977abc 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -102,6 +102,6 @@ void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, int)); void softdep_setup_directory_change __P((struct buf *, struct inode *, struct inode *, long, int)); -void softdep_increase_linkcnt __P((struct inode *)); +void softdep_change_linkcnt __P((struct inode *)); #endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 77c0151abd49..7a0232d83f03 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -899,17 +899,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir) ep->d_reclen += dp->i_reclen; } out: - if (ip) { - ip->i_effnlink--; - ip->i_flag |= IN_CHANGE; - } if (DOINGSOFTDEP(dvp)) { - if (ip) + if (ip) { + ip->i_effnlink--; + softdep_change_linkcnt(ip); softdep_setup_remove(bp, dp, ip, isrmdir); + } bdwrite(bp); } else { - if (ip) + if (ip) { + ip->i_effnlink--; ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + } if (flags & DOWHITEOUT) error = VOP_BWRITE(bp->b_vp, bp); else if (DOINGASYNC(dvp) && dp->i_count != 0) { @@ -946,12 +948,13 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) if (!OFSFMT(vdp)) ep->d_type = newtype; oip->i_effnlink--; - oip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vdp)) { + softdep_change_linkcnt(oip); softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { oip->i_nlink--; + oip->i_flag |= IN_CHANGE; if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 9adae8ca5947..9616b3792bd8 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ 
b/sys/ufs/ufs/ufs_vnops.c @@ -754,7 +754,7 @@ ufs_link(ap) ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) - softdep_increase_linkcnt(ip); + softdep_change_linkcnt(ip); error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); if (!error) { ufs_makedirentry(ip, cnp, &newdir); @@ -765,6 +765,8 @@ ufs_link(ap) ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_change_linkcnt(ip); } out1: if (tdvp != vp) @@ -1014,7 +1016,7 @@ abortit: ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) - softdep_increase_linkcnt(ip); + softdep_change_linkcnt(ip); if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)))) != 0) { VOP_UNLOCK(fvp, 0, p); @@ -1079,7 +1081,7 @@ abortit: dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) - softdep_increase_linkcnt(dp); + softdep_change_linkcnt(dp); error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | DOINGASYNC(tdvp))); if (error) @@ -1092,6 +1094,8 @@ abortit: dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_change_linkcnt(dp); (void)UFS_UPDATE(tdvp, 1); } goto bad; @@ -1146,10 +1150,12 @@ abortit: if (doingdirectory) { if (!newparent) { dp->i_effnlink--; - dp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_change_linkcnt(dp); } xp->i_effnlink--; - xp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(xp); } VN_POLLEVENT(tdvp, POLLWRITE); if (doingdirectory && !DOINGSOFTDEP(tvp)) { @@ -1164,9 +1170,12 @@ abortit: * disk, so when running with that code we avoid doing * them now. */ - if (!newparent) + if (!newparent) { dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + } xp->i_nlink--; + xp->i_flag |= IN_CHANGE; ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC; if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, tcnp->cn_cred, tcnp->cn_proc)) != 0) @@ -1247,6 +1256,8 @@ out: ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; + if (DOINGSOFTDEP(fvp)) + softdep_change_linkcnt(ip); vput(fvp); } else vrele(fvp); @@ -1359,7 +1370,7 @@ ufs_mkdir(ap) ip->i_effnlink = 2; ip->i_nlink = 2; if (DOINGSOFTDEP(tvp)) - softdep_increase_linkcnt(ip); + softdep_change_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; @@ -1372,7 +1383,7 @@ ufs_mkdir(ap) dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) - softdep_increase_linkcnt(dp); + softdep_change_linkcnt(dp); error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); if (error) goto bad; @@ -1440,6 +1451,8 @@ bad: dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(dvp)) + softdep_change_linkcnt(dp); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. @@ -1447,6 +1460,8 @@ bad: ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(ip); vput(tvp); } out: @@ -1505,29 +1520,36 @@ ufs_rmdir(ap) * inode. If we crash in between, the directory * will be reattached to lost+found, */ + dp->i_effnlink--; + ip->i_effnlink--; + if (DOINGSOFTDEP(vp)) { + softdep_change_linkcnt(dp); + softdep_change_linkcnt(ip); + } error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); - if (error) + if (error) { + dp->i_effnlink++; + ip->i_effnlink++; + if (DOINGSOFTDEP(vp)) { + softdep_change_linkcnt(dp); + softdep_change_linkcnt(ip); + } goto out; + } VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); cache_purge(dvp); /* * Truncate inode. The only stuff left in the directory is "." and * "..". The "." 
reference is inconsequential since we are quashing - * it. We have removed the "." reference and the reference in the - * parent directory, but there may be other hard links. So, - * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no - * new entries are made. The soft dependency code will arrange to - * do these operations after the parent directory entry has been - * deleted on disk, so when running with that code we avoid doing - * them now. + * it. The soft dependency code will arrange to do these operations + * after the parent directory entry has been deleted on disk, so + * when running with that code we avoid doing them now. */ - dp->i_effnlink--; - dp->i_flag |= IN_CHANGE; - ip->i_effnlink--; - ip->i_flag |= IN_CHANGE; if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; + dp->i_flag |= IN_CHANGE; ip->i_nlink--; + ip->i_flag |= IN_CHANGE; ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC; error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred, cnp->cn_proc); @@ -2119,7 +2141,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_effnlink = 1; ip->i_nlink = 1; if (DOINGSOFTDEP(tvp)) - softdep_increase_linkcnt(ip); + softdep_change_linkcnt(ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser_xxx(cnp->cn_cred, 0, 0)) ip->i_mode &= ~ISGID; @@ -2148,6 +2170,8 @@ bad: ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(ip); vput(tvp); return (error); } |
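For completeness, the other half of the background-write handshake: vfs_backgroundwritedone() clears BX_BKGRDINPROG on the original buffer and wakes any thread that blocked at the top of bwrite() with BX_BKGRDWAIT set. Below is a toy, single-threaded model of that flag exchange; the real code sleeps with tsleep() and is woken by wakeup(), and the TX_* names here are stand-ins rather than the kernel's flag values.

```c
#include <stdio.h>

/* Simplified flag stand-ins (not the kernel's BX_* values). */
#define TX_BKGRDINPROG 0x01   /* background copy is being written */
#define TX_BKGRDWAIT   0x02   /* someone is waiting for that write */

struct toy_buf {
    int xflags;
};

/* Stand-in for wakeup(&bp->b_xflags). */
static void toy_wakeup(struct toy_buf *bp)
{
    printf("wakeup sleeper on buffer %p\n", (void *)bp);
}

/*
 * Model of vfs_backgroundwritedone(): when the copy's write finishes,
 * clear the in-progress flag on the original buffer and wake any thread
 * that blocked in bwrite() waiting for the background write.
 */
static void backgroundwritedone(struct toy_buf *origbp)
{
    origbp->xflags &= ~TX_BKGRDINPROG;
    if (origbp->xflags & TX_BKGRDWAIT) {
        origbp->xflags &= ~TX_BKGRDWAIT;
        toy_wakeup(origbp);
    }
}

/*
 * Model of the front of bwrite(): a synchronous writer that finds a
 * background write in progress marks itself as waiting; in the kernel
 * it then tsleep()s until backgroundwritedone() issues the wakeup.
 */
static void sync_bwrite(struct toy_buf *bp)
{
    if (bp->xflags & TX_BKGRDINPROG) {
        bp->xflags |= TX_BKGRDWAIT;
        printf("would sleep here until the background write completes\n");
    }
}

int main(void)
{
    struct toy_buf bp = { TX_BKGRDINPROG };

    sync_bwrite(&bp);          /* finds a background write, marks WAIT */
    backgroundwritedone(&bp);  /* completion clears the flags, wakes us */
    return 0;
}
```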
