author | Mateusz Guzik <mjg@FreeBSD.org> | 2023-08-24 05:34:08 +0000
committer | Mateusz Guzik <mjg@FreeBSD.org> | 2023-08-24 22:12:40 +0000
commit | c1d85ac3df82df721e3d33b292579c4de491488e (patch)
tree | 0d5c39906fa972f701cdbf013dd5677c0fccbb80 /sys/kern/vfs_subr.c
parent | 9b5d724cad10087e34165199e55f15f2df744ed5 (diff)
Diffstat (limited to 'sys/kern/vfs_subr.c')
-rw-r--r-- | sys/kern/vfs_subr.c | 38 |
1 file changed, 38 insertions, 0 deletions
```diff
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 0f3f00abfd4a..f1e1d1e3a0ca 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -196,6 +196,10 @@ static counter_u64_t recycles_free_count;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD, &recycles_free_count,
     "Number of free vnodes recycled to meet vnode cache targets");
 
+static counter_u64_t vnode_skipped_requeues;
+SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnode_skipped_requeues, CTLFLAG_RD, &vnode_skipped_requeues,
+    "Number of times LRU requeue was skipped due to lock contention");
+
 static u_long deferred_inact;
 SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
     &deferred_inact, 0, "Number of times inactive processing was deferred");
@@ -732,6 +736,7 @@ vntblinit(void *dummy __unused)
 	vnodes_created = counter_u64_alloc(M_WAITOK);
 	recycles_count = counter_u64_alloc(M_WAITOK);
 	recycles_free_count = counter_u64_alloc(M_WAITOK);
+	vnode_skipped_requeues = counter_u64_alloc(M_WAITOK);
 
 	/*
 	 * Initialize the filesystem syncer.
@@ -1280,11 +1285,13 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 	struct vnode *vp;
 	struct mount *mp;
 	int ocount;
+	bool retried;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
 	ocount = count;
+	retried = false;
 	vp = mvp;
 	for (;;) {
 		if (count == 0) {
@@ -1292,6 +1299,24 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 		}
 		vp = TAILQ_NEXT(vp, v_vnodelist);
 		if (__predict_false(vp == NULL)) {
+			/*
+			 * The free vnode marker can be past eligible vnodes:
+			 * 1. if vdbatch_process trylock failed
+			 * 2. if vtryrecycle failed
+			 *
+			 * If so, start the scan from scratch.
+			 */
+			if (!retried && vnlru_read_freevnodes() > 0) {
+				TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+				TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+				vp = mvp;
+				retried = true;
+				continue;
+			}
+
+			/*
+			 * Give up
+			 */
 			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 			TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
 			break;
@@ -3528,6 +3553,17 @@ vdbatch_process(struct vdbatch *vd)
 	MPASS(curthread->td_pinned > 0);
 	MPASS(vd->index == VDBATCH_SIZE);
 
+	/*
+	 * Attempt to requeue the passed batch, but give up easily.
+	 *
+	 * Despite batching the mechanism is prone to transient *significant*
+	 * lock contention, where vnode_list_mtx becomes the primary bottleneck
+	 * if multiple CPUs get here (one real-world example is highly parallel
+	 * do-nothing make, which will stat *tons* of vnodes). Since it is
+	 * quasi-LRU (read: not that great even if fully honoured) just dodge
+	 * the problem. Parties which don't like it are welcome to implement
+	 * something better.
+	 */
 	critical_enter();
 	if (mtx_trylock(&vnode_list_mtx)) {
 		for (i = 0; i < VDBATCH_SIZE; i++) {
@@ -3540,6 +3576,8 @@ vdbatch_process(struct vdbatch *vd)
 		}
 		mtx_unlock(&vnode_list_mtx);
 	} else {
+		counter_u64_add(vnode_skipped_requeues, 1);
+
 		for (i = 0; i < VDBATCH_SIZE; i++) {
 			vp = vd->tab[i];
 			vd->tab[i] = NULL;
```
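The behavioural core of the change is in vdbatch_process(): the batch requeue now takes vnode_list_mtx with mtx_trylock() and, when the lock is contended, bumps the new counter and falls through to clearing the batch entries without touching the global vnode list. Below is a minimal userland sketch of that trylock-or-skip pattern, not code from the commit or the kernel: pthread_mutex_trylock() stands in for mtx_trylock(), a C11 atomic stands in for counter_u64_add(), and names such as requeue_batch and skipped_requeues are made up for the example.

```c
/*
 * Userland sketch of the trylock-or-skip idea used in vdbatch_process():
 * try to take the list lock, do the requeue work if that succeeds, otherwise
 * just count the skip and drop the batch instead of blocking on the lock.
 * All names here (struct batch, requeue_batch, skipped_requeues) are
 * illustrative, not taken from the FreeBSD sources.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define BATCH_SIZE 8

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_ulong skipped_requeues;	/* analogue of vnode_skipped_requeues */

struct batch {
	int tab[BATCH_SIZE];
	int index;
};

static void
requeue_batch(struct batch *b)
{
	if (pthread_mutex_trylock(&list_lock) == 0) {
		/* Lock acquired cheaply: do the (stand-in) requeue work. */
		for (int i = 0; i < b->index; i++)
			b->tab[i] = 0;
		pthread_mutex_unlock(&list_lock);
	} else {
		/* Contended: skip the requeue entirely, but record the skip. */
		atomic_fetch_add(&skipped_requeues, 1);
		for (int i = 0; i < b->index; i++)
			b->tab[i] = 0;
	}
	b->index = 0;
}

int
main(void)
{
	struct batch b = {
		.tab = { 1, 2, 3, 4, 5, 6, 7, 8 },
		.index = BATCH_SIZE,
	};

	requeue_batch(&b);
	printf("skipped requeues: %lu\n", atomic_load(&skipped_requeues));
	return (0);
}
```

On a kernel carrying this change, the real skip count should surface as the read-only sysctl vfs.vnode_skipped_requeues, per the SYSCTL_COUNTER_U64 declaration in the first hunk; the first hunk in vnlru_free_impl() compensates for skipped requeues by restarting the marker scan from the list head once before giving up.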