author     Mateusz Guzik <mjg@FreeBSD.org>  2023-08-24 05:34:08 +0000
committer  Mateusz Guzik <mjg@FreeBSD.org>  2023-08-24 22:12:40 +0000
commit     c1d85ac3df82df721e3d33b292579c4de491488e (patch)
tree       0d5c39906fa972f701cdbf013dd5677c0fccbb80 /sys/kern/vfs_subr.c
parent     9b5d724cad10087e34165199e55f15f2df744ed5 (diff)
Diffstat (limited to 'sys/kern/vfs_subr.c')
-rw-r--r--  sys/kern/vfs_subr.c  38
1 file changed, 38 insertions, 0 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 0f3f00abfd4a..f1e1d1e3a0ca 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -196,6 +196,10 @@ static counter_u64_t recycles_free_count;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD, &recycles_free_count,
"Number of free vnodes recycled to meet vnode cache targets");
+static counter_u64_t vnode_skipped_requeues;
+SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnode_skipped_requeues, CTLFLAG_RD, &vnode_skipped_requeues,
+ "Number of times LRU requeue was skipped due to lock contention");
+
static u_long deferred_inact;
SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
&deferred_inact, 0, "Number of times inactive processing was deferred");
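
The counter added above is exported read-only as vfs.vnode_skipped_requeues. As a quick way to watch it from userland, a minimal sketch using sysctlbyname(3) could look like the following; it is not part of the commit and only assumes the node is exported as a 64-bit quantity, which SYSCTL_COUNTER_U64 provides:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t skipped;
	size_t len = sizeof(skipped);

	if (sysctlbyname("vfs.vnode_skipped_requeues", &skipped, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("LRU requeues skipped: %ju\n", (uintmax_t)skipped);
	return (0);
}

The same value is visible with "sysctl vfs.vnode_skipped_requeues" on a kernel running this change.
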
@@ -732,6 +736,7 @@ vntblinit(void *dummy __unused)
vnodes_created = counter_u64_alloc(M_WAITOK);
recycles_count = counter_u64_alloc(M_WAITOK);
recycles_free_count = counter_u64_alloc(M_WAITOK);
+ vnode_skipped_requeues = counter_u64_alloc(M_WAITOK);
/*
* Initialize the filesystem syncer.
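
For context, counter(9) provides per-CPU counters, so the hot-path increment added later in this diff never takes a lock or bounces a shared cache line. A rough, illustrative sketch of the allocate/add/fetch lifecycle follows; the names are hypothetical and it is not part of the commit:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/malloc.h>

static counter_u64_t example_events;	/* hypothetical per-CPU counter */

static void
example_init(void)
{
	/* M_WAITOK: may sleep until memory is available. */
	example_events = counter_u64_alloc(M_WAITOK);
}

static void
example_hot_path(void)
{
	/* Lockless per-CPU increment, cheap even under heavy parallelism. */
	counter_u64_add(example_events, 1);
}

static uint64_t
example_snapshot(void)
{
	/* Sums the per-CPU parts; only approximate while CPUs keep adding. */
	return (counter_u64_fetch(example_events));
}
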
@@ -1280,11 +1285,13 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
struct vnode *vp;
struct mount *mp;
int ocount;
+ bool retried;
mtx_assert(&vnode_list_mtx, MA_OWNED);
if (count > max_vnlru_free)
count = max_vnlru_free;
ocount = count;
+ retried = false;
vp = mvp;
for (;;) {
if (count == 0) {
@@ -1292,6 +1299,24 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
}
vp = TAILQ_NEXT(vp, v_vnodelist);
if (__predict_false(vp == NULL)) {
+ /*
+ * The free vnode marker can be past eligible vnodes:
+ * 1. if vdbatch_process trylock failed
+ * 2. if vtryrecycle failed
+ *
+ * If so, start the scan from scratch.
+ */
+ if (!retried && vnlru_read_freevnodes() > 0) {
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+ vp = mvp;
+ retried = true;
+ continue;
+ }
+
+ /*
+ * Give up.
+ */
TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
break;
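
The retry above relies on vnlru's marker vnode (mvp): a placeholder list entry that records the scan position and can be requeued to restart the walk from the front. The following self-contained sketch shows the same marker-and-retry pattern on a plain sys/queue(3) TAILQ; the types and names are hypothetical and it is not from the commit:

#include <sys/queue.h>

#include <stdbool.h>
#include <stdio.h>

struct item {
	TAILQ_ENTRY(item) link;
	bool is_marker;		/* placeholder entry, never processed */
	bool eligible;
};

TAILQ_HEAD(itemlist, item);

static int
scan_with_restart(struct itemlist *list, struct item *marker)
{
	struct item *it;
	bool retried = false;
	int found = 0;

	it = marker;
	for (;;) {
		it = TAILQ_NEXT(it, link);
		if (it == NULL) {
			if (!retried) {
				/* The marker may sit past eligible items; retry once from the head. */
				TAILQ_REMOVE(list, marker, link);
				TAILQ_INSERT_HEAD(list, marker, link);
				it = marker;
				retried = true;
				continue;
			}
			/* Give up: park the marker at the tail for the next caller. */
			TAILQ_REMOVE(list, marker, link);
			TAILQ_INSERT_TAIL(list, marker, link);
			break;
		}
		if (it->is_marker || !it->eligible)
			continue;
		/* "Process" the item and advance the marker past it. */
		TAILQ_REMOVE(list, marker, link);
		TAILQ_INSERT_AFTER(list, it, marker, link);
		found++;
	}
	return (found);
}

int
main(void)
{
	struct itemlist list = TAILQ_HEAD_INITIALIZER(list);
	struct item a = { .eligible = true };
	struct item b = { .eligible = false };
	struct item marker = { .is_marker = true };

	/*
	 * Leave the marker past the only eligible item, as if an earlier
	 * pass had stepped over it; only the retry finds it.
	 */
	TAILQ_INSERT_TAIL(&list, &a, link);
	TAILQ_INSERT_TAIL(&list, &b, link);
	TAILQ_INSERT_TAIL(&list, &marker, link);
	printf("found %d eligible item(s)\n", scan_with_restart(&list, &marker));
	return (0);
}
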
@@ -3528,6 +3553,17 @@ vdbatch_process(struct vdbatch *vd)
MPASS(curthread->td_pinned > 0);
MPASS(vd->index == VDBATCH_SIZE);
+ /*
+ * Attempt to requeue the passed batch, but give up easily.
+ *
+ * Despite batching, the mechanism is prone to transient *significant*
+ * lock contention, where vnode_list_mtx becomes the primary bottleneck
+ * if multiple CPUs get here (one real-world example is highly parallel
+ * do-nothing make, which will stat *tons* of vnodes). Since it is
+ * quasi-LRU (read: not that great even if fully honoured), just dodge
+ * the problem. Parties which don't like it are welcome to implement
+ * something better.
+ */
critical_enter();
if (mtx_trylock(&vnode_list_mtx)) {
for (i = 0; i < VDBATCH_SIZE; i++) {
@@ -3540,6 +3576,8 @@ vdbatch_process(struct vdbatch *vd)
}
mtx_unlock(&vnode_list_mtx);
} else {
+ counter_u64_add(vnode_skipped_requeues, 1);
+
for (i = 0; i < VDBATCH_SIZE; i++) {
vp = vd->tab[i];
vd->tab[i] = NULL;
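
The requeue-or-skip logic above comes down to: opportunistically trylock the contended global lock, requeue the batch if that succeeds, and otherwise just count the skip and discard the batch. A userland analogue of the pattern, with hypothetical names and pthread/stdatomic standing in for mtx(9) and counter(9) (not from the commit):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define	BATCH_SIZE	8

static pthread_mutex_t global_list_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_ulong skipped_requeues;

struct batch {
	void *tab[BATCH_SIZE];
	int index;
};

static void
requeue_one(void *item)
{
	/* Placeholder for the real work: move "item" on the global list. */
	(void)item;
}

static void
batch_process(struct batch *b)
{
	int i;

	if (pthread_mutex_trylock(&global_list_lock) == 0) {
		for (i = 0; i < BATCH_SIZE; i++) {
			requeue_one(b->tab[i]);
			b->tab[i] = NULL;
		}
		pthread_mutex_unlock(&global_list_lock);
	} else {
		/* Contended: skip the requeue entirely, only account for it. */
		atomic_fetch_add(&skipped_requeues, 1);
		for (i = 0; i < BATCH_SIZE; i++)
			b->tab[i] = NULL;
	}
	b->index = 0;
}

int
main(void)
{
	struct batch b = { .index = BATCH_SIZE };

	batch_process(&b);
	printf("skipped requeues so far: %lu\n", atomic_load(&skipped_requeues));
	return (0);
}
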