author     Mateusz Guzik <mjg@FreeBSD.org>    2020-10-24 01:14:17 +0000
committer  Mateusz Guzik <mjg@FreeBSD.org>    2020-10-24 01:14:17 +0000
commit     208cb7c4b6965cc76c5343c4b0e138260546c8c6 (patch)
tree       00b9f1fa110e3bf76b5fb24297779f20d9c6b986 /sys/kern/vfs_cache.c
parent     1d4440569070276266002e18ba196637d1661e2c (diff)
Diffstat (limited to 'sys/kern/vfs_cache.c')
-rw-r--r--  sys/kern/vfs_cache.c | 220
1 file changed, 138 insertions(+), 82 deletions(-)
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 504871c63ace..16fdfe051b7d 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -174,6 +174,19 @@ struct namecache_ts {
*/
#define CACHE_ZONE_ALIGNMENT UMA_ALIGNOF(time_t)
+/*
+ * TODO: the initial value of CACHE_PATH_CUTOFF was inherited from the
+ * 4.4 BSD codebase. Later on struct namecache was tweaked to become
+ * smaller and the value was bumped to retain the total size, but it
+ * was never re-evaluated for suitability. A simple test counting
+ * lengths during package building shows that the value of 45 covers
+ * about 86% of all added entries, reaching 99% at 65.
+ *
+ * Regardless of the above, use of dedicated zones instead of malloc may be
+ * inducing additional waste. This may be hard to address as said zones are
+ * tied to VFS SMR. Even if retaining them, the current split should be
+ * reevaluated.
+ */
#ifdef __LP64__
#define CACHE_PATH_CUTOFF 45
#define CACHE_LARGE_PAD 6
@@ -212,6 +225,8 @@ _Static_assert((CACHE_ZONE_LARGE_TS_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "ba
*/
#define NEG_HOT 0x01
+static bool cache_neg_evict_cond(u_long lnumcache);
+
/*
* Mark an entry as invalid.
*
@@ -380,61 +395,6 @@ VP2VNODELOCK(struct vnode *vp)
return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
}
-/*
- * UMA zones for the VFS cache.
- *
- * The small cache is used for entries with short names, which are the
- * most common. The large cache is used for entries which are too big to
- * fit in the small cache.
- */
-static uma_zone_t __read_mostly cache_zone_small;
-static uma_zone_t __read_mostly cache_zone_small_ts;
-static uma_zone_t __read_mostly cache_zone_large;
-static uma_zone_t __read_mostly cache_zone_large_ts;
-
-static struct namecache *
-cache_alloc(int len, int ts)
-{
-	struct namecache_ts *ncp_ts;
-	struct namecache *ncp;
-
-	if (__predict_false(ts)) {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
-		else
-			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
-		ncp = &ncp_ts->nc_nc;
-	} else {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
-		else
-			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
-	}
-	return (ncp);
-}
-
-static void
-cache_free(struct namecache *ncp)
-{
-	struct namecache_ts *ncp_ts;
-
-	MPASS(ncp != NULL);
-	if ((ncp->nc_flag & NCF_DVDROP) != 0)
-		vdrop(ncp->nc_dvp);
-	if (__predict_false(ncp->nc_flag & NCF_TS)) {
-		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
-		else
-			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
-	} else {
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small, ncp);
-		else
-			uma_zfree_smr(cache_zone_large, ncp);
-	}
-}
-
static void
cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
{
@@ -547,6 +507,126 @@ cache_assert_vnode_locked(struct vnode *vp)
}
/*
+ * Directory vnodes with entries are held for two reasons:
+ * 1. make them less of a target for reclamation in vnlru
+ * 2. suffer a smaller performance penalty in locked lookup, as requeueing is avoided
+ *
+ * Note this preferably would not be done; it is a holdover from earlier code. It
+ * will be feasible to eliminate it altogether once all filesystems start
+ * supporting lockless lookup.
+ */
+static void
+cache_hold_vnode(struct vnode *vp)
+{
+
+	cache_assert_vnode_locked(vp);
+	VNPASS(LIST_EMPTY(&vp->v_cache_src), vp);
+	vhold(vp);
+	counter_u64_add(numcachehv, 1);
+}
+
+static void
+cache_drop_vnode(struct vnode *vp)
+{
+
+	/*
+	 * Called after all locks are dropped, meaning we can't assert
+	 * on the state of v_cache_src.
+	 */
+	vdrop(vp);
+	counter_u64_add(numcachehv, -1);
+}
+
+/*
+ * UMA zones.
+ */
+static uma_zone_t __read_mostly cache_zone_small;
+static uma_zone_t __read_mostly cache_zone_small_ts;
+static uma_zone_t __read_mostly cache_zone_large;
+static uma_zone_t __read_mostly cache_zone_large_ts;
+
+static struct namecache *
+cache_alloc_uma(int len, bool ts)
+{
+	struct namecache_ts *ncp_ts;
+	struct namecache *ncp;
+
+	if (__predict_false(ts)) {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
+		else
+			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
+		ncp = &ncp_ts->nc_nc;
+	} else {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
+		else
+			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
+	}
+	return (ncp);
+}
+
+static void
+cache_free_uma(struct namecache *ncp)
+{
+	struct namecache_ts *ncp_ts;
+
+	if (__predict_false(ncp->nc_flag & NCF_TS)) {
+		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
+		else
+			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
+	} else {
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small, ncp);
+		else
+			uma_zfree_smr(cache_zone_large, ncp);
+	}
+}
+
+static struct namecache *
+cache_alloc(int len, bool ts)
+{
+	u_long lnumcache;
+
+	/*
+	 * Avoid blowout in namecache entries.
+	 *
+	 * Bugs:
+	 * 1. filesystems may end up trying to add an already existing entry
+	 *    (for example this can happen after a cache miss during concurrent
+	 *    lookup), in which case we will call cache_neg_evict despite not
+	 *    adding anything.
+	 * 2. the routine may fail to free anything and no provisions are made
+	 *    to make it try harder (see the inside for failure modes)
+	 * 3. it only ever looks at negative entries.
+	 */
+	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+	if (cache_neg_evict_cond(lnumcache)) {
+		lnumcache = atomic_load_long(&numcache);
+	}
+	if (__predict_false(lnumcache >= ncsize)) {
+		atomic_subtract_long(&numcache, 1);
+		counter_u64_add(numdrops, 1);
+		return (NULL);
+	}
+	return (cache_alloc_uma(len, ts));
+}
+
+static void
+cache_free(struct namecache *ncp)
+{
+
+	MPASS(ncp != NULL);
+	if ((ncp->nc_flag & NCF_DVDROP) != 0) {
+		cache_drop_vnode(ncp->nc_dvp);
+	}
+	cache_free_uma(ncp);
+	atomic_subtract_long(&numcache, 1);
+}
+
+/*
* TODO: With the value stored we can do better than computing the hash based
* on the address. The choice of FNV should also be revisited.
*/
@@ -1298,10 +1378,8 @@ cache_zap_locked(struct namecache *ncp)
LIST_REMOVE(ncp, nc_src);
if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
ncp->nc_flag |= NCF_DVDROP;
- counter_u64_add(numcachehv, -1);
}
}
- atomic_subtract_long(&numcache, 1);
}
static void
@@ -2110,7 +2188,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
uint32_t hash;
int flag;
int len;
- u_long lnumcache;
VNPASS(dvp != vp, dvp);
VNPASS(!VN_IS_DOOMED(dvp), dvp);
@@ -2135,27 +2212,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
}
}
- /*
- * Avoid blowout in namecache entries.
- *
- * Bugs:
- * 1. filesystems may end up tryng to add an already existing entry
- * (for example this can happen after a cache miss during concurrent
- * lookup), in which case we will call cache_neg_evict despite not
- * adding anything.
- * 2. the routine may fail to free anything and no provisions are made
- * to make it try harder (see the inside for failure modes)
- * 3. it only ever looks at negative entries.
- */
- lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
- if (cache_neg_evict_cond(lnumcache)) {
- lnumcache = atomic_load_long(&numcache);
- }
- if (__predict_false(lnumcache >= ncsize)) {
- atomic_subtract_long(&numcache, 1);
- counter_u64_add(numdrops, 1);
+ ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
+ if (ncp == NULL)
return;
- }
cache_celockstate_init(&cel);
ndd = NULL;
@@ -2165,7 +2224,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
* Calculate the hash key and setup as much of the new
* namecache entry as possible before acquiring the lock.
*/
- ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
ncp->nc_flag = flag | NCF_WIP;
ncp->nc_vp = vp;
if (vp == NULL)
@@ -2276,8 +2334,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
if (flag != NCF_ISDOTDOT) {
if (LIST_EMPTY(&dvp->v_cache_src)) {
- vhold(dvp);
- counter_u64_add(numcachehv, 1);
+ cache_hold_vnode(dvp);
}
LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
}
@@ -2318,7 +2375,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
return;
out_unlock_free:
cache_enter_unlock(&cel);
- atomic_subtract_long(&numcache, 1);
cache_free(ncp);
return;
}
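
For readers following the refactoring, below is a minimal userspace sketch (not FreeBSD kernel code) of the accounting pattern this change centralizes in cache_alloc(): optimistically bump the entry counter, give eviction a chance to make room, and roll the counter back and fail if the cache is still over its limit. The names entry_alloc(), entry_free() and try_evict(), and the limit of 1000, are illustrative placeholders rather than kernel interfaces; the real code uses uma_zalloc_smr()/uma_zfree_smr(), cache_neg_evict_cond() and the counters shown in the diff.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

static atomic_long numcache;		/* number of entries currently cached */
static long ncsize = 1000;		/* cache size limit (placeholder value) */

/* Placeholder standing in for cache_neg_evict_cond(); pretend nothing is freed. */
static bool
try_evict(long cur)
{
	(void)cur;
	return (false);
}

static void *
entry_alloc(size_t len)
{
	long lnumcache;

	/* Optimistically account for the new entry before allocating it. */
	lnumcache = atomic_fetch_add(&numcache, 1) + 1;
	if (try_evict(lnumcache))
		lnumcache = atomic_load(&numcache);
	if (lnumcache >= ncsize) {
		/* Still over the limit and eviction did not help; undo and give up. */
		atomic_fetch_sub(&numcache, 1);
		return (NULL);
	}
	return (malloc(len));
}

static void
entry_free(void *p)
{
	free(p);
	atomic_fetch_sub(&numcache, 1);
}

With the accounting paired inside entry_alloc()/entry_free(), callers such as cache_enter_time() in the patch only need to check for a NULL return instead of manipulating the counter themselves.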