From 208cb7c4b6965cc76c5343c4b0e138260546c8c6 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Sat, 24 Oct 2020 01:14:17 +0000
Subject: cache: refactor alloc/free

This in particular centralizes manipulation of numcache.
---
 sys/kern/vfs_cache.c | 220 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 138 insertions(+), 82 deletions(-)

(limited to 'sys/kern/vfs_cache.c')

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 504871c63ace..16fdfe051b7d 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -174,6 +174,19 @@ struct namecache_ts {
  */
 #define	CACHE_ZONE_ALIGNMENT	UMA_ALIGNOF(time_t)
 
+/*
+ * TODO: the initial value of CACHE_PATH_CUTOFF was inherited from the
+ * 4.4 BSD codebase. Later on struct namecache was tweaked to become
+ * smaller and the value was bumped to retain the total size, but it
+ * was never re-evaluated for suitability. A simple test counting
+ * lengths during package building shows that the value of 45 covers
+ * about 86% of all added entries, reaching 99% at 65.
+ *
+ * Regardless of the above, use of dedicated zones instead of malloc may be
+ * inducing additional waste. This may be hard to address as said zones are
+ * tied to VFS SMR. Even if retaining them, the current split should be
+ * reevaluated.
+ */
 #ifdef __LP64__
 #define	CACHE_PATH_CUTOFF	45
 #define	CACHE_LARGE_PAD		6
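The TODO above suggests the cutoff should be re-measured rather than trusted. For illustration only (not part of the patch; the sample lengths below are made up), a standalone userspace sketch of the kind of tally the comment describes, reporting what fraction of observed component name lengths a candidate cutoff would cover:

/*
 * Standalone sketch: report the coverage a candidate cutoff would give
 * over a set of observed name lengths. The sample data is hypothetical;
 * real numbers could be gathered by instrumenting entry creation during
 * a package build, as the comment describes.
 */
#include <stdio.h>
#include <stddef.h>

static double
coverage(const int *lens, size_t n, int cutoff)
{
	size_t i, hits;

	hits = 0;
	for (i = 0; i < n; i++) {
		if (lens[i] <= cutoff)
			hits++;
	}
	return (100.0 * hits / n);
}

int
main(void)
{
	/* Hypothetical sample of component name lengths. */
	static const int lens[] = { 3, 7, 12, 20, 31, 44, 45, 52, 63, 70 };
	static const int cutoffs[] = { 45, 65 };
	size_t i, n;

	n = sizeof(lens) / sizeof(lens[0]);
	for (i = 0; i < sizeof(cutoffs) / sizeof(cutoffs[0]); i++)
		printf("cutoff %d covers %.1f%% of entries\n",
		    cutoffs[i], coverage(lens, n, cutoffs[i]));
	return (0);
}

Fed with lengths collected during a real build, this is the sort of tally behind the 86% (at 45) and 99% (at 65) figures quoted in the comment.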
@@ -212,6 +225,8 @@ _Static_assert((CACHE_ZONE_LARGE_TS_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "ba
  */
 #define	NEG_HOT		0x01
 
+static bool cache_neg_evict_cond(u_long lnumcache);
+
 /*
  * Mark an entry as invalid.
  *
@@ -380,61 +395,6 @@ VP2VNODELOCK(struct vnode *vp)
 	return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
 }
 
-/*
- * UMA zones for the VFS cache.
- *
- * The small cache is used for entries with short names, which are the
- * most common. The large cache is used for entries which are too big to
- * fit in the small cache.
- */
-static uma_zone_t __read_mostly cache_zone_small;
-static uma_zone_t __read_mostly cache_zone_small_ts;
-static uma_zone_t __read_mostly cache_zone_large;
-static uma_zone_t __read_mostly cache_zone_large_ts;
-
-static struct namecache *
-cache_alloc(int len, int ts)
-{
-	struct namecache_ts *ncp_ts;
-	struct namecache *ncp;
-
-	if (__predict_false(ts)) {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
-		else
-			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
-		ncp = &ncp_ts->nc_nc;
-	} else {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
-		else
-			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
-	}
-	return (ncp);
-}
-
-static void
-cache_free(struct namecache *ncp)
-{
-	struct namecache_ts *ncp_ts;
-
-	MPASS(ncp != NULL);
-	if ((ncp->nc_flag & NCF_DVDROP) != 0)
-		vdrop(ncp->nc_dvp);
-	if (__predict_false(ncp->nc_flag & NCF_TS)) {
-		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
-		else
-			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
-	} else {
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small, ncp);
-		else
-			uma_zfree_smr(cache_zone_large, ncp);
-	}
-}
-
 static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
@@ -546,6 +506,126 @@ cache_assert_vnode_locked(struct vnode *vp)
 	cache_assert_vlp_locked(vlp);
 }
 
+/*
+ * Directory vnodes with entries are held for two reasons:
+ * 1. make them less of a target for reclamation in vnlru
+ * 2. suffer smaller performance penalty in locked lookup as requeueing is avoided
+ *
+ * Note this preferably would not be done and it is a holdover. It will be
+ * feasible to eliminate it altogether if all filesystems start supporting
+ * lockless lookup.
+ */
+static void
+cache_hold_vnode(struct vnode *vp)
+{
+
+	cache_assert_vnode_locked(vp);
+	VNPASS(LIST_EMPTY(&vp->v_cache_src), vp);
+	vhold(vp);
+	counter_u64_add(numcachehv, 1);
+}
+
+static void
+cache_drop_vnode(struct vnode *vp)
+{
+
+	/*
+	 * Called after all locks are dropped, meaning we can't assert
+	 * on the state of v_cache_src.
+	 */
+	vdrop(vp);
+	counter_u64_add(numcachehv, -1);
+}
+
+/*
+ * UMA zones.
+ */
+static uma_zone_t __read_mostly cache_zone_small;
+static uma_zone_t __read_mostly cache_zone_small_ts;
+static uma_zone_t __read_mostly cache_zone_large;
+static uma_zone_t __read_mostly cache_zone_large_ts;
+
+static struct namecache *
+cache_alloc_uma(int len, bool ts)
+{
+	struct namecache_ts *ncp_ts;
+	struct namecache *ncp;
+
+	if (__predict_false(ts)) {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
+		else
+			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
+		ncp = &ncp_ts->nc_nc;
+	} else {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
+		else
+			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
+	}
+	return (ncp);
+}
+
+static void
+cache_free_uma(struct namecache *ncp)
+{
+	struct namecache_ts *ncp_ts;
+
+	if (__predict_false(ncp->nc_flag & NCF_TS)) {
+		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
+		else
+			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
+	} else {
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small, ncp);
+		else
+			uma_zfree_smr(cache_zone_large, ncp);
+	}
+}
+
+static struct namecache *
+cache_alloc(int len, bool ts)
+{
+	u_long lnumcache;
+
+	/*
+	 * Avoid blowout in namecache entries.
+	 *
+	 * Bugs:
+	 * 1. filesystems may end up trying to add an already existing entry
+	 * (for example this can happen after a cache miss during concurrent
+	 * lookup), in which case we will call cache_neg_evict despite not
+	 * adding anything.
+	 * 2. the routine may fail to free anything and no provisions are made
+	 * to make it try harder (see the inside for failure modes)
+	 * 3. it only ever looks at negative entries.
+	 */
+	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+	if (cache_neg_evict_cond(lnumcache)) {
+		lnumcache = atomic_load_long(&numcache);
+	}
+	if (__predict_false(lnumcache >= ncsize)) {
+		atomic_subtract_long(&numcache, 1);
+		counter_u64_add(numdrops, 1);
+		return (NULL);
+	}
+	return (cache_alloc_uma(len, ts));
+}
+
+static void
+cache_free(struct namecache *ncp)
+{
+
+	MPASS(ncp != NULL);
+	if ((ncp->nc_flag & NCF_DVDROP) != 0) {
+		cache_drop_vnode(ncp->nc_dvp);
+	}
+	cache_free_uma(ncp);
+	atomic_subtract_long(&numcache, 1);
+}
+
 /*
  * TODO: With the value stored we can do better than computing the hash based
  * on the address. The choice of FNV should also be revisited.
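The hunk above carries the substance of the commit message: numcache is now manipulated only inside cache_alloc() and cache_free(). A minimal userspace sketch of that accounting pattern, assuming nothing about the kernel environment (the counter, the limit and the malloc backing below are stand-ins for numcache, ncsize and the UMA/SMR zones):

/*
 * Minimal sketch of centralized entry accounting: reserve a slot in a
 * global counter before allocating, roll the reservation back on any
 * failure, and release it on free. All names here are invented.
 */
#include <stdatomic.h>
#include <stdlib.h>

static atomic_long numentries;
static const long entry_limit = 1024;	/* stand-in for ncsize */

struct entry {
	char	name[64];
};

static struct entry *
entry_alloc(void)
{
	struct entry *e;
	long cur;

	/* Reserve first; undo the reservation if the cap is exceeded. */
	cur = atomic_fetch_add(&numentries, 1) + 1;
	if (cur >= entry_limit) {
		atomic_fetch_sub(&numentries, 1);
		return (NULL);
	}
	e = calloc(1, sizeof(*e));
	if (e == NULL)
		atomic_fetch_sub(&numentries, 1);
	return (e);
}

static void
entry_free(struct entry *e)
{

	free(e);
	atomic_fetch_sub(&numentries, 1);
}

int
main(void)
{
	struct entry *e;

	e = entry_alloc();
	if (e != NULL)
		entry_free(e);
	return (0);
}

Because the rollback sits next to the reservation, callers only need to check for NULL; they no longer carry their own undo logic on error paths, which is exactly what the cache_enter_time() hunks below remove.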
@@ -1298,10 +1378,8 @@ cache_zap_locked(struct namecache *ncp)
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
-			counter_u64_add(numcachehv, -1);
 		}
 	}
-	atomic_subtract_long(&numcache, 1);
 }
 
 static void
@@ -2110,7 +2188,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	uint32_t hash;
 	int flag;
 	int len;
-	u_long lnumcache;
 
 	VNPASS(dvp != vp, dvp);
 	VNPASS(!VN_IS_DOOMED(dvp), dvp);
@@ -2135,27 +2212,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		}
 	}
 
-	/*
-	 * Avoid blowout in namecache entries.
-	 *
-	 * Bugs:
-	 * 1. filesystems may end up tryng to add an already existing entry
-	 * (for example this can happen after a cache miss during concurrent
-	 * lookup), in which case we will call cache_neg_evict despite not
-	 * adding anything.
-	 * 2. the routine may fail to free anything and no provisions are made
-	 * to make it try harder (see the inside for failure modes)
-	 * 3. it only ever looks at negative entries.
-	 */
-	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
-	if (cache_neg_evict_cond(lnumcache)) {
-		lnumcache = atomic_load_long(&numcache);
-	}
-	if (__predict_false(lnumcache >= ncsize)) {
-		atomic_subtract_long(&numcache, 1);
-		counter_u64_add(numdrops, 1);
+	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
+	if (ncp == NULL)
 		return;
-	}
 
 	cache_celockstate_init(&cel);
 	ndd = NULL;
@@ -2165,7 +2224,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
-	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_flag = flag | NCF_WIP;
 	ncp->nc_vp = vp;
 	if (vp == NULL)
@@ -2276,8 +2334,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
-			vhold(dvp);
-			counter_u64_add(numcachehv, 1);
+			cache_hold_vnode(dvp);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
@@ -2318,7 +2375,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	return;
 out_unlock_free:
 	cache_enter_unlock(&cel);
-	atomic_subtract_long(&numcache, 1);
 	cache_free(ncp);
 	return;
 }
--
cgit v1.2.3
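The remaining hunks show the caller side: cache_zap_locked() and cache_enter_time() no longer touch numcache or numcachehv directly and rely on the new helpers instead. A hedged userspace sketch of the hold/counter pairing idea behind cache_hold_vnode()/cache_drop_vnode(); the types and helper names below are invented for illustration, with plain atomics standing in for vhold()/vdrop() and numcachehv:

/*
 * Sketch of pairing a hold with a statistics counter in one set of
 * helpers so the two cannot drift apart (not the kernel implementation).
 */
#include <assert.h>
#include <stdatomic.h>

struct node {
	atomic_int	holdcnt;	/* stand-in for v_holdcnt */
};

static atomic_long numheld;		/* stand-in for numcachehv */

static void
node_hold(struct node *n)
{

	atomic_fetch_add(&n->holdcnt, 1);
	atomic_fetch_add(&numheld, 1);
}

static void
node_drop(struct node *n)
{

	assert(atomic_load(&n->holdcnt) > 0);
	atomic_fetch_sub(&n->holdcnt, 1);
	atomic_fetch_sub(&numheld, 1);
}

static struct node sample_node;

int
main(void)
{

	node_hold(&sample_node);
	node_drop(&sample_node);
	assert(atomic_load(&numheld) == 0);
	return (0);
}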