From 90836c3270786135dcae04c9eff80cdef9fe602c Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Sun, 5 Feb 2017 03:26:34 +0000
Subject: mtx: switch to fcmpset

The found value is passed to locking routines in order to reduce
cacheline accesses.

mtx_unlock grows an explicit check for regular unlock. On ll/sc
architectures the routine can fail even if the lock could have been
handled by the inline primitive.

Discussed with:	jhb
Tested by:	pho (previous version)
---
 sys/kern/kern_mutex.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'sys/kern/kern_mutex.c')

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 35bbd5f78c3f..8e6133d6bfde 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -455,12 +455,11 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
  * sleep waiting for it), or if we need to recurse on it.
  */
 void
-__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
+__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, int opts,
     const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t v;
 #ifdef ADAPTIVE_MUTEXES
 	volatile struct thread *owner;
 #endif
@@ -489,7 +488,6 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
 	if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
@@ -520,9 +518,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
 
 	for (;;) {
 		if (v == MTX_UNOWNED) {
-			if (_mtx_obtain_lock(m, tid))
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
-			v = MTX_READ_VALUE(m);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -674,12 +671,11 @@ _mtx_lock_spin_failed(struct mtx *m)
  * is handled inline.
  */
 void
-_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
-    const char *file, int line)
+_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+    int opts, const char *file, int line)
 {
 	struct mtx *m;
 	struct lock_delay_arg lda;
-	uintptr_t v;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
@@ -706,12 +702,10 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
 #ifdef KDTRACE_HOOKS
 	spin_time -= lockstat_nsecs(&m->lock_object);
 #endif
-	v = MTX_READ_VALUE(m);
 	for (;;) {
 		if (v == MTX_UNOWNED) {
-			if (_mtx_obtain_lock(m, tid))
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
-			v = MTX_READ_VALUE(m);
 			continue;
 		}
 		/* Give interrupts a chance while we spin. */
@@ -796,14 +790,11 @@ retry:
 		    m->lock_object.lo_name, file, line));
 		WITNESS_CHECKORDER(&m->lock_object,
 		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
-		v = MTX_READ_VALUE(m);
 		for (;;) {
-			if (v == MTX_UNOWNED) {
-				if (_mtx_obtain_lock(m, tid))
-					break;
-				v = MTX_READ_VALUE(m);
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				break;
+			if (v == MTX_UNOWNED)
 				continue;
-			}
 			if (v == tid) {
 				m->mtx_recurse++;
 				break;
@@ -896,11 +887,18 @@ __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
+	uintptr_t v;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
+	v = MTX_READ_VALUE(m);
+
+	if (v == (uintptr_t)curthread) {
+		if (_mtx_release_lock(m, (uintptr_t)curthread))
+			return;
+	}
 
 	if (mtx_recursed(m)) {
 		if (--(m->mtx_recurse) == 0)
--
cgit v1.2.3
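The point of the switch above: a failed fcmpset hands the caller the value it
found in the lock word, so the retry loops no longer need a separate
MTX_READ_VALUE() to learn who owns the lock. Below is a minimal userland model
of that loop in C11; every toy_* name and TOY_MTX_UNOWNED is invented for the
sketch, and the kernel's real helper is _mtx_obtain_lock_fetch(), built on the
atomic_fcmpset family and defined outside this file.

/*
 * Minimal model of the fcmpset retry loop, in portable C11.  Sketch only;
 * not the kernel's code.
 */
#include <stdatomic.h>
#include <stdint.h>

#define	TOY_MTX_UNOWNED	((uintptr_t)0)	/* toy "unowned" marker */

struct toy_mtx {
	_Atomic uintptr_t	mtx_lock;
};

/*
 * Try to swing the lock word from *vp to tid.  On failure the observed
 * value is written back into *vp, so the caller learns the current owner
 * without issuing a second load of the contended cacheline.
 */
static int
toy_obtain_lock_fetch(struct toy_mtx *m, uintptr_t *vp, uintptr_t tid)
{

	return (atomic_compare_exchange_strong_explicit(&m->mtx_lock, vp,
	    tid, memory_order_acquire, memory_order_relaxed));
}

static void
toy_lock(struct toy_mtx *m, uintptr_t tid)
{
	uintptr_t v;

	v = TOY_MTX_UNOWNED;
	for (;;) {
		if (v == TOY_MTX_UNOWNED) {
			if (toy_obtain_lock_fetch(m, &v, tid))
				break;
			/* The failed CAS already refreshed v. */
			continue;
		}
		/* Owned by someone else: re-read and spin until it looks free. */
		v = atomic_load_explicit(&m->mtx_lock, memory_order_relaxed);
	}
}

static void
toy_unlock(struct toy_mtx *m)
{

	atomic_store_explicit(&m->mtx_lock, TOY_MTX_UNOWNED,
	    memory_order_release);
}

__mtx_lock_sleep() in the diff above has the same shape, except the waiting
step is the adaptive spin/turnstile machinery rather than a plain re-read.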
From 08da2677755f0874ea99c5a42181aacc3eb17690 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Sun, 5 Feb 2017 08:04:11 +0000
Subject: mtx: move lockstat handling out of inline primitives

Lockstat requires checking if it is enabled and if so, calling a 6 argument
function. Further, determining whether to call it on unlock requires
pre-reading the lock value.

This is problematic in at least 3 ways:
- more branches in the hot path than necessary
- additional cacheline ping pong under contention
- bigger code

Instead, check first if lockstat handling is necessary and if so, just fall
back to regular locking routines. For this purpose a new macro is introduced
(LOCKSTAT_PROFILE_ENABLED).

LOCK_PROFILING uninlines all primitives. Fold in the current inline lock
variant into the _mtx_lock_flags to retain the support. With this change
the inline variants are not used when LOCK_PROFILING is defined and thus
can ignore its existence.

This results in:
    text    data     bss      dec     hex filename
22259667 1303208 4994976 28557851 1b3c21b kernel.orig
21797315 1303208 4994976 28095499 1acb40b kernel.patched

i.e. about 3% reduction in text size.

A remaining action is to remove spurious arguments for internal kernel
consumers.
---
 sys/kern/kern_mutex.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'sys/kern/kern_mutex.c')

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 8e6133d6bfde..f49d87c81999 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -265,6 +265,7 @@ void
 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
+	uintptr_t tid, v;
 
 	if (SCHEDULER_STOPPED())
 		return;
@@ -282,7 +283,13 @@ __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
-	__mtx_lock(m, curthread, opts, file, line);
+	tid = (uintptr_t)curthread;
+	v = MTX_UNOWNED;
+	if (!_mtx_obtain_lock_fetch(m, &v, tid))
+		_mtx_lock_sleep(m, v, tid, opts, file, line);
+	else
+		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
+		    m, 0, 0, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
@@ -310,7 +317,7 @@ __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 	    line);
 	mtx_assert(m, MA_OWNED);
 
-	__mtx_unlock(m, curthread, opts, file, line);
+	__mtx_unlock_sleep(c, opts, file, line);
 	TD_LOCKS_DEC(curthread);
 }
 
@@ -887,20 +894,17 @@ __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t v;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
-	if (v == (uintptr_t)curthread) {
+	if (!mtx_recursed(m)) {
+		LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m);
 		if (_mtx_release_lock(m, (uintptr_t)curthread))
 			return;
-	}
-
-	if (mtx_recursed(m)) {
+	} else {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
--
cgit v1.2.3
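The shape this gives the inline fast path can be sketched as follows. This is
only an illustration of the design described in the message above, not the
actual sys/sys/mutex.h text (which is outside this diff); the exact spelling of
the macro and of LOCKSTAT_PROFILE_ENABLED() is assumed. The idea is that the
inline body is reduced to one predicted-false test covering both "probe armed"
and "fcmpset failed", with everything else pushed into the outlined slow path.

/*
 * Hypothetical sketch of the inline acquire after this change; treat the
 * spelling as an assumption, the structure as the point.
 */
#define	__mtx_lock_sketch(m, tid, opts, file, line) do {		\
	uintptr_t _tid = (uintptr_t)(tid);				\
	uintptr_t _v = MTX_UNOWNED;					\
									\
	/* One predicted-false branch: if the lockstat probe is armed,	\
	 * or the uncontended fcmpset fails, fall back to the outlined	\
	 * slow path, which handles lockstat itself. */			\
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) || \
	    !_mtx_obtain_lock_fetch((m), &_v, _tid)))			\
		_mtx_lock_sleep((m), _v, _tid, (opts), (file), (line));	\
} while (0)

The unlock half shown in the diff mirrors this: __mtx_unlock_flags() now goes
straight to __mtx_unlock_sleep(), which fires the adaptive__release probe and
drops the lock itself.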
From cae4ab7f3718736b51c048440a5eadd7d7251623 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Sun, 5 Feb 2017 09:35:17 +0000
Subject: mtx: fix up _mtx_obtain_lock_fetch usage in thread lock

Since _mtx_obtain_lock_fetch no longer sets the argument to MTX_UNOWNED,
callers have to do it on their own.
---
 sys/kern/kern_mutex.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'sys/kern/kern_mutex.c')

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index f49d87c81999..76a009d0cfca 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -782,6 +782,7 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
 #ifdef KDTRACE_HOOKS
 	spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
 #endif
+	v = MTX_UNOWNED;
 	for (;;) {
 retry:
 		spinlock_enter();
--
cgit v1.2.3

From dc0896512cd91cb390cc35ba96e9285cd04383da Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Sun, 5 Feb 2017 09:53:13 +0000
Subject: mtx: fixup r313278, the assignemnt was supposed to go inside the loop
---
 sys/kern/kern_mutex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'sys/kern/kern_mutex.c')

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 76a009d0cfca..83977bb6425e 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -782,9 +782,9 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
 #ifdef KDTRACE_HOOKS
 	spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
 #endif
-	v = MTX_UNOWNED;
 	for (;;) {
 retry:
+		v = MTX_UNOWNED;
 		spinlock_enter();
 		m = td->td_lock;
 		KASSERT(m->mtx_lock != MTX_DESTROYED,
--
cgit v1.2.3
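Taken together, the last two patches settle the caller-side contract for the
fetch-style helper: it no longer seeds the expected value, so the caller sets
it to MTX_UNOWNED itself, and in thread_lock_flags_() that has to happen on
every pass of the retry loop because the lock being taken (td->td_lock) is
re-read on each pass. Reusing the invented toy_* names from the sketch after
the first patch, the contract looks roughly like this:

/*
 * Caller-side contract, modelled with the toy_* helpers from the earlier
 * sketch (still a sketch, not kernel code): the caller seeds the expected
 * value and re-seeds it on every fresh attempt.
 */
static void
toy_lock_retry(struct toy_mtx *m, uintptr_t tid)
{
	uintptr_t v;

	for (;;) {
		/* Seed inside the loop so each pass starts from the
		 * "unowned" guess; toy_obtain_lock_fetch() will not
		 * reset it for us. */
		v = TOY_MTX_UNOWNED;
		if (toy_obtain_lock_fetch(m, &v, tid))
			break;
		/* v now holds whatever owner was observed. */
	}
}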