Diffstat (limited to 'sys/kern/subr_epoch.c')
-rw-r--r--	sys/kern/subr_epoch.c	| 202
1 file changed, 159 insertions, 43 deletions
diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c
index a63f669fea75..9104f1e0880a 100644
--- a/sys/kern/subr_epoch.c
+++ b/sys/kern/subr_epoch.c
@@ -55,6 +55,27 @@ __FBSDID("$FreeBSD$");
 
 static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
 
+#ifdef __amd64__
+#define EPOCH_ALIGN CACHE_LINE_SIZE*2
+#else
+#define EPOCH_ALIGN CACHE_LINE_SIZE
+#endif
+
+TAILQ_HEAD (epoch_tdlist, epoch_tracker);
+typedef struct epoch_record {
+	ck_epoch_record_t er_record;
+	volatile struct epoch_tdlist er_tdlist;
+	volatile uint32_t er_gen;
+	uint32_t er_cpuid;
+} __aligned(EPOCH_ALIGN) *epoch_record_t;
+
+struct epoch {
+	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
+	epoch_record_t e_pcpu_record;
+	int e_idx;
+	int e_flags;
+};
+
 /* arbitrary --- needs benchmarking */
 #define MAX_ADAPTIVE_SPIN 100
 #define MAX_EPOCHS 64
@@ -119,11 +140,15 @@ epoch_init(void *arg __unused)
 	epoch_call_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_task_count = counter_u64_alloc(M_WAITOK);
 
-	pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
+	pcpu_zone_record = uma_zcreate("epoch_record pcpu",
+	    sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 	CPU_FOREACH(cpu) {
-		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL);
-		taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task");
+		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0,
+		    epoch_call_task, NULL);
+		taskqgroup_attach_cpu(qgroup_softirq,
+		    DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1,
+		    "epoch call task");
 	}
 	inited = 1;
 	global_epoch = epoch_alloc(0);
@@ -150,13 +175,21 @@ epoch_ctor(epoch_t epoch)
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		bzero(er, sizeof(*er));
-		ck_epoch_register(&epoch->e_epoch, &er->er_read_record, NULL);
-		ck_epoch_register(&epoch->e_epoch, &er->er_write_record, NULL);
+		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 		er->er_cpuid = cpu;
 	}
 }
 
+static void
+epoch_adjust_prio(struct thread *td, u_char prio)
+{
+
+	thread_lock(td);
+	sched_prio(td, prio);
+	thread_unlock(td);
+}
+
 epoch_t
 epoch_alloc(int flags)
 {
@@ -192,51 +225,126 @@ epoch_free(epoch_t epoch)
 	free(epoch, M_EPOCH);
 }
 
+static epoch_record_t
+epoch_currecord(epoch_t epoch)
+{
+
+	return (zpcpu_get_cpu(epoch->e_pcpu_record, curcpu));
+}
+
+#define INIT_CHECK(epoch)					\
+	do {							\
+		if (__predict_false((epoch) == NULL))		\
+			return;					\
+	} while (0)
+
 void
-epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
+epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
 {
+	struct epoch_record *er;
+	struct thread *td;
+
+	MPASS(cold || epoch != NULL);
+	INIT_CHECK(epoch);
+	MPASS(epoch->e_flags & EPOCH_PREEMPT);
+#ifdef EPOCH_TRACKER_DEBUG
+	et->et_magic_pre = EPOCH_MAGIC0;
+	et->et_magic_post = EPOCH_MAGIC1;
+#endif
+	td = curthread;
+	et->et_td = td;
+	td->td_epochnest++;
+	critical_enter();
+	sched_pin();
-	epoch_enter_preempt(epoch, et);
+	td->td_pre_epoch_prio = td->td_priority;
+	er = epoch_currecord(epoch);
+	TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
+	ck_epoch_begin(&er->er_record, &et->et_section);
+	critical_exit();
 }
 
 void
-epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
+epoch_enter(epoch_t epoch)
 {
+	struct thread *td;
+	epoch_record_t er;
+
+	MPASS(cold || epoch != NULL);
+	INIT_CHECK(epoch);
+	td = curthread;
-	epoch_exit_preempt(epoch, et);
+	td->td_epochnest++;
+	critical_enter();
+	er = epoch_currecord(epoch);
+	ck_epoch_begin(&er->er_record, NULL);
 }
 
 void
-epoch_enter_KBI(epoch_t epoch)
+epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
 {
+	struct epoch_record *er;
+	struct thread *td;
-	epoch_enter(epoch);
+	INIT_CHECK(epoch);
+	td = curthread;
+	critical_enter();
+	sched_unpin();
+	MPASS(td->td_epochnest);
+	td->td_epochnest--;
+	er = epoch_currecord(epoch);
+	MPASS(epoch->e_flags & EPOCH_PREEMPT);
+	MPASS(et != NULL);
+	MPASS(et->et_td == td);
+#ifdef EPOCH_TRACKER_DEBUG
+	MPASS(et->et_magic_pre == EPOCH_MAGIC0);
+	MPASS(et->et_magic_post == EPOCH_MAGIC1);
+	et->et_magic_pre = 0;
+	et->et_magic_post = 0;
+#endif
+#ifdef INVARIANTS
+	et->et_td = (void*)0xDEADBEEF;
+#endif
+	ck_epoch_end(&er->er_record, &et->et_section);
+	TAILQ_REMOVE(&er->er_tdlist, et, et_link);
+	er->er_gen++;
+	if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
+		epoch_adjust_prio(td, td->td_pre_epoch_prio);
+	critical_exit();
 }
 
 void
-epoch_exit_KBI(epoch_t epoch)
+epoch_exit(epoch_t epoch)
 {
+	struct thread *td;
+	epoch_record_t er;
-	epoch_exit(epoch);
+	INIT_CHECK(epoch);
+	td = curthread;
+	MPASS(td->td_epochnest);
+	td->td_epochnest--;
+	er = epoch_currecord(epoch);
+	ck_epoch_end(&er->er_record, NULL);
+	critical_exit();
 }
 
 /*
- * epoch_block_handler_preempt is a callback from the ck code when another thread is
- * currently in an epoch section.
+ * epoch_block_handler_preempt() is a callback from the CK code when another
+ * thread is currently in an epoch section.
  */
 static void
-epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t *cr,
-    void *arg __unused)
+epoch_block_handler_preempt(struct ck_epoch *global __unused,
+    ck_epoch_record_t *cr, void *arg __unused)
 {
 	epoch_record_t record;
 	struct thread *td, *owner, *curwaittd;
-	struct epoch_thread *tdwait;
+	struct epoch_tracker *tdwait;
 	struct turnstile *ts;
 	struct lock_object *lock;
 	int spincount, gen;
 	int locksheld __unused;
 
-	record = __containerof(cr, struct epoch_record, er_read_record);
+	record = __containerof(cr, struct epoch_record, er_record);
 	td = curthread;
 	locksheld = td->td_locks;
 	spincount = 0;
@@ -318,25 +426,27 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
 		if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
 		    ((ts = curwaittd->td_blocked) != NULL)) {
 			/*
-			 * We unlock td to allow turnstile_wait to reacquire the
-			 * the thread lock. Before unlocking it we enter a critical
-			 * section to prevent preemption after we reenable interrupts
-			 * by dropping the thread lock in order to prevent curwaittd
-			 * from getting to run.
+			 * We unlock td to allow turnstile_wait to reacquire
+			 * the thread lock. Before unlocking it we enter a
+			 * critical section to prevent preemption after we
+			 * reenable interrupts by dropping the thread lock in
+			 * order to prevent curwaittd from getting to run.
 			 */
 			critical_enter();
 			thread_unlock(td);
 			owner = turnstile_lock(ts, &lock);
 			/*
-			 * The owner pointer indicates that the lock succeeded. Only
-			 * in case we hold the lock and the turnstile we locked is still
-			 * the one that curwaittd is blocked on can we continue. Otherwise
-			 * The turnstile pointer has been changed out from underneath
-			 * us, as in the case where the lock holder has signalled curwaittd,
+			 * The owner pointer indicates that the lock succeeded.
+			 * Only in case we hold the lock and the turnstile we
+			 * locked is still the one that curwaittd is blocked on
+			 * can we continue. Otherwise the turnstile pointer has
+			 * been changed out from underneath us, as in the case
+			 * where the lock holder has signalled curwaittd,
 			 * and we need to continue.
 			 */
 			if (owner != NULL && ts == curwaittd->td_blocked) {
-				MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd));
+				MPASS(TD_IS_INHIBITED(curwaittd) &&
+				    TD_ON_LOCK(curwaittd));
 				critical_exit();
 				turnstile_wait(ts, owner, curwaittd->td_tsqueue);
 				counter_u64_add(turnstile_count, 1);
@@ -386,9 +496,8 @@ epoch_wait_preempt(epoch_t epoch)
 	if ((epoch->e_flags & EPOCH_LOCKED) == 0)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "epoch_wait() can be long running");
-	KASSERT(!in_epoch(epoch),
-	    ("epoch_wait_preempt() called in the middle "
-	    "of an epoch section of the same epoch"));
+	KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle "
+	    "of an epoch section of the same epoch"));
 #endif
 	thread_lock(td);
 	DROP_GIANT();
@@ -401,7 +510,8 @@ epoch_wait_preempt(epoch_t epoch)
 	td->td_pinned = 0;
 	sched_bind(td, old_cpu);
 
-	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL);
+	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
+	    NULL);
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
@@ -462,7 +572,7 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
 	er = epoch_currecord(epoch);
-	ck_epoch_call(&er->er_write_record, cb, (ck_epoch_cb_t *)callback);
+	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
 	critical_exit();
 	return;
 boottime:
@@ -486,7 +596,7 @@ epoch_call_task(void *arg __unused)
 		if (__predict_false((epoch = allepochs[i]) == NULL))
 			continue;
 		er = epoch_currecord(epoch);
-		record = &er->er_write_record;
+		record = &er->er_record;
 		if ((npending = record->n_pending) == 0)
 			continue;
 		ck_epoch_poll_deferred(record, &cb_stack);
@@ -502,7 +612,7 @@ epoch_call_task(void *arg __unused)
 	head = ck_stack_batch_pop_npsc(&cb_stack);
 	for (cursor = head; cursor != NULL; cursor = next) {
 		struct ck_epoch_entry *entry =
-			ck_epoch_entry_container(cursor);
+		    ck_epoch_entry_container(cursor);
 
 		next = CK_STACK_NEXT(cursor);
 		entry->function(entry);
@@ -512,7 +622,7 @@ epoch_call_task(void *arg __unused)
 int
 in_epoch_verbose(epoch_t epoch, int dump_onfail)
 {
-	struct epoch_thread *tdwait;
+	struct epoch_tracker *tdwait;
 	struct thread *td;
 	epoch_record_t er;
 
@@ -548,9 +658,15 @@ in_epoch(epoch_t epoch)
 }
 
 void
-epoch_adjust_prio(struct thread *td, u_char prio)
+epoch_thread_init(struct thread *td)
 {
-	thread_lock(td);
-	sched_prio(td, prio);
-	thread_unlock(td);
+
+	td->td_et = malloc(sizeof(struct epoch_tracker), M_EPOCH, M_WAITOK);
+}
+
+void
+epoch_thread_fini(struct thread *td)
+{
+
+	free(td->td_et, M_EPOCH);
 }
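
For readers unfamiliar with the interfaces being reshuffled above, here is a minimal sketch of an epoch(9) consumer as it looks after this change: a preemptible epoch is allocated with epoch_alloc(EPOCH_PREEMPT), readers bracket lookups with epoch_enter_preempt()/epoch_exit_preempt() and an on-stack struct epoch_tracker (which is what ends up on the per-CPU er_tdlist shown above), and writers defer frees through epoch_call(), which now queues onto the single er_record. The foo structure, list, and function names are hypothetical, and a real consumer would normally traverse the list with the ck_queue(3) macros; plain SLIST is used only to keep the sketch short.

/*
 * Illustrative epoch(9) consumer (hypothetical names throughout); only the
 * epoch_* calls correspond to interfaces in the diff above.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/epoch.h>
#include <sys/malloc.h>
#include <sys/queue.h>

static MALLOC_DEFINE(M_FOO, "foo", "epoch example");

struct foo {
	SLIST_ENTRY(foo) f_link;
	struct epoch_context f_ctx;	/* storage used by epoch_call() */
	int f_val;
};

static SLIST_HEAD(, foo) foo_head = SLIST_HEAD_INITIALIZER(foo_head);
static epoch_t foo_epoch;

static void
foo_init(void)
{

	/* EPOCH_PREEMPT sections require the on-stack tracker used below. */
	foo_epoch = epoch_alloc(EPOCH_PREEMPT);
}

static int
foo_lookup(int val)
{
	struct epoch_tracker et;
	struct foo *f;
	int found;

	found = 0;
	/* Reader: the tracker is linked onto this CPU's er_tdlist. */
	epoch_enter_preempt(foo_epoch, &et);
	SLIST_FOREACH(f, &foo_head, f_link) {
		if (f->f_val == val) {
			found = 1;
			break;
		}
	}
	epoch_exit_preempt(foo_epoch, &et);
	return (found);
}

static void
foo_free_cb(epoch_context_t ctx)
{
	struct foo *f;

	f = __containerof(ctx, struct foo, f_ctx);
	free(f, M_FOO);
}

static void
foo_remove(struct foo *f)
{

	/*
	 * Writer (list lock held by the caller): unlink, then defer the
	 * free until every reader currently in a section has left it.
	 */
	SLIST_REMOVE(&foo_head, f, foo, f_link);
	epoch_call(foo_epoch, &f->f_ctx, foo_free_cb);
}

Where the caller can sleep, epoch_wait_preempt(foo_epoch) could be used instead of epoch_call() to block until every in-flight section has drained before freeing in place.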
