aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module/zfs/multilist.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/multilist.c')
-rw-r--r--sys/contrib/openzfs/module/zfs/multilist.c452
1 files changed, 452 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/module/zfs/multilist.c b/sys/contrib/openzfs/module/zfs/multilist.c
new file mode 100644
index 000000000000..46fb79269310
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/multilist.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/multilist.h>
+#include <sys/trace_zfs.h>
+
+/*
+ * This overrides the number of sublists in each multilist_t, which defaults
+ * to the number of CPUs in the system (see multilist_create()).
+ */
+uint_t zfs_multilist_num_sublists = 0;
+
+/*
+ * Given the object contained on the list, return a pointer to the
+ * object's multilist_node_t structure it contains.
+ */
+#ifdef ZFS_DEBUG
+static multilist_node_t *
+multilist_d2l(multilist_t *ml, void *obj)
+{
+ return ((multilist_node_t *)((char *)obj + ml->ml_offset));
+}
+#else
+#define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL)
+#endif
+
+/*
+ * Initialize a new mutlilist using the parameters specified.
+ *
+ * - 'size' denotes the size of the structure containing the
+ * multilist_node_t.
+ * - 'offset' denotes the byte offset of the mutlilist_node_t within
+ * the structure that contains it.
+ * - 'num' specifies the number of internal sublists to create.
+ * - 'index_func' is used to determine which sublist to insert into
+ * when the multilist_insert() function is called; as well as which
+ * sublist to remove from when multilist_remove() is called. The
+ * requirements this function must meet, are the following:
+ *
+ * - It must always return the same value when called on the same
+ * object (to ensure the object is removed from the list it was
+ * inserted into).
+ *
+ * - It must return a value in the range [0, number of sublists).
+ * The multilist_get_num_sublists() function may be used to
+ * determine the number of sublists in the multilist.
+ *
+ * Also, in order to reduce internal contention between the sublists
+ * during insertion and removal, this function should choose evenly
+ * between all available sublists when inserting. This isn't a hard
+ * requirement, but a general rule of thumb in order to garner the
+ * best multi-threaded performance out of the data structure.
+ */
+static void
+multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
+ uint_t num, multilist_sublist_index_func_t *index_func)
+{
+ ASSERT3U(size, >, 0);
+ ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
+ ASSERT3U(num, >, 0);
+ ASSERT3P(index_func, !=, NULL);
+
+ ml->ml_offset = offset;
+ ml->ml_num_sublists = num;
+ ml->ml_index_func = index_func;
+
+ ml->ml_sublists = vmem_zalloc(sizeof (multilist_sublist_t) *
+ ml->ml_num_sublists, KM_SLEEP);
+
+ ASSERT3P(ml->ml_sublists, !=, NULL);
+
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+ mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
+ list_create(&mls->mls_list, size, offset);
+ }
+}
+
+/*
+ * Allocate a new multilist, using the default number of sublists (the number
+ * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
+ * that the multilists do not expand if more CPUs are hot-added. In that case,
+ * we will have less fanout than boot_ncpus, but we don't want to always
+ * reserve the RAM necessary to create the extra slots for additional CPUs up
+ * front, and dynamically adding them is a complex task.
+ */
+void
+multilist_create(multilist_t *ml, size_t size, size_t offset,
+ multilist_sublist_index_func_t *index_func)
+{
+ uint_t num_sublists;
+
+ if (zfs_multilist_num_sublists > 0) {
+ num_sublists = zfs_multilist_num_sublists;
+ } else {
+ num_sublists = MAX(boot_ncpus, 4);
+ }
+
+ multilist_create_impl(ml, size, offset, num_sublists, index_func);
+}
+
+/*
+ * Destroy the given multilist object, and free up any memory it holds.
+ */
+void
+multilist_destroy(multilist_t *ml)
+{
+ ASSERT(multilist_is_empty(ml));
+
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+
+ ASSERT(list_is_empty(&mls->mls_list));
+
+ list_destroy(&mls->mls_list);
+ mutex_destroy(&mls->mls_lock);
+ }
+
+ ASSERT3P(ml->ml_sublists, !=, NULL);
+ vmem_free(ml->ml_sublists,
+ sizeof (multilist_sublist_t) * ml->ml_num_sublists);
+
+ ml->ml_num_sublists = 0;
+ ml->ml_offset = 0;
+ ml->ml_sublists = NULL;
+}
+
+/*
+ * Insert the given object into the multilist.
+ *
+ * This function will insert the object specified into the sublist
+ * determined using the function given at multilist creation time.
+ *
+ * The sublist locks are automatically acquired if not already held, to
+ * ensure consistency when inserting and removing from multiple threads.
+ */
+void
+multilist_insert(multilist_t *ml, void *obj)
+{
+ unsigned int sublist_idx = ml->ml_index_func(ml, obj);
+ multilist_sublist_t *mls;
+ boolean_t need_lock;
+
+ DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
+ unsigned int, sublist_idx, void *, obj);
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+
+ mls = &ml->ml_sublists[sublist_idx];
+
+ /*
+ * Note: Callers may already hold the sublist lock by calling
+ * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
+ * returning TRUE if and only if the current thread holds the
+ * lock. While it's a little ugly to make the lock recursive in
+ * this way, it works and allows the calling code to be much
+ * simpler -- otherwise it would have to pass around a flag
+ * indicating that it already has the lock.
+ */
+ need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
+
+ multilist_sublist_insert_head(mls, obj);
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+}
+
+/*
+ * Remove the given object from the multilist.
+ *
+ * This function will remove the object specified from the sublist
+ * determined using the function given at multilist creation time.
+ *
+ * The necessary sublist locks are automatically acquired, to ensure
+ * consistency when inserting and removing from multiple threads.
+ */
+void
+multilist_remove(multilist_t *ml, void *obj)
+{
+ unsigned int sublist_idx = ml->ml_index_func(ml, obj);
+ multilist_sublist_t *mls;
+ boolean_t need_lock;
+
+ DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
+ unsigned int, sublist_idx, void *, obj);
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+
+ mls = &ml->ml_sublists[sublist_idx];
+ /* See comment in multilist_insert(). */
+ need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
+
+ multilist_sublist_remove(mls, obj);
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+}
+
+/*
+ * Check to see if this multilist object is empty.
+ *
+ * This will return TRUE if it finds all of the sublists of this
+ * multilist to be empty, and FALSE otherwise. Each sublist lock will be
+ * automatically acquired as necessary.
+ *
+ * If concurrent insertions and removals are occurring, the semantics
+ * of this function become a little fuzzy. Instead of locking all
+ * sublists for the entire call time of the function, each sublist is
+ * only locked as it is individually checked for emptiness. Thus, it's
+ * possible for this function to return TRUE with non-empty sublists at
+ * the time the function returns. This would be due to another thread
+ * inserting into a given sublist, after that specific sublist was check
+ * and deemed empty, but before all sublists have been checked.
+ */
+int
+multilist_is_empty(multilist_t *ml)
+{
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+ /* See comment in multilist_insert(). */
+ boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ if (!list_is_empty(&mls->mls_list)) {
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+
+ return (FALSE);
+ }
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+ }
+
+ return (TRUE);
+}
+
+/* Return the number of sublists composing this multilist */
+unsigned int
+multilist_get_num_sublists(multilist_t *ml)
+{
+ return (ml->ml_num_sublists);
+}
+
+/* Return a randomly selected, valid sublist index for this multilist */
+unsigned int
+multilist_get_random_index(multilist_t *ml)
+{
+ return (random_in_range(ml->ml_num_sublists));
+}
+
+void
+multilist_sublist_lock(multilist_sublist_t *mls)
+{
+ mutex_enter(&mls->mls_lock);
+}
+
+/* Lock and return the sublist specified at the given index */
+multilist_sublist_t *
+multilist_sublist_lock_idx(multilist_t *ml, unsigned int sublist_idx)
+{
+ multilist_sublist_t *mls;
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+ mls = &ml->ml_sublists[sublist_idx];
+ mutex_enter(&mls->mls_lock);
+
+ return (mls);
+}
+
+/* Lock and return the sublist that would be used to store the specified obj */
+multilist_sublist_t *
+multilist_sublist_lock_obj(multilist_t *ml, void *obj)
+{
+ return (multilist_sublist_lock_idx(ml, ml->ml_index_func(ml, obj)));
+}
+
+void
+multilist_sublist_unlock(multilist_sublist_t *mls)
+{
+ mutex_exit(&mls->mls_lock);
+}
+
+/*
+ * We're allowing any object to be inserted into this specific sublist,
+ * but this can lead to trouble if multilist_remove() is called to
+ * remove this object. Specifically, if calling ml_index_func on this
+ * object returns an index for sublist different than what is passed as
+ * a parameter here, any call to multilist_remove() with this newly
+ * inserted object is undefined! (the call to multilist_remove() will
+ * remove the object from a list that it isn't contained in)
+ */
+void
+multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_head(&mls->mls_list, obj);
+}
+
+/* please see comment above multilist_sublist_insert_head */
+void
+multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_tail(&mls->mls_list, obj);
+}
+
+/* please see comment above multilist_sublist_insert_head */
+void
+multilist_sublist_insert_after(multilist_sublist_t *mls, void *prev, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_after(&mls->mls_list, prev, obj);
+}
+
+/* please see comment above multilist_sublist_insert_head */
+void
+multilist_sublist_insert_before(multilist_sublist_t *mls, void *next, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_before(&mls->mls_list, next, obj);
+}
+
+/*
+ * Move the object one element forward in the list.
+ *
+ * This function will move the given object forward in the list (towards
+ * the head) by one object. So, in essence, it will swap its position in
+ * the list with its "prev" pointer. If the given object is already at the
+ * head of the list, it cannot be moved forward any more than it already
+ * is, so no action is taken.
+ *
+ * NOTE: This function **must not** remove any object from the list other
+ * than the object given as the parameter. This is relied upon in
+ * arc_evict_state_impl().
+ */
+void
+multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
+{
+ void *prev = list_prev(&mls->mls_list, obj);
+
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ ASSERT(!list_is_empty(&mls->mls_list));
+
+ /* 'obj' must be at the head of the list, nothing to do */
+ if (prev == NULL)
+ return;
+
+ list_remove(&mls->mls_list, obj);
+ list_insert_before(&mls->mls_list, prev, obj);
+}
+
+void
+multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_remove(&mls->mls_list, obj);
+}
+
+int
+multilist_sublist_is_empty(multilist_sublist_t *mls)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_is_empty(&mls->mls_list));
+}
+
+int
+multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
+{
+ multilist_sublist_t *mls;
+ int empty;
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+ mls = &ml->ml_sublists[sublist_idx];
+ ASSERT(!MUTEX_HELD(&mls->mls_lock));
+ mutex_enter(&mls->mls_lock);
+ empty = list_is_empty(&mls->mls_list);
+ mutex_exit(&mls->mls_lock);
+ return (empty);
+}
+
+void *
+multilist_sublist_head(multilist_sublist_t *mls)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_head(&mls->mls_list));
+}
+
+void *
+multilist_sublist_tail(multilist_sublist_t *mls)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_tail(&mls->mls_list));
+}
+
+void *
+multilist_sublist_next(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_next(&mls->mls_list, obj));
+}
+
+void *
+multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_prev(&mls->mls_list, obj));
+}
+
+void
+multilist_link_init(multilist_node_t *link)
+{
+ list_link_init(link);
+}
+
+int
+multilist_link_active(multilist_node_t *link)
+{
+ return (list_link_active(link));
+}
+
+ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW,
+ "Number of sublists used in each multilist");