1 files changed, 1540 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c
new file mode 100644
index 000000000000..8ed374627264
--- /dev/null
+++ b/sys/contrib/openzfs/lib/libzpool/kernel.c
@@ -0,0 +1,1540 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <libzutil.h>
+#include <sys/crypto/icp.h>
+#include <sys/processor.h>
+#include <sys/rrwlock.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zstd/zstd.h>
+#include <sys/zvol.h>
+#include <zfs_fletcher.h>
+#include <zlib.h>
+
+/*
+ * Emulation of kernel services in userland.
+ */
+
+uint64_t physmem;
+uint32_t hostid;
+struct utsname hw_utsname;
+
+/* If set, all blocks read will be copied to the specified directory. */
+char *vn_dumpdir = NULL;
+
+/* this only exists to have its address taken */
+struct proc p0;
+
+/*
+ * =========================================================================
+ * threads
+ * =========================================================================
+ *
+ * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
+ * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
+ * the expected stack depth while small enough to avoid exhausting address
+ * space with high thread counts.
+ */
+#define	TS_STACK_MIN	MAX(PTHREAD_STACK_MIN, 32768)
+#define	TS_STACK_MAX	(256 * 1024)
+
+struct zk_thread_wrapper {
+	void (*func)(void *);
+	void *arg;
+};
+
+static void *
+zk_thread_wrapper(void *arg)
+{
+	struct zk_thread_wrapper ztw;
+	memcpy(&ztw, arg, sizeof (ztw));
+	free(arg);
+	ztw.func(ztw.arg);
+	return (NULL);
+}
+
+kthread_t *
+zk_thread_create(const char *name, void (*func)(void *), void *arg,
+    size_t stksize, int state)
+{
+	pthread_attr_t attr;
+	pthread_t tid;
+	char *stkstr;
+	struct zk_thread_wrapper *ztw;
+	int detachstate = PTHREAD_CREATE_DETACHED;
+
+	VERIFY0(pthread_attr_init(&attr));
+
+	if (state & TS_JOINABLE)
+		detachstate = PTHREAD_CREATE_JOINABLE;
+
+	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
+
+	/*
+	 * We allow the default stack size in user space to be specified by
+	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
+	 * the convenience of observing and debugging stack overruns in
+	 * user space.  Explicitly specified stack sizes will be honored.
+	 * The usage of ZFS_STACK_SIZE is discussed further in the
+	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
+	 */
+	if (stksize == 0) {
+		stkstr = getenv("ZFS_STACK_SIZE");
+
+		if (stkstr == NULL)
+			stksize = TS_STACK_MAX;
+		else
+			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
+	}
+
+	VERIFY3S(stksize, >, 0);
+	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
+
+	/*
+	 * If this ever fails, it may be because the stack size is not a
+	 * multiple of system page size.
+	 */
+	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
+	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
+
+	VERIFY(ztw = malloc(sizeof (*ztw)));
+	ztw->func = func;
+	ztw->arg = arg;
+	VERIFY0(pthread_create(&tid, &attr, zk_thread_wrapper, ztw));
+	VERIFY0(pthread_attr_destroy(&attr));
+
+	pthread_setname_np(tid, name);
+
+	return ((void *)(uintptr_t)tid);
+}
+
+/*
+ * =========================================================================
+ * kstats
+ * =========================================================================
+ */
+kstat_t *
+kstat_create(const char *module, int instance, const char *name,
+    const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
+{
+	(void) module, (void) instance, (void) name, (void) class, (void) type,
+	    (void) ndata, (void) ks_flag;
+	return (NULL);
+}
+
+void
+kstat_install(kstat_t *ksp)
+{
+	(void) ksp;
+}
+
+void
+kstat_delete(kstat_t *ksp)
+{
+	(void) ksp;
+}
+
+void
+kstat_set_raw_ops(kstat_t *ksp,
+    int (*headers)(char *buf, size_t size),
+    int (*data)(char *buf, size_t size, void *data),
+    void *(*addr)(kstat_t *ksp, loff_t index))
+{
+	(void) ksp, (void) headers, (void) data, (void) addr;
+}
+
+/*
+ * =========================================================================
+ * mutexes
+ * =========================================================================
+ */
+
+void
+mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
+{
+	(void) name, (void) type, (void) cookie;
+	VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
+	memset(&mp->m_owner, 0, sizeof (pthread_t));
+}
+
+void
+mutex_destroy(kmutex_t *mp)
+{
+	VERIFY0(pthread_mutex_destroy(&mp->m_lock));
+}
+
+void
+mutex_enter(kmutex_t *mp)
+{
+	VERIFY0(pthread_mutex_lock(&mp->m_lock));
+	mp->m_owner = pthread_self();
+}
+
+int
+mutex_enter_check_return(kmutex_t *mp)
+{
+	int error = pthread_mutex_lock(&mp->m_lock);
+	if (error == 0)
+		mp->m_owner = pthread_self();
+	return (error);
+}
+
+int
+mutex_tryenter(kmutex_t *mp)
+{
+	int error = pthread_mutex_trylock(&mp->m_lock);
+	if (error == 0) {
+		mp->m_owner = pthread_self();
+		return (1);
+	} else {
+		VERIFY3S(error, ==, EBUSY);
+		return (0);
+	}
+}
+
+void
+mutex_exit(kmutex_t *mp)
+{
+	memset(&mp->m_owner, 0, sizeof (pthread_t));
+	VERIFY0(pthread_mutex_unlock(&mp->m_lock));
+}
+
+/*
+ * =========================================================================
+ * rwlocks
+ * =========================================================================
+ */
+
+void
+rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
+{
+	(void) name, (void) type, (void) arg;
+	VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
+	rwlp->rw_readers = 0;
+	rwlp->rw_owner = 0;
+}
+
+void
+rw_destroy(krwlock_t *rwlp)
+{
+	VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
+}
+
+void
+rw_enter(krwlock_t *rwlp, krw_t rw)
+{
+	if (rw == RW_READER) {
+		VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
+		atomic_inc_uint(&rwlp->rw_readers);
+	} else {
+		VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
+		rwlp->rw_owner = pthread_self();
+	}
+}
+
+void
+rw_exit(krwlock_t *rwlp)
+{
+	if (RW_READ_HELD(rwlp))
+		atomic_dec_uint(&rwlp->rw_readers);
+	else
+		rwlp->rw_owner = 0;
+
+	VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
+}
+
+int
+rw_tryenter(krwlock_t *rwlp, krw_t rw)
+{
+	int error;
+
+	if (rw == RW_READER)
+		error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
+	else
+		error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
+
+	if (error == 0) {
+		if (rw == RW_READER)
+			atomic_inc_uint(&rwlp->rw_readers);
+		else
+			rwlp->rw_owner = pthread_self();
+
+		return (1);
+	}
+
+	VERIFY3S(error, ==, EBUSY);
+
+	return (0);
+}
+
+uint32_t
+zone_get_hostid(void *zonep)
+{
+	/*
+	 * We're emulating the system's hostid in userland.
+	 */
+	(void) zonep;
+	return (hostid);
+}
+
+int
+rw_tryupgrade(krwlock_t *rwlp)
+{
+	(void) rwlp;
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * condition variables
+ * =========================================================================
+ */
+
+void
+cv_init(kcondvar_t *cv, char *name, int type, void *arg)
+{
+	(void) name, (void) type, (void) arg;
+	VERIFY0(pthread_cond_init(cv, NULL));
+}
+
+void
+cv_destroy(kcondvar_t *cv)
+{
+	VERIFY0(pthread_cond_destroy(cv));
+}
+
+void
+cv_wait(kcondvar_t *cv, kmutex_t *mp)
+{
+	memset(&mp->m_owner, 0, sizeof (pthread_t));
+	VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
+	mp->m_owner = pthread_self();
+}
+
+int
+cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
+{
+	cv_wait(cv, mp);
+	return (1);
+}
+
+int
+cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
+{
+	int error;
+	struct timeval tv;
+	struct timespec ts;
+	clock_t delta;
+
+	delta = abstime - ddi_get_lbolt();
+	if (delta <= 0)
+		return (-1);
+
+	VERIFY0(gettimeofday(&tv, NULL));
+
+	ts.tv_sec = tv.tv_sec + delta / hz;
+	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
+	if (ts.tv_nsec >= NANOSEC) {
+		ts.tv_sec++;
+		ts.tv_nsec -= NANOSEC;
+	}
+
+	memset(&mp->m_owner, 0, sizeof (pthread_t));
+	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+	mp->m_owner = pthread_self();
+
+	if (error == ETIMEDOUT)
+		return (-1);
+
+	VERIFY0(error);
+
+	return (1);
+}
+
+int
+cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+	(void) res;
+	int error;
+	struct timeval tv;
+	struct timespec ts;
+	hrtime_t delta;
+
+	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
+
+	delta = tim;
+	if (flag & CALLOUT_FLAG_ABSOLUTE)
+		delta -= gethrtime();
+
+	if (delta <= 0)
+		return (-1);
+
+	VERIFY0(gettimeofday(&tv, NULL));
+
+	ts.tv_sec = tv.tv_sec + delta / NANOSEC;
+	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
+	if (ts.tv_nsec >= NANOSEC) {
+		ts.tv_sec++;
+		ts.tv_nsec -= NANOSEC;
+	}
+
+	memset(&mp->m_owner, 0, sizeof (pthread_t));
+	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+	mp->m_owner = pthread_self();
+
+	if (error == ETIMEDOUT)
+		return (-1);
+
+	VERIFY0(error);
+
+	return (1);
+}
+
+void
+cv_signal(kcondvar_t *cv)
+{
+	VERIFY0(pthread_cond_signal(cv));
+}
+
+void
+cv_broadcast(kcondvar_t *cv)
+{
+	VERIFY0(pthread_cond_broadcast(cv));
+}
+
+/*
+ * =========================================================================
+ * procfs list
+ * =========================================================================
+ */
+
+void
+seq_printf(struct seq_file *m, const char *fmt, ...)
+{
+	(void) m, (void) fmt;
+}
+
+void
+procfs_list_install(const char *module,
+    const char *submodule,
+    const char *name,
+    mode_t mode,
+    procfs_list_t *procfs_list,
+    int (*show)(struct seq_file *f, void *p),
+    int (*show_header)(struct seq_file *f),
+    int (*clear)(procfs_list_t *procfs_list),
+    size_t procfs_list_node_off)
+{
+	(void) module, (void) submodule, (void) name, (void) mode, (void) show,
+	    (void) show_header, (void) clear;
+	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&procfs_list->pl_list,
+	    procfs_list_node_off + sizeof (procfs_list_node_t),
+	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+	procfs_list->pl_next_id = 1;
+	procfs_list->pl_node_offset = procfs_list_node_off;
+}
+
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{
+	(void) procfs_list;
+}
+
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+	ASSERT(list_is_empty(&procfs_list->pl_list));
+	list_destroy(&procfs_list->pl_list);
+	mutex_destroy(&procfs_list->pl_lock);
+}
+
+#define	NODE_ID(procfs_list, obj) \
+		(((procfs_list_node_t *)(((char *)obj) + \
+		(procfs_list)->pl_node_offset))->pln_id)
+
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
+	list_insert_tail(&procfs_list->pl_list, p);
+}
+
+/*
+ * =========================================================================
+ * vnode operations
+ * =========================================================================
+ */
+
+/*
+ * =========================================================================
+ * Figure out which debugging statements to print
+ * =========================================================================
+ */
+
+static char *dprintf_string;
+static int dprintf_print_all;
+
+int
+dprintf_find_string(const char *string)
+{
+	char *tmp_str = dprintf_string;
+	int len = strlen(string);
+
+	/*
+	 * Find out if this is a string we want to print.
+	 * String format: file1.c,function_name1,file2.c,file3.c
+	 */
+
+	while (tmp_str != NULL) {
+		if (strncmp(tmp_str, string, len) == 0 &&
+		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
+			return (1);
+		tmp_str = strchr(tmp_str, ',');
+		if (tmp_str != NULL)
+			tmp_str++; /* Get rid of , */
+	}
+	return (0);
+}
+
+void
+dprintf_setup(int *argc, char **argv)
+{
+	int i, j;
+
+	/*
+	 * Debugging can be specified two ways: by setting the
+	 * environment variable ZFS_DEBUG, or by including a
+	 * "debug=..."  argument on the command line.  The command
+	 * line setting overrides the environment variable.
+	 */
+
+	for (i = 1; i < *argc; i++) {
+		int len = strlen("debug=");
+		/* First look for a command line argument */
+		if (strncmp("debug=", argv[i], len) == 0) {
+			dprintf_string = argv[i] + len;
+			/* Remove from args */
+			for (j = i; j < *argc; j++)
+				argv[j] = argv[j+1];
+			argv[j] = NULL;
+			(*argc)--;
+		}
+	}
+
+	if (dprintf_string == NULL) {
+		/* Look for ZFS_DEBUG environment variable */
+		dprintf_string = getenv("ZFS_DEBUG");
+	}
+
+	/*
+	 * Are we just turning on all debugging?
+	 */
+	if (dprintf_find_string("on"))
+		dprintf_print_all = 1;
+
+	if (dprintf_string != NULL)
+		zfs_flags |= ZFS_DEBUG_DPRINTF;
+}
+
+/*
+ * =========================================================================
+ * debug printfs
+ * =========================================================================
+ */
+void
+__dprintf(boolean_t dprint, const char *file, const char *func,
+    int line, const char *fmt, ...)
+{
+	/* Get rid of annoying "../common/" prefix to filename. */
+	const char *newfile = zfs_basename(file);
+
+	va_list adx;
+	if (dprint) {
+		/* dprintf messages are printed immediately */
+
+		if (!dprintf_print_all &&
+		    !dprintf_find_string(newfile) &&
+		    !dprintf_find_string(func))
+			return;
+
+		/* Print out just the function name if requested */
+		flockfile(stdout);
+		if (dprintf_find_string("pid"))
+			(void) printf("%d ", getpid());
+		if (dprintf_find_string("tid"))
+			(void) printf("%ju ",
+			    (uintmax_t)(uintptr_t)pthread_self());
+		if (dprintf_find_string("cpu"))
+			(void) printf("%u ", getcpuid());
+		if (dprintf_find_string("time"))
+			(void) printf("%llu ", gethrtime());
+		if (dprintf_find_string("long"))
+			(void) printf("%s, line %d: ", newfile, line);
+		(void) printf("dprintf: %s: ", func);
+		va_start(adx, fmt);
+		(void) vprintf(fmt, adx);
+		va_end(adx);
+		funlockfile(stdout);
+	} else {
+		/* zfs_dbgmsg is logged for dumping later */
+		size_t size;
+		char *buf;
+		int i;
+
+		size = 1024;
+		buf = umem_alloc(size, UMEM_NOFAIL);
+		i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
+
+		if (i < size) {
+			va_start(adx, fmt);
+			(void) vsnprintf(buf + i, size - i, fmt, adx);
+			va_end(adx);
+		}
+
+		__zfs_dbgmsg(buf);
+
+		umem_free(buf, size);
+	}
+}
+
+/*
+ * =========================================================================
+ * cmn_err() and panic()
+ * =========================================================================
+ */
+static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
+static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
+
+__attribute__((noreturn)) void
+vpanic(const char *fmt, va_list adx)
+{
+	(void) fprintf(stderr, "error: ");
+	(void) vfprintf(stderr, fmt, adx);
+	(void) fprintf(stderr, "\n");
+
+	abort();	/* think of it as a "user-level crash dump" */
+}
+
+__attribute__((noreturn)) void
+panic(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vpanic(fmt, adx);
+	va_end(adx);
+}
+
+void
+vcmn_err(int ce, const char *fmt, va_list adx)
+{
+	if (ce == CE_PANIC)
+		vpanic(fmt, adx);
+	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
+		(void) fprintf(stderr, "%s", ce_prefix[ce]);
+		(void) vfprintf(stderr, fmt, adx);
+		(void) fprintf(stderr, "%s", ce_suffix[ce]);
+	}
+}
+
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vcmn_err(ce, fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * =========================================================================
+ * misc routines
+ * =========================================================================
+ */
+
+void
+delay(clock_t ticks)
+{
+	(void) poll(0, 0, ticks * (1000 / hz));
+}
+
+/*
+ * Find highest one bit set.
+ * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
+ * The __builtin_clzll() function is supported by both GCC and Clang.
+ */
+int
+highbit64(uint64_t i)
+{
+	if (i == 0)
+	return (0);
+
+	return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
+}
+
+/*
+ * Find lowest one bit set.
+ * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
+ * The __builtin_ffsll() function is supported by both GCC and Clang.
+ */
+int
+lowbit64(uint64_t i)
+{
+	if (i == 0)
+		return (0);
+
+	return (__builtin_ffsll(i));
+}
+
+const char *random_path = "/dev/random";
+const char *urandom_path = "/dev/urandom";
+static int random_fd = -1, urandom_fd = -1;
+
+void
+random_init(void)
+{
+	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
+	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
+}
+
+void
+random_fini(void)
+{
+	close(random_fd);
+	close(urandom_fd);
+
+	random_fd = -1;
+	urandom_fd = -1;
+}
+
+static int
+random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
+{
+	size_t resid = len;
+	ssize_t bytes;
+
+	ASSERT(fd != -1);
+
+	while (resid != 0) {
+		bytes = read(fd, ptr, resid);
+		ASSERT3S(bytes, >=, 0);
+		ptr += bytes;
+		resid -= bytes;
+	}
+
+	return (0);
+}
+
+int
+random_get_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, random_fd));
+}
+
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, urandom_fd));
+}
+
+int
+ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
+{
+	errno = 0;
+	*result = strtoull(str, nptr, base);
+	if (*result == 0)
+		return (errno);
+	return (0);
+}
+
+utsname_t *
+utsname(void)
+{
+	return (&hw_utsname);
+}
+
+/*
+ * =========================================================================
+ * kernel emulation setup & teardown
+ * =========================================================================
+ */
+static int
+umem_out_of_memory(void)
+{
+	char errmsg[] = "out of memory -- generating core dump\n";
+
+	(void) fprintf(stderr, "%s", errmsg);
+	abort();
+	return (0);
+}
+
+static void
+spa_config_load(void)
+{
+	void *buf = NULL;
+	nvlist_t *nvlist, *child;
+	nvpair_t *nvpair;
+	char *pathname;
+	zfs_file_t *fp;
+	zfs_file_attr_t zfa;
+	uint64_t fsize;
+	int err;
+
+	/*
+	 * Open the configuration file.
+	 */
+	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	(void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path);
+
+	err = zfs_file_open(pathname, O_RDONLY, 0, &fp);
+	if (err)
+		err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp);
+
+	kmem_free(pathname, MAXPATHLEN);
+
+	if (err)
+		return;
+
+	if (zfs_file_getattr(fp, &zfa))
+		goto out;
+
+	fsize = zfa.zfa_size;
+	buf = kmem_alloc(fsize, KM_SLEEP);
+
+	/*
+	 * Read the nvlist from the file.
+	 */
+	if (zfs_file_read(fp, buf, fsize, NULL) < 0)
+		goto out;
+
+	/*
+	 * Unpack the nvlist.
+	 */
+	if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
+		goto out;
+
+	/*
+	 * Iterate over all elements in the nvlist, creating a new spa_t for
+	 * each one with the specified configuration.
+	 */
+	mutex_enter(&spa_namespace_lock);
+	nvpair = NULL;
+	while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
+		if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
+			continue;
+
+		child = fnvpair_value_nvlist(nvpair);
+
+		if (spa_lookup(nvpair_name(nvpair)) != NULL)
+			continue;
+		(void) spa_add(nvpair_name(nvpair), child, NULL);
+	}
+	mutex_exit(&spa_namespace_lock);
+
+	nvlist_free(nvlist);
+
+out:
+	if (buf != NULL)
+		kmem_free(buf, fsize);
+
+	zfs_file_close(fp);
+}
+
+void
+kernel_init(int mode)
+{
+	extern uint_t rrw_tsd_key;
+
+	umem_nofail_callback(umem_out_of_memory);
+
+	physmem = sysconf(_SC_PHYS_PAGES);
+
+	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
+	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
+
+	hostid = (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0;
+
+	random_init();
+
+	VERIFY0(uname(&hw_utsname));
+
+	system_taskq_init();
+	icp_init();
+
+	zstd_init();
+
+	spa_init((spa_mode_t)mode);
+	spa_config_load();
+
+	fletcher_4_init();
+
+	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
+}
+
+void
+kernel_fini(void)
+{
+	fletcher_4_fini();
+	spa_fini();
+
+	zstd_fini();
+
+	icp_fini();
+	system_taskq_fini();
+
+	random_fini();
+}
+
+uid_t
+crgetuid(cred_t *cr)
+{
+	(void) cr;
+	return (0);
+}
+
+uid_t
+crgetruid(cred_t *cr)
+{
+	(void) cr;
+	return (0);
+}
+
+gid_t
+crgetgid(cred_t *cr)
+{
+	(void) cr;
+	return (0);
+}
+
+int
+crgetngroups(cred_t *cr)
+{
+	(void) cr;
+	return (0);
+}
+
+gid_t *
+crgetgroups(cred_t *cr)
+{
+	(void) cr;
+	return (NULL);
+}
+
+int
+zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
+{
+	(void) name, (void) cr;
+	return (0);
+}
+
+int
+zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
+{
+	(void) from, (void) to, (void) cr;
+	return (0);
+}
+
+int
+zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
+{
+	(void) name, (void) cr;
+	return (0);
+}
+
+int
+secpolicy_zfs(const cred_t *cr)
+{
+	(void) cr;
+	return (0);
+}
+
+ksiddomain_t *
+ksid_lookupdomain(const char *dom)
+{
+	ksiddomain_t *kd;
+
+	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
+	kd->kd_name = spa_strdup(dom);
+	return (kd);
+}
+
+void
+ksiddomain_rele(ksiddomain_t *ksid)
+{
+	spa_strfree(ksid->kd_name);
+	umem_free(ksid, sizeof (ksiddomain_t));
+}
+
+char *
+kmem_vasprintf(const char *fmt, va_list adx)
+{
+	char *buf = NULL;
+	va_list adx_copy;
+
+	va_copy(adx_copy, adx);
+	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
+	va_end(adx_copy);
+
+	return (buf);
+}
+
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+	char *buf = NULL;
+	va_list adx;
+
+	va_start(adx, fmt);
+	VERIFY(vasprintf(&buf, fmt, adx) != -1);
+	va_end(adx);
+
+	return (buf);
+}
+
+/*
+ * kmem_scnprintf() will return the number of characters that it would have
+ * printed whenever it is limited by value of the size variable, rather than
+ * the number of characters that it did print. This can cause misbehavior on
+ * subsequent uses of the return value, so we define a safe version that will
+ * return the number of characters actually printed, minus the NULL format
+ * character.  Subsequent use of this by the safe string functions is safe
+ * whether it is snprintf(), strlcat() or strlcpy().
+ */
+int
+kmem_scnprintf(char *restrict str, size_t size, const char *restrict fmt, ...)
+{
+	int n;
+	va_list ap;
+
+	/* Make the 0 case a no-op so that we do not return -1 */
+	if (size == 0)
+		return (0);
+
+	va_start(ap, fmt);
+	n = vsnprintf(str, size, fmt, ap);
+	va_end(ap);
+
+	if (n >= size)
+		n = size - 1;
+
+	return (n);
+}
+
+zfs_file_t *
+zfs_onexit_fd_hold(int fd, minor_t *minorp)
+{
+	(void) fd;
+	*minorp = 0;
+	return (NULL);
+}
+
+void
+zfs_onexit_fd_rele(zfs_file_t *fp)
+{
+	(void) fp;
+}
+
+int
+zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
+    uintptr_t *action_handle)
+{
+	(void) minor, (void) func, (void) data, (void) action_handle;
+	return (0);
+}
+
+fstrans_cookie_t
+spl_fstrans_mark(void)
+{
+	return ((fstrans_cookie_t)0);
+}
+
+void
+spl_fstrans_unmark(fstrans_cookie_t cookie)
+{
+	(void) cookie;
+}
+
+int
+kmem_cache_reap_active(void)
+{
+	return (0);
+}
+
+void
+zvol_create_minors(const char *name)
+{
+	(void) name;
+}
+
+void
+zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+{
+	(void) spa, (void) name, (void) async;
+}
+
+void
+zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
+    boolean_t async)
+{
+	(void) spa, (void) oldname, (void) newname, (void) async;
+}
+
+/*
+ * Open file
+ *
+ * path - fully qualified path to file
+ * flags - file attributes O_READ / O_WRITE / O_EXCL
+ * fpp - pointer to return file pointer
+ *
+ * Returns 0 on success underlying error on failure.
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	int fd;
+	int dump_fd;
+	int err;
+	int old_umask = 0;
+	zfs_file_t *fp;
+	struct stat64 st;
+
+	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
+		return (errno);
+
+	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
+		flags |= O_DIRECT;
+
+	if (flags & O_CREAT)
+		old_umask = umask(0);
+
+	fd = open64(path, flags, mode);
+	if (fd == -1)
+		return (errno);
+
+	if (flags & O_CREAT)
+		(void) umask(old_umask);
+
+	if (vn_dumpdir != NULL) {
+		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
+		const char *inpath = zfs_basename(path);
+
+		(void) snprintf(dumppath, MAXPATHLEN,
+		    "%s/%s", vn_dumpdir, inpath);
+		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
+		umem_free(dumppath, MAXPATHLEN);
+		if (dump_fd == -1) {
+			err = errno;
+			close(fd);
+			return (err);
+		}
+	} else {
+		dump_fd = -1;
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
+	fp->f_fd = fd;
+	fp->f_dump_fd = dump_fd;
+	*fpp = fp;
+
+	return (0);
+}
+
+void
+zfs_file_close(zfs_file_t *fp)
+{
+	close(fp->f_fd);
+	if (fp->f_dump_fd != -1)
+		close(fp->f_dump_fd);
+
+	umem_free(fp, sizeof (zfs_file_t));
+}
+
+/*
+ * Stateful write - use os internal file pointer to determine where to
+ * write and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * resid -  pointer to count of unwritten bytes  (if short write)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = write(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless write - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * off - file offset to write to (only valid for seekable types)
+ * resid -  pointer to count of unwritten bytes
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf,
+    size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)
+{
+	ssize_t rc, split, done;
+	int sectors;
+
+	/*
+	 * To simulate partial disk writes, we split writes into two
+	 * system calls so that the process can be killed in between.
+	 * This is used by ztest to simulate realistic failure modes.
+	 */
+	sectors = count >> ashift;
+	split = (sectors > 0 ? rand() % sectors : 0) << ashift;
+	rc = pwrite64(fp->f_fd, buf, split, pos);
+	if (rc != -1) {
+		done = rc;
+		rc = pwrite64(fp->f_fd, (char *)buf + split,
+		    count - split, pos + split);
+	}
+#ifdef __linux__
+	if (rc == -1 && errno == EINVAL) {
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order
+		 * to catch the offender.
+		 */
+		abort();
+	}
+#endif
+
+	if (rc < 0)
+		return (errno);
+
+	done += rc;
+
+	if (resid) {
+		*resid = count - done;
+	} else if (done != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateful read - use os internal file pointer to determine where to
+ * read and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to write
+ * count - # of bytes to read
+ * resid -  pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	int rc;
+
+	rc = read(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless read - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to write
+ * count - # of bytes to write
+ * off - file offset to read from (only valid for seekable types)
+ * resid -  pointer to count of unwritten bytes (if short write)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = pread64(fp->f_fd, buf, count, off);
+	if (rc < 0) {
+#ifdef __linux__
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order to
+		 * catch the offender.
+		 */
+		if (errno == EINVAL)
+			abort();
+#endif
+		return (errno);
+	}
+
+	if (fp->f_dump_fd != -1) {
+		int status;
+
+		status = pwrite64(fp->f_dump_fd, buf, rc, off);
+		ASSERT(status != -1);
+	}
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * lseek - set / get file pointer
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * offp - value to seek to, returns current value plus passed offset
+ * whence - see man pages for standard lseek whence values
+ *
+ * Returns 0 on success errno on failure (ESPIPE for non seekable types)
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	loff_t rc;
+
+	rc = lseek(fp->f_fd, *offp, whence);
+	if (rc < 0)
+		return (errno);
+
+	*offp = rc;
+
+	return (0);
+}
+
+/*
+ * Get file attributes
+ *
+ * filp - file pointer
+ * zfattr - pointer to file attr structure
+ *
+ * Currently only used for fetching size and file mode
+ *
+ * Returns 0 on success or error code of underlying getattr call on failure.
+ */
+int
+zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
+{
+	struct stat64 st;
+
+	if (fstat64_blk(fp->f_fd, &st) == -1)
+		return (errno);
+
+	zfattr->zfa_size = st.st_size;
+	zfattr->zfa_mode = st.st_mode;
+
+	return (0);
+}
+
+/*
+ * Sync file to disk
+ *
+ * filp - file pointer
+ * flags - O_SYNC and or O_DSYNC
+ *
+ * Returns 0 on success or error code of underlying sync call on failure.
+ */
+int
+zfs_file_fsync(zfs_file_t *fp, int flags)
+{
+	(void) flags;
+
+	if (fsync(fp->f_fd) < 0)
+		return (errno);
+
+	return (0);
+}
+
+/*
+ * deallocate - zero and/or deallocate file storage
+ *
+ * fp - file pointer
+ * offset - offset to start zeroing or deallocating
+ * len - length to zero or deallocate
+ */
+int
+zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len)
+{
+	int rc;
+#if defined(__linux__)
+	rc = fallocate(fp->f_fd,
+	    FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);
+#elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)
+	struct spacectl_range rqsr = {
+		.r_offset = offset,
+		.r_len = len,
+	};
+	rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr);
+#else
+	(void) fp, (void) offset, (void) len;
+	rc = EOPNOTSUPP;
+#endif
+	if (rc)
+		return (SET_ERROR(rc));
+	return (0);
+}
+
+/*
+ * Request current file pointer offset
+ *
+ * fp - pointer to file
+ *
+ * Returns current file offset.
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	return (lseek(fp->f_fd, SEEK_CUR, 0));
+}
+
+/*
+ * unlink file
+ *
+ * path - fully qualified file path
+ *
+ * Returns 0 on success.
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_unlink(const char *path)
+{
+	return (remove(path));
+}
+
+/*
+ * Get reference to file pointer
+ *
+ * fd - input file descriptor
+ *
+ * Returns pointer to file struct or NULL.
+ * Unsupported in user space.
+ */
+zfs_file_t *
+zfs_file_get(int fd)
+{
+	(void) fd;
+	abort();
+	return (NULL);
+}
+/*
+ * Drop reference to file pointer
+ *
+ * fp - pointer to file struct
+ *
+ * Unsupported in user space.
+ */
+void
+zfs_file_put(zfs_file_t *fp)
+{
+	abort();
+	(void) fp;
+}
+
+void
+zfsvfs_update_fromname(const char *oldname, const char *newname)
+{
+	(void) oldname, (void) newname;
+}
+
+void
+spa_import_os(spa_t *spa)
+{
+	(void) spa;
+}
+
+void
+spa_export_os(spa_t *spa)
+{
+	(void) spa;
+}
+
+void
+spa_activate_os(spa_t *spa)
+{
+	(void) spa;
+}
+
+void
+spa_deactivate_os(spa_t *spa)
+{
+	(void) spa;
+}